Remove trailing space in SiteURL and FeedURL

This commit is contained in:
Frédéric Guillot 2024-03-18 17:09:08 -07:00
parent 8e28e41b02
commit fa9697b972
7 changed files with 274 additions and 22 deletions

View File

@ -217,12 +217,31 @@ func TestParseFeedURL(t *testing.T) {
}
}
func TestParseFeedWithRelativeURL(t *testing.T) {
func TestParseFeedWithRelativeFeedURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link rel="alternate" type="text/html" href="https://example.org/"/>
<link rel="self" type="application/atom+xml" href="/feed"/>
<updated>2003-12-13T18:30:02Z</updated>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/feed" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
}
func TestParseFeedWithRelativeSiteURL(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="/blog/atom.xml" rel="self" type="application/atom+xml"/>
<link href="/blog"/>
<link href="/blog "/>
<entry>
<title>Test</title>
@ -241,15 +260,47 @@ func TestParseFeedWithRelativeURL(t *testing.T) {
}
if feed.FeedURL != "https://example.org/blog/atom.xml" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
t.Errorf("Incorrect feed URL, got: %q", feed.FeedURL)
}
if feed.SiteURL != "https://example.org/blog" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
t.Errorf("Incorrect site URL, got: %q", feed.SiteURL)
}
if feed.Entries[0].URL != "https://example.org/blog/article.html" {
t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL)
t.Errorf("Incorrect entry URL, got: %q", feed.Entries[0].URL)
}
}
func TestParseFeedSiteURLWithTrailingSpace(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="http://example.org "/>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "http://example.org" {
t.Errorf("Incorrect site URL, got: %q", feed.SiteURL)
}
}
func TestParseFeedWithFeedURLWithTrailingSpace(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<link href="/blog/atom.xml " rel="self" type="application/atom+xml"/>
</feed>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)), "10")
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/blog/atom.xml" {
t.Errorf("Incorrect site URL, got: %q", feed.FeedURL)
}
}

View File

@ -24,15 +24,15 @@ func NewJSONAdapter(jsonFeed *JSONFeed) *JSONAdapter {
return &JSONAdapter{jsonFeed}
}
func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
func (j *JSONAdapter) BuildFeed(baseURL string) *model.Feed {
feed := &model.Feed{
Title: strings.TrimSpace(j.jsonFeed.Title),
FeedURL: j.jsonFeed.FeedURL,
SiteURL: j.jsonFeed.HomePageURL,
FeedURL: strings.TrimSpace(j.jsonFeed.FeedURL),
SiteURL: strings.TrimSpace(j.jsonFeed.HomePageURL),
}
if feed.FeedURL == "" {
feed.FeedURL = feedURL
feed.FeedURL = strings.TrimSpace(baseURL)
}
// Fallback to the feed URL if the site URL is empty.
@ -40,11 +40,11 @@ func (j *JSONAdapter) BuildFeed(feedURL string) *model.Feed {
feed.SiteURL = feed.FeedURL
}
if feedURL, err := urllib.AbsoluteURL(feedURL, j.jsonFeed.FeedURL); err == nil {
if feedURL, err := urllib.AbsoluteURL(baseURL, feed.FeedURL); err == nil {
feed.FeedURL = feedURL
}
if siteURL, err := urllib.AbsoluteURL(feedURL, j.jsonFeed.HomePageURL); err == nil {
if siteURL, err := urllib.AbsoluteURL(baseURL, feed.SiteURL); err == nil {
feed.SiteURL = siteURL
}

View File

@ -177,6 +177,82 @@ func TestParsePodcast(t *testing.T) {
}
}
func TestParseFeedWithFeedURLWithTrailingSpace(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "https://example.org/feed.json ",
"items": []
}`
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/feed.json" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
}
func TestParseFeedWithRelativeFeedURL(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/",
"feed_url": "/feed.json",
"items": []
}`
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/feed.json" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
}
func TestParseFeedSiteURLWithTrailingSpace(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "https://example.org/ ",
"feed_url": "https://example.org/feed.json",
"items": []
}`
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "https://example.org/" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
}
func TestParseFeedWithRelativeSiteURL(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",
"title": "My Example Feed",
"home_page_url": "/home ",
"feed_url": "https://example.org/feed.json",
"items": []
}`
feed, err := Parse("https://example.org/feed.json", bytes.NewBufferString(data))
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "https://example.org/home" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
}
func TestParseFeedWithoutTitle(t *testing.T) {
data := `{
"version": "https://jsonfeed.org/version/1",

View File

@ -24,18 +24,18 @@ func NewRDFAdapter(rdf *RDF) *RDFAdapter {
return &RDFAdapter{rdf}
}
func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
func (r *RDFAdapter) BuildFeed(baseURL string) *model.Feed {
feed := &model.Feed{
Title: stripTags(r.rdf.Channel.Title),
FeedURL: feedURL,
SiteURL: r.rdf.Channel.Link,
FeedURL: strings.TrimSpace(baseURL),
SiteURL: strings.TrimSpace(r.rdf.Channel.Link),
}
if feed.Title == "" {
feed.Title = feedURL
feed.Title = baseURL
}
if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err == nil {
if siteURL, err := urllib.AbsoluteURL(feed.FeedURL, feed.SiteURL); err == nil {
feed.SiteURL = siteURL
}

View File

@ -289,7 +289,37 @@ func TestParseRDFFeedWithRelativeLink(t *testing.T) {
xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example Feed</title>
<link>/test/index.html</link>
<link>/test/index.html </link>
</channel>
<item>
<title>Example</title>
<link>http://example.org/item</link>
<description>Test</description>
</item>
</rdf:RDF>`
feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "http://example.org/test/index.html" {
t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL)
}
if feed.FeedURL != "http://example.org/feed" {
t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL)
}
}
func TestParseRDFFeedSiteURLWithTrailingSpace(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/">
<channel>
<title>Example Feed</title>
<link>http://example.org/test/index.html </link>
</channel>
<item>
<title>Example</title>

View File

@ -26,14 +26,15 @@ func NewRSSAdapter(rss *RSS) *RSSAdapter {
return &RSSAdapter{rss}
}
func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
func (r *RSSAdapter) BuildFeed(baseURL string) *model.Feed {
feed := &model.Feed{
Title: html.UnescapeString(strings.TrimSpace(r.rss.Channel.Title)),
FeedURL: feedURL,
SiteURL: r.rss.Channel.Link,
FeedURL: strings.TrimSpace(baseURL),
SiteURL: strings.TrimSpace(r.rss.Channel.Link),
}
if siteURL, err := urllib.AbsoluteURL(feedURL, r.rss.Channel.Link); err == nil {
// Ensure the Site URL is absolute.
if siteURL, err := urllib.AbsoluteURL(baseURL, feed.SiteURL); err == nil {
feed.SiteURL = siteURL
}
@ -41,7 +42,7 @@ func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed {
for _, atomLink := range r.rss.Channel.AtomLinks.Links {
atomLinkHref := strings.TrimSpace(atomLink.Href)
if atomLinkHref != "" && atomLink.Rel == "self" {
if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil {
if absoluteFeedURL, err := urllib.AbsoluteURL(feed.FeedURL, atomLinkHref); err == nil {
feed.FeedURL = absoluteFeedURL
break
}

View File

@ -109,6 +109,100 @@ func TestParseRss2Sample(t *testing.T) {
}
}
func TestParseFeedWithFeedURLWithTrailingSpace(t *testing.T) {
data := `<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss " type="application/rss+xml" rel="self"></atom:link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/ ", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/rss" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
}
func TestParseFeedWithRelativeFeedURL(t *testing.T) {
data := `<?xml version="1.0"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="/rss" type="application/rss+xml" rel="self"></atom:link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if feed.FeedURL != "https://example.org/rss" {
t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL)
}
}
func TestParseFeedSiteURLWithTrailingSpace(t *testing.T) {
data := `<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/ </link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "https://example.org/" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
}
func TestParseFeedWithRelativeSiteURL(t *testing.T) {
data := `<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>Example</title>
<link>/example </link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`
feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data)))
if err != nil {
t.Fatal(err)
}
if feed.SiteURL != "https://example.org/example" {
t.Errorf("Incorrect site URL, got: %s", feed.SiteURL)
}
}
func TestParseFeedWithoutTitle(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">