Ignore empty link when discovering feeds

This commit is contained in:
Frédéric Guillot 2023-02-26 17:09:50 -08:00
parent bb5f3ec6a8
commit aaa1625724
2 changed files with 293 additions and 4 deletions

View File

@ -87,12 +87,12 @@ func parseWebPage(websiteURL string, data io.Reader) (Subscriptions, *errors.Loc
if title, exists := s.Attr("title"); exists {
subscription.Title = title
} else {
subscription.Title = "Feed"
}
if feedURL, exists := s.Attr("href"); exists {
subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
if feedURL != "" {
subscription.URL, _ = url.AbsoluteURL(websiteURL, feedURL)
}
}
if subscription.Title == "" {

View File

@ -4,7 +4,10 @@
package subscription
import "testing"
import (
"strings"
"testing"
)
func TestFindYoutubeChannelFeed(t *testing.T) {
scenarios := map[string]string{
@ -19,3 +22,289 @@ func TestFindYoutubeChannelFeed(t *testing.T) {
}
}
}
// TestParseWebPageWithRssFeed checks that a <link> element advertising an
// application/rss+xml feed yields exactly one subscription of type "rss"
// carrying the link's title and URL.
func TestParseWebPageWithRssFeed(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/rss" rel="alternate" type="application/rss+xml" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if got := feed.Title; got != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, got)
	}
	if got := feed.URL; got != "http://example.org/rss" {
		t.Errorf(`Incorrect subscription URL: %q`, got)
	}
	if got := feed.Type; got != "rss" {
		t.Errorf(`Incorrect subscription type: %q`, got)
	}
}
// TestParseWebPageWithAtomFeed checks that a <link> element advertising an
// application/atom+xml feed yields exactly one subscription of type "atom"
// carrying the link's title and URL.
func TestParseWebPageWithAtomFeed(t *testing.T) {
	body := strings.NewReader(`
<!doctype html>
<html>
<head>
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Some Title">
</head>
<body>
</body>
</html>`)

	results, parseErr := parseWebPage("http://example.org/", body)
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(results) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	atomFeed := results[0]
	if title := atomFeed.Title; title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, title)
	}
	if feedURL := atomFeed.URL; feedURL != "http://example.org/atom.xml" {
		t.Errorf(`Incorrect subscription URL: %q`, feedURL)
	}
	if kind := atomFeed.Type; kind != "atom" {
		t.Errorf(`Incorrect subscription type: %q`, kind)
	}
}
// TestParseWebPageWithJSONFeed checks that a <link> element advertising an
// application/feed+json feed yields exactly one subscription of type "json"
// carrying the link's title and URL.
func TestParseWebPageWithJSONFeed(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if got := feed.Title; got != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, got)
	}
	if got := feed.URL; got != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, got)
	}
	if got := feed.Type; got != "json" {
		t.Errorf(`Incorrect subscription type: %q`, got)
	}
}
// TestParseWebPageWithOldJSONFeedMimeType checks that a <link> element using
// the application/json MIME type is still reported as one subscription of
// type "json" carrying the link's title and URL.
func TestParseWebPageWithOldJSONFeedMimeType(t *testing.T) {
	body := strings.NewReader(`
<!doctype html>
<html>
<head>
<link href="http://example.org/feed.json" rel="alternate" type="application/json" title="Some Title">
</head>
<body>
</body>
</html>`)

	results, parseErr := parseWebPage("http://example.org/", body)
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(results) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	jsonFeed := results[0]
	if title := jsonFeed.Title; title != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, title)
	}
	if feedURL := jsonFeed.URL; feedURL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feedURL)
	}
	if kind := jsonFeed.Type; kind != "json" {
		t.Errorf(`Incorrect subscription type: %q`, kind)
	}
}
// TestParseWebPageWithRelativeFeedURL checks that a relative href ("/feed.json")
// is reported as an absolute URL resolved against the website URL passed to
// parseWebPage.
func TestParseWebPageWithRelativeFeedURL(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href="/feed.json" rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(found) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	feed := found[0]
	if got := feed.Title; got != "Some Title" {
		t.Errorf(`Incorrect subscription title: %q`, got)
	}
	// The relative path must come back joined with the site's scheme and host.
	if got := feed.URL; got != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, got)
	}
	if got := feed.Type; got != "json" {
		t.Errorf(`Incorrect subscription type: %q`, got)
	}
}
// TestParseWebPageWithEmptyTitle checks that a feed <link> without a title
// attribute produces a subscription whose title equals its absolute feed URL.
func TestParseWebPageWithEmptyTitle(t *testing.T) {
	body := strings.NewReader(`
<!doctype html>
<html>
<head>
<link href="/feed.json" rel="alternate" type="application/feed+json">
</head>
<body>
</body>
</html>`)

	results, parseErr := parseWebPage("http://example.org/", body)
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Abort on a count mismatch: the checks below index the first entry.
	if len(results) != 1 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	untitled := results[0]
	// With no title attribute, the expected title is the feed URL itself.
	if title := untitled.Title; title != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription title: %q`, title)
	}
	if feedURL := untitled.URL; feedURL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, feedURL)
	}
	if kind := untitled.Type; kind != "json" {
		t.Errorf(`Incorrect subscription type: %q`, kind)
	}
}
// TestParseWebPageWithMultipleFeeds checks that a page declaring two feed
// <link> elements (Atom, then JSON Feed) yields two subscriptions, in the same
// order, each with the title, URL and type of its own link.
func TestParseWebPageWithMultipleFeeds(t *testing.T) {
	htmlPage := `
<!doctype html>
<html>
<head>
<link href="http://example.org/atom.xml" rel="alternate" type="application/atom+xml" title="Atom Feed">
<link href="http://example.org/feed.json" rel="alternate" type="application/feed+json" title="JSON Feed">
</head>
<body>
</body>
</html>`

	subscriptions, err := parseWebPage("http://example.org/", strings.NewReader(htmlPage))
	if err != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, err)
	}

	// Abort on a count mismatch: the checks below index both entries.
	if len(subscriptions) != 2 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}

	if subscriptions[0].Title != "Atom Feed" {
		t.Errorf(`Incorrect subscription title: %q`, subscriptions[0].Title)
	}
	if subscriptions[0].URL != "http://example.org/atom.xml" {
		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[0].URL)
	}
	if subscriptions[0].Type != "atom" {
		t.Errorf(`Incorrect subscription type: %q`, subscriptions[0].Type)
	}

	// Failure messages for the second feed must print the second entry's
	// values; printing the first entry would show a passing value and hide
	// what actually went wrong.
	if subscriptions[1].Title != "JSON Feed" {
		t.Errorf(`Incorrect subscription title: %q`, subscriptions[1].Title)
	}
	if subscriptions[1].URL != "http://example.org/feed.json" {
		t.Errorf(`Incorrect subscription URL: %q`, subscriptions[1].URL)
	}
	if subscriptions[1].Type != "json" {
		t.Errorf(`Incorrect subscription type: %q`, subscriptions[1].Type)
	}
}
// TestParseWebPageWithEmptyFeedURL checks that a feed <link> whose href
// attribute is present but has no value is ignored, so no subscription is
// returned.
func TestParseWebPageWithEmptyFeedURL(t *testing.T) {
	const page = `
<!doctype html>
<html>
<head>
<link href rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`

	found, parseErr := parseWebPage("http://example.org/", strings.NewReader(page))
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Any returned entry means the empty link was not skipped.
	if len(found) > 0 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}
// TestParseWebPageWithNoHref checks that a feed <link> with no href attribute
// at all is ignored, so no subscription is returned.
func TestParseWebPageWithNoHref(t *testing.T) {
	body := strings.NewReader(`
<!doctype html>
<html>
<head>
<link rel="alternate" type="application/feed+json" title="Some Title">
</head>
<body>
</body>
</html>`)

	results, parseErr := parseWebPage("http://example.org/", body)
	if parseErr != nil {
		t.Fatalf(`Parsing a correctly formatted HTML page should not return any error: %v`, parseErr)
	}

	// Any returned entry means the link without a target was not skipped.
	if len(results) > 0 {
		t.Fatal(`Incorrect number of subscriptions returned`)
	}
}