diff --git a/reader/rss/parser_test.go b/reader/rss/parser_test.go
index 9be293b4..4b7bf761 100644
--- a/reader/rss/parser_test.go
+++ b/reader/rss/parser_test.go
@@ -95,6 +95,10 @@ func TestParseRss2Sample(t *testing.T) {
if feed.Entries[0].Content != `How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's Star City.` {
t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content)
}
+
+ if feed.Entries[1].URL != "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572" {
+ t.Errorf("Incorrect entry URL, got: %s", feed.Entries[1].URL)
+ }
}
func TestParseFeedWithoutTitle(t *testing.T) {
@@ -230,6 +234,34 @@ func TestParseEntryWithoutLink(t *testing.T) {
}
}
+// TestParseEntryWithOnlyGuidPermalink checks that an item with no <link>
+// element takes its entry URL from <guid>, both when isPermaLink="true" is
+// given explicitly and when the attribute is omitted.
+func TestParseEntryWithOnlyGuidPermalink(t *testing.T) {
+	// NOTE(review): the fixture markup was missing from this patch text and has
+	// been reconstructed around the surviving URLs; confirm against upstream.
+	data := `
+		<?xml version="1.0" encoding="utf-8"?>
+		<rss version="2.0">
+		<channel>
+			<link>https://example.org/</link>
+			<item>
+				<guid isPermaLink="true">https://example.org/some-article.html</guid>
+			</item>
+			<item>
+				<guid>https://example.org/another-article.html</guid>
+			</item>
+		</channel>
+		</rss>`
+
+	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Stop here rather than panic on an out-of-range index below.
+	if len(feed.Entries) != 2 {
+		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
+	}
+
+	if feed.Entries[0].URL != "https://example.org/some-article.html" {
+		t.Errorf("Incorrect entry link, got: %s", feed.Entries[0].URL)
+	}
+
+	if feed.Entries[1].URL != "https://example.org/another-article.html" {
+		t.Errorf("Incorrect entry link, got: %s", feed.Entries[1].URL)
+	}
+}
+
func TestParseEntryWithAtomLink(t *testing.T) {
data := `
diff --git a/reader/rss/rss.go b/reader/rss/rss.go
index fb042632..76ed9c9a 100644
--- a/reader/rss/rss.go
+++ b/reader/rss/rss.go
@@ -118,6 +118,12 @@ func (r rssFeed) feedAuthor() string {
return sanitizer.StripTags(strings.TrimSpace(author))
}
+// rssGUID holds an RSS <guid> element: its character data and the raw value
+// of the optional isPermaLink attribute (left empty when the attribute is
+// not present in the feed).
+type rssGUID struct {
+	XMLName     xml.Name
+	Data        string `xml:",chardata"`
+	IsPermaLink string `xml:"isPermaLink,attr"`
+}
+
type rssLink struct {
XMLName xml.Name
Data string `xml:",chardata"`
@@ -159,7 +165,7 @@ func (enclosure *rssEnclosure) Size() int64 {
}
type rssItem struct {
- GUID string `xml:"guid"`
+ GUID rssGUID `xml:"guid"`
Title []rssTitle `xml:"title"`
Links []rssLink `xml:"link"`
Description string `xml:"description"`
@@ -237,7 +243,7 @@ func (r *rssItem) entryAuthor() string {
}
func (r *rssItem) entryHash() string {
- for _, value := range []string{r.GUID, r.entryURL()} {
+ for _, value := range []string{r.GUID.Data, r.entryURL()} {
if value != "" {
return crypto.Hash(value)
}
@@ -291,6 +297,13 @@ func (r *rssItem) entryURL() string {
}
}
+ // Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt
+ // isPermaLink is optional, its default value is true.
+ // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular.
+ if r.GUID.IsPermaLink == "true" || r.GUID.IsPermaLink == "" {
+ return strings.TrimSpace(r.GUID.Data)
+ }
+
return ""
}