From 648b9a8f6f1e2aad032caed11df64f449ea157c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Wed, 13 Mar 2024 21:06:28 -0700 Subject: [PATCH] Refactor RSS Parser to use an adapter --- internal/reader/atom/atom_10.go | 2 +- internal/reader/googleplay/googleplay.go | 2 +- internal/reader/itunes/itunes.go | 4 +- internal/reader/media/media.go | 11 +- internal/reader/rdf/adapter.go | 5 +- internal/reader/rss/adapter.go | 310 +++++++++++++++++ internal/reader/rss/feedburner.go | 4 +- internal/reader/rss/parser.go | 6 +- internal/reader/rss/parser_test.go | 107 ++++++ internal/reader/rss/podcast.go | 3 +- internal/reader/rss/rss.go | 407 ++++------------------- 11 files changed, 497 insertions(+), 364 deletions(-) create mode 100644 internal/reader/rss/adapter.go diff --git a/internal/reader/atom/atom_10.go b/internal/reader/atom/atom_10.go index 5b67e073..798a8748 100644 --- a/internal/reader/atom/atom_10.go +++ b/internal/reader/atom/atom_10.go @@ -91,7 +91,7 @@ type atom10Entry struct { Content atom10Text `xml:"http://www.w3.org/2005/Atom content"` Authors atomAuthors `xml:"author"` Categories []atom10Category `xml:"category"` - media.Element + media.MediaItemElement } func (a *atom10Entry) Transform() *model.Entry { diff --git a/internal/reader/googleplay/googleplay.go b/internal/reader/googleplay/googleplay.go index 38dcc71f..79404efb 100644 --- a/internal/reader/googleplay/googleplay.go +++ b/internal/reader/googleplay/googleplay.go @@ -6,7 +6,7 @@ package googleplay // import "miniflux.app/v2/internal/reader/googleplay" // Specs: // https://support.google.com/googleplay/podcasts/answer/6260341 // https://www.google.com/schemas/play-podcasts/1.0/play-podcasts.xsd -type GooglePlayFeedElement struct { +type GooglePlayChannelElement struct { GooglePlayAuthor string `xml:"http://www.google.com/schemas/play-podcasts/1.0 author"` GooglePlayEmail string `xml:"http://www.google.com/schemas/play-podcasts/1.0 email"` GooglePlayImage GooglePlayImageElement `xml:"http://www.google.com/schemas/play-podcasts/1.0 image"` diff --git a/internal/reader/itunes/itunes.go b/internal/reader/itunes/itunes.go index 1673f306..87a02f0d 100644 --- a/internal/reader/itunes/itunes.go +++ b/internal/reader/itunes/itunes.go @@ -6,7 +6,7 @@ package itunes // import "miniflux.app/v2/internal/reader/itunes" import "strings" // Specs: https://help.apple.com/itc/podcasts_connect/#/itcb54353390 -type ItunesFeedElement struct { +type ItunesChannelElement struct { ItunesAuthor string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd author"` ItunesBlock string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd block"` ItunesCategories []ItunesCategoryElement `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd category"` @@ -22,7 +22,7 @@ type ItunesFeedElement struct { ItunesType string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd type"` } -func (i *ItunesFeedElement) GetItunesCategories() []string { +func (i *ItunesChannelElement) GetItunesCategories() []string { var categories []string for _, category := range i.ItunesCategories { categories = append(categories, category.Text) diff --git a/internal/reader/media/media.go b/internal/reader/media/media.go index df84bf03..7fe4684d 100644 --- a/internal/reader/media/media.go +++ b/internal/reader/media/media.go @@ -11,9 +11,8 @@ import ( var textLinkRegex = regexp.MustCompile(`(?mi)(\bhttps?:\/\/[-A-Z0-9+&@#\/%?=~_|!:,.;]*[-A-Z0-9+&@#\/%=~_|])`) -// Element represents XML media elements. // Specs: https://www.rssboard.org/media-rss -type Element struct { +type MediaItemElement struct { MediaGroups []Group `xml:"http://search.yahoo.com/mrss/ group"` MediaContents []Content `xml:"http://search.yahoo.com/mrss/ content"` MediaThumbnails []Thumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"` @@ -22,7 +21,7 @@ type Element struct { } // AllMediaThumbnails returns all thumbnail elements merged together. -func (e *Element) AllMediaThumbnails() []Thumbnail { +func (e *MediaItemElement) AllMediaThumbnails() []Thumbnail { var items []Thumbnail items = append(items, e.MediaThumbnails...) for _, mediaGroup := range e.MediaGroups { @@ -32,7 +31,7 @@ func (e *Element) AllMediaThumbnails() []Thumbnail { } // AllMediaContents returns all content elements merged together. -func (e *Element) AllMediaContents() []Content { +func (e *MediaItemElement) AllMediaContents() []Content { var items []Content items = append(items, e.MediaContents...) for _, mediaGroup := range e.MediaGroups { @@ -42,7 +41,7 @@ func (e *Element) AllMediaContents() []Content { } // AllMediaPeerLinks returns all peer link elements merged together. -func (e *Element) AllMediaPeerLinks() []PeerLink { +func (e *MediaItemElement) AllMediaPeerLinks() []PeerLink { var items []PeerLink items = append(items, e.MediaPeerLinks...) for _, mediaGroup := range e.MediaGroups { @@ -52,7 +51,7 @@ func (e *Element) AllMediaPeerLinks() []PeerLink { } // FirstMediaDescription returns the first description element. -func (e *Element) FirstMediaDescription() string { +func (e *MediaItemElement) FirstMediaDescription() string { description := e.MediaDescriptions.First() if description != "" { return description diff --git a/internal/reader/rdf/adapter.go b/internal/reader/rdf/adapter.go index 812badbc..bc8c76ed 100644 --- a/internal/reader/rdf/adapter.go +++ b/internal/reader/rdf/adapter.go @@ -28,15 +28,14 @@ func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed { feed := &model.Feed{ Title: stripTags(r.rdf.Channel.Title), FeedURL: feedURL, + SiteURL: r.rdf.Channel.Link, } if feed.Title == "" { feed.Title = feedURL } - if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil { - feed.SiteURL = r.rdf.Channel.Link - } else { + if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err == nil { feed.SiteURL = siteURL } diff --git a/internal/reader/rss/adapter.go b/internal/reader/rss/adapter.go new file mode 100644 index 00000000..5c1785a9 --- /dev/null +++ b/internal/reader/rss/adapter.go @@ -0,0 +1,310 @@ +// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +package rss // import "miniflux.app/v2/internal/reader/rss" + +import ( + "html" + "log/slog" + "path" + "strconv" + "strings" + "time" + + "miniflux.app/v2/internal/crypto" + "miniflux.app/v2/internal/model" + "miniflux.app/v2/internal/reader/date" + "miniflux.app/v2/internal/reader/sanitizer" + "miniflux.app/v2/internal/urllib" +) + +type RSSAdapter struct { + rss *RSS +} + +func NewRSSAdapter(rss *RSS) *RSSAdapter { + return &RSSAdapter{rss} +} + +func (r *RSSAdapter) BuildFeed(feedURL string) *model.Feed { + feed := &model.Feed{ + Title: html.UnescapeString(strings.TrimSpace(r.rss.Channel.Title)), + FeedURL: feedURL, + SiteURL: r.rss.Channel.Link, + } + + if siteURL, err := urllib.AbsoluteURL(feedURL, r.rss.Channel.Link); err == nil { + feed.SiteURL = siteURL + } + + // Try to find the feed URL from the Atom links. + for _, atomLink := range r.rss.Channel.AtomLinks.Links { + atomLinkHref := strings.TrimSpace(atomLink.URL) + if atomLinkHref != "" && atomLink.Rel == "self" { + if absoluteFeedURL, err := urllib.AbsoluteURL(feedURL, atomLinkHref); err == nil { + feed.FeedURL = absoluteFeedURL + break + } + } + } + + // Fallback to the site URL if the title is empty. + if feed.Title == "" { + feed.Title = feed.SiteURL + } + + // Get TTL if defined. + if r.rss.Channel.TTL != "" { + if ttl, err := strconv.Atoi(r.rss.Channel.TTL); err == nil { + feed.TTL = ttl + } + } + + // Get the feed icon URL if defined. + if r.rss.Channel.Image != nil { + if absoluteIconURL, err := urllib.AbsoluteURL(feed.SiteURL, r.rss.Channel.Image.URL); err == nil { + feed.IconURL = absoluteIconURL + } + } + + for _, item := range r.rss.Channel.Items { + entry := model.NewEntry() + entry.Author = findEntryAuthor(&item) + entry.Date = findEntryDate(&item) + entry.Content = findEntryContent(&item) + entry.Enclosures = findEntryEnclosures(&item) + + // Populate the entry URL. + entryURL := findEntryURL(&item) + if entryURL == "" { + entry.URL = feed.SiteURL + } else { + if absoluteEntryURL, err := urllib.AbsoluteURL(feed.SiteURL, entryURL); err == nil { + entry.URL = absoluteEntryURL + } else { + entry.URL = entryURL + } + } + + // Populate the entry title. + entry.Title = findEntryTitle(&item) + if entry.Title == "" { + entry.Title = sanitizer.TruncateHTML(entry.Content, 100) + } + + if entry.Title == "" { + entry.Title = entry.URL + } + + if entry.Author == "" { + entry.Author = findFeedAuthor(&r.rss.Channel) + } + + // Generate the entry hash. + for _, value := range []string{item.GUID.Data, entryURL} { + if value != "" { + entry.Hash = crypto.Hash(value) + break + } + } + + // Find CommentsURL if defined. + if absoluteCommentsURL := strings.TrimSpace(item.CommentsURL); absoluteCommentsURL != "" && urllib.IsAbsoluteURL(absoluteCommentsURL) { + entry.CommentsURL = absoluteCommentsURL + } + + // Set podcast listening time. + if item.ItunesDuration != "" { + if duration, err := getDurationInMinutes(item.ItunesDuration); err == nil { + entry.ReadingTime = duration + } + } + + // Populate entry categories. + entry.Tags = append(entry.Tags, item.Categories...) + entry.Tags = append(entry.Tags, r.rss.Channel.Categories...) + entry.Tags = append(entry.Tags, r.rss.Channel.GetItunesCategories()...) + + if r.rss.Channel.GooglePlayCategory.Text != "" { + entry.Tags = append(entry.Tags, r.rss.Channel.GooglePlayCategory.Text) + } + + feed.Entries = append(feed.Entries, entry) + } + + return feed +} + +func findFeedAuthor(rssChannel *RSSChannel) string { + var author string + switch { + case rssChannel.ItunesAuthor != "": + author = rssChannel.ItunesAuthor + case rssChannel.GooglePlayAuthor != "": + author = rssChannel.GooglePlayAuthor + case rssChannel.ItunesOwner.String() != "": + author = rssChannel.ItunesOwner.String() + case rssChannel.ManagingEditor != "": + author = rssChannel.ManagingEditor + case rssChannel.Webmaster != "": + author = rssChannel.Webmaster + } + return sanitizer.StripTags(strings.TrimSpace(author)) +} + +func findEntryTitle(rssItem *RSSItem) string { + title := rssItem.Title + + if rssItem.DublinCoreTitle != "" { + title = rssItem.DublinCoreTitle + } + + return html.UnescapeString(strings.TrimSpace(title)) +} + +func findEntryURL(rssItem *RSSItem) string { + for _, link := range []string{rssItem.FeedBurnerLink, rssItem.Link} { + if link != "" { + return strings.TrimSpace(link) + } + } + + for _, atomLink := range rssItem.AtomLinks.Links { + if atomLink.URL != "" && (strings.EqualFold(atomLink.Rel, "alternate") || atomLink.Rel == "") { + return strings.TrimSpace(atomLink.URL) + } + } + + // Specs: https://cyber.harvard.edu/rss/rss.html#ltguidgtSubelementOfLtitemgt + // isPermaLink is optional, its default value is true. + // If its value is false, the guid may not be assumed to be a url, or a url to anything in particular. + if rssItem.GUID.IsPermaLink == "true" || rssItem.GUID.IsPermaLink == "" { + return strings.TrimSpace(rssItem.GUID.Data) + } + + return "" +} + +func findEntryContent(rssItem *RSSItem) string { + for _, value := range []string{ + rssItem.DublinCoreContent, + rssItem.Description, + rssItem.GooglePlayDescription, + rssItem.ItunesSummary, + rssItem.ItunesSubtitle, + } { + if value != "" { + return value + } + } + return "" +} + +func findEntryDate(rssItem *RSSItem) time.Time { + value := rssItem.PubDate + if rssItem.DublinCoreDate != "" { + value = rssItem.DublinCoreDate + } + + if value != "" { + result, err := date.Parse(value) + if err != nil { + slog.Debug("Unable to parse date from RSS feed", + slog.String("date", value), + slog.String("guid", rssItem.GUID.Data), + slog.Any("error", err), + ) + return time.Now() + } + + return result + } + + return time.Now() +} + +func findEntryAuthor(rssItem *RSSItem) string { + var author string + + switch { + case rssItem.GooglePlayAuthor != "": + author = rssItem.GooglePlayAuthor + case rssItem.ItunesAuthor != "": + author = rssItem.ItunesAuthor + case rssItem.DublinCoreCreator != "": + author = rssItem.DublinCoreCreator + case rssItem.AtomAuthor.String() != "": + author = rssItem.AtomAuthor.String() + case strings.Contains(rssItem.Author.Inner, " + + + My Podcast Feed + http://example.org + some.email@example.org + + Podcasting with RSS + http://www.example.org/entries/1 + An overview of RSS podcasting + Fri, 15 Jul 2005 00:00:00 -0500 + http://www.example.org/entries/1 + + + + + ` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) + } + + if feed.Entries[0].URL != "http://www.example.org/entries/1" { + t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) + } + + if len(feed.Entries[0].Enclosures) != 2 { + t.Errorf("Incorrect number of enclosures, got: %d", len(feed.Entries[0].Enclosures)) + } + + if feed.Entries[0].Enclosures[0].URL != "http://www.example.org/myaudiofile.mp3" { + t.Errorf("Incorrect enclosure URL, got: %s", feed.Entries[0].Enclosures[0].URL) + } + + if feed.Entries[0].Enclosures[0].MimeType != "audio/mpeg" { + t.Errorf("Incorrect enclosure type, got: %s", feed.Entries[0].Enclosures[0].MimeType) + } + + if feed.Entries[0].Enclosures[0].Size != 0 { + t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) + } + + if feed.Entries[0].Enclosures[1].Size != 0 { + t.Errorf("Incorrect enclosure length, got: %d", feed.Entries[0].Enclosures[0].Size) + } +} + func TestParseEntryWithEmptyEnclosureURL(t *testing.T) { data := ` @@ -1306,6 +1359,60 @@ func TestParseEntryWithMediaPeerLink(t *testing.T) { } } +func TestParseItunesDuration(t *testing.T) { + data := ` + + + Podcast Example + http://www.example.com/index.html + + Podcast Episode + http://example.com/episode.m4a + Tue, 08 Mar 2016 12:00:00 GMT + 1:23:45 + + + ` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + expected := 83 + result := feed.Entries[0].ReadingTime + if expected != result { + t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected) + } +} + +func TestParseIncorrectItunesDuration(t *testing.T) { + data := ` + + + Podcast Example + http://www.example.com/index.html + + Podcast Episode + http://example.com/episode.m4a + Tue, 08 Mar 2016 12:00:00 GMT + invalid + + + ` + + feed, err := Parse("https://example.org/", bytes.NewReader([]byte(data))) + if err != nil { + t.Fatal(err) + } + + expected := 0 + result := feed.Entries[0].ReadingTime + if expected != result { + t.Errorf(`Unexpected podcast duration, got %d instead of %d`, result, expected) + } +} + func TestEntryDescriptionFromItunesSummary(t *testing.T) { data := ` diff --git a/internal/reader/rss/podcast.go b/internal/reader/rss/podcast.go index 9a1f365b..7fd93f4a 100644 --- a/internal/reader/rss/podcast.go +++ b/internal/reader/rss/podcast.go @@ -12,8 +12,7 @@ import ( var ErrInvalidDurationFormat = errors.New("rss: invalid duration format") -// normalizeDuration returns the duration tag value as a number of minutes -func normalizeDuration(rawDuration string) (int, error) { +func getDurationInMinutes(rawDuration string) (int, error) { var sumSeconds int durationParts := strings.Split(rawDuration, ":") diff --git a/internal/reader/rss/rss.go b/internal/reader/rss/rss.go index be53c4b0..7935166d 100644 --- a/internal/reader/rss/rss.go +++ b/internal/reader/rss/rss.go @@ -5,391 +5,110 @@ package rss // import "miniflux.app/v2/internal/reader/rss" import ( "encoding/xml" - "html" - "log/slog" - "path" "strconv" "strings" - "time" - "miniflux.app/v2/internal/crypto" - "miniflux.app/v2/internal/model" - "miniflux.app/v2/internal/reader/date" "miniflux.app/v2/internal/reader/dublincore" "miniflux.app/v2/internal/reader/googleplay" "miniflux.app/v2/internal/reader/itunes" "miniflux.app/v2/internal/reader/media" - "miniflux.app/v2/internal/reader/sanitizer" - "miniflux.app/v2/internal/urllib" ) // Specs: https://www.rssboard.org/rss-specification -type rssFeed struct { - XMLName xml.Name `xml:"rss"` +type RSS struct { Version string `xml:"rss version,attr"` - Channel rssChannel `xml:"rss channel"` + Channel RSSChannel `xml:"rss channel"` } -type rssChannel struct { - Categories []string `xml:"rss category"` +type RSSChannel struct { Title string `xml:"rss title"` Link string `xml:"rss link"` - ImageURL string `xml:"rss image>url"` - Language string `xml:"rss language"` Description string `xml:"rss description"` - PubDate string `xml:"rss pubDate"` + Language string `xml:"rss language"` + Copyright string `xml:"rss copyRight"` ManagingEditor string `xml:"rss managingEditor"` Webmaster string `xml:"rss webMaster"` - TimeToLive rssTTL `xml:"rss ttl"` - Items []rssItem `xml:"rss item"` + PubDate string `xml:"rss pubDate"` + LastBuildDate string `xml:"rss lastBuildDate"` + Categories []string `xml:"rss category"` + Generator string `xml:"rss generator"` + Docs string `xml:"rss docs"` + Cloud *RSSCloud `xml:"rss cloud"` + Image *RSSImage `xml:"rss image"` + TTL string `xml:"rss ttl"` + SkipHours []string `xml:"rss skipHours>hour"` + SkipDays []string `xml:"rss skipDays>day"` + Items []RSSItem `xml:"rss item"` AtomLinks - itunes.ItunesFeedElement - googleplay.GooglePlayFeedElement + itunes.ItunesChannelElement + googleplay.GooglePlayChannelElement } -type rssTTL struct { - Data string `xml:",chardata"` +type RSSCloud struct { + Domain string `xml:"domain,attr"` + Port string `xml:"port,attr"` + Path string `xml:"path,attr"` + RegisterProcedure string `xml:"registerProcedure,attr"` + Protocol string `xml:"protocol,attr"` } -func (r *rssTTL) Value() int { - if r.Data == "" { - return 0 - } +type RSSImage struct { + // URL is the URL of a GIF, JPEG or PNG image that represents the channel. + URL string `xml:"url"` - value, err := strconv.Atoi(r.Data) - if err != nil { - return 0 - } + // Title describes the image, it's used in the ALT attribute of the HTML tag when the channel is rendered in HTML. + Title string `xml:"title"` - return value + // Link is the URL of the site, when the channel is rendered, the image is a link to the site. + Link string `xml:"link"` } -func (r *rssFeed) Transform(baseURL string) *model.Feed { - var err error - - feed := new(model.Feed) - - siteURL := r.siteURL() - feed.SiteURL, err = urllib.AbsoluteURL(baseURL, siteURL) - if err != nil { - feed.SiteURL = siteURL - } - - feedURL := r.feedURL() - feed.FeedURL, err = urllib.AbsoluteURL(baseURL, feedURL) - if err != nil { - feed.FeedURL = feedURL - } - - feed.Title = html.UnescapeString(strings.TrimSpace(r.Channel.Title)) - if feed.Title == "" { - feed.Title = feed.SiteURL - } - - feed.IconURL = strings.TrimSpace(r.Channel.ImageURL) - feed.TTL = r.Channel.TimeToLive.Value() - - for _, item := range r.Channel.Items { - entry := item.Transform() - if entry.Author == "" { - entry.Author = r.feedAuthor() - } - - if entry.URL == "" { - entry.URL = feed.SiteURL - } else { - entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL) - if err == nil { - entry.URL = entryURL - } - } - - if entry.Title == "" { - entry.Title = sanitizer.TruncateHTML(entry.Content, 100) - } - - if entry.Title == "" { - entry.Title = entry.URL - } - - entry.Tags = append(entry.Tags, r.Channel.Categories...) - entry.Tags = append(entry.Tags, r.Channel.GetItunesCategories()...) - - if r.Channel.GooglePlayCategory.Text != "" { - entry.Tags = append(entry.Tags, r.Channel.GooglePlayCategory.Text) - } - - feed.Entries = append(feed.Entries, entry) - } - - return feed -} - -func (r *rssFeed) siteURL() string { - return strings.TrimSpace(r.Channel.Link) -} - -func (r *rssFeed) feedURL() string { - for _, atomLink := range r.Channel.AtomLinks.Links { - if atomLink.Rel == "self" { - return strings.TrimSpace(atomLink.URL) - } - } - return "" -} - -func (r rssFeed) feedAuthor() string { - var author string - switch { - case r.Channel.ItunesAuthor != "": - author = r.Channel.ItunesAuthor - case r.Channel.GooglePlayAuthor != "": - author = r.Channel.GooglePlayAuthor - case r.Channel.ItunesOwner.String() != "": - author = r.Channel.ItunesOwner.String() - case r.Channel.ManagingEditor != "": - author = r.Channel.ManagingEditor - case r.Channel.Webmaster != "": - author = r.Channel.Webmaster - } - return sanitizer.StripTags(strings.TrimSpace(author)) -} - -type rssGUID struct { - XMLName xml.Name - Data string `xml:",chardata"` - IsPermaLink string `xml:"isPermaLink,attr"` -} - -type rssAuthor struct { - XMLName xml.Name - Data string `xml:",chardata"` - Inner string `xml:",innerxml"` -} - -type rssEnclosure struct { - URL string `xml:"url,attr"` - Type string `xml:"type,attr"` - Length string `xml:"length,attr"` -} - -func (enclosure *rssEnclosure) Size() int64 { - if enclosure.Length == "" { - return 0 - } - size, _ := strconv.ParseInt(enclosure.Length, 10, 0) - return size -} - -type rssItem struct { - GUID rssGUID `xml:"rss guid"` - Title string `xml:"rss title"` - Link string `xml:"rss link"` - Description string `xml:"rss description"` - PubDate string `xml:"rss pubDate"` - Author rssAuthor `xml:"rss author"` - Comments string `xml:"rss comments"` - EnclosureLinks []rssEnclosure `xml:"rss enclosure"` - Categories []string `xml:"rss category"` +type RSSItem struct { + Title string `xml:"rss title"` + Link string `xml:"rss link"` + Description string `xml:"rss description"` + Author RSSAuthor `xml:"rss author"` + Categories []string `xml:"rss category"` + CommentsURL string `xml:"rss comments"` + Enclosures []RSSEnclosure `xml:"rss enclosure"` + GUID RSSGUID `xml:"rss guid"` + PubDate string `xml:"rss pubDate"` + Source RSSSource `xml:"rss source"` dublincore.DublinCoreItemElement - FeedBurnerElement - media.Element + FeedBurnerItemElement + media.MediaItemElement AtomAuthor AtomLinks itunes.ItunesItemElement googleplay.GooglePlayItemElement } -func (r *rssItem) Transform() *model.Entry { - entry := model.NewEntry() - entry.URL = r.entryURL() - entry.CommentsURL = r.entryCommentsURL() - entry.Date = r.entryDate() - entry.Author = r.entryAuthor() - entry.Hash = r.entryHash() - entry.Content = r.entryContent() - entry.Title = r.entryTitle() - entry.Enclosures = r.entryEnclosures() - entry.Tags = r.Categories - if duration, err := normalizeDuration(r.ItunesDuration); err == nil { - entry.ReadingTime = duration - } - - return entry +type RSSAuthor struct { + XMLName xml.Name + Data string `xml:",chardata"` + Inner string `xml:",innerxml"` } -func (r *rssItem) entryDate() time.Time { - value := r.PubDate - if r.DublinCoreDate != "" { - value = r.DublinCoreDate - } - - if value != "" { - result, err := date.Parse(value) - if err != nil { - slog.Debug("Unable to parse date from RSS feed", - slog.String("date", value), - slog.String("guid", r.GUID.Data), - slog.Any("error", err), - ) - return time.Now() - } - - return result - } - - return time.Now() +type RSSEnclosure struct { + URL string `xml:"url,attr"` + Type string `xml:"type,attr"` + Length string `xml:"length,attr"` } -func (r *rssItem) entryAuthor() string { - var author string - - switch { - case r.GooglePlayAuthor != "": - author = r.GooglePlayAuthor - case r.ItunesAuthor != "": - author = r.ItunesAuthor - case r.DublinCoreCreator != "": - author = r.DublinCoreCreator - case r.AtomAuthor.String() != "": - author = r.AtomAuthor.String() - case strings.Contains(r.Author.Inner, "