mirror of https://github.com/miniflux/v2.git
Refactor RDF parser to use an adapter
Avoid tight coupling between `model.Feed` and the original XML RDF feed.
This commit is contained in:
parent
ee3486af66
commit
6bc4b35e38
|
@ -3,29 +3,13 @@
|
||||||
|
|
||||||
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
|
package dublincore // import "miniflux.app/v2/internal/reader/dublincore"
|
||||||
|
|
||||||
import (
|
type DublinCoreChannelElement struct {
|
||||||
"strings"
|
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
||||||
|
|
||||||
"miniflux.app/v2/internal/reader/sanitizer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// DublinCoreFeedElement represents Dublin Core feed XML elements.
|
|
||||||
type DublinCoreFeedElement struct {
|
|
||||||
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ channel>creator"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (feed *DublinCoreFeedElement) GetSanitizedCreator() string {
|
|
||||||
return strings.TrimSpace(sanitizer.StripTags(feed.DublinCoreCreator))
|
|
||||||
}
|
|
||||||
|
|
||||||
// DublinCoreItemElement represents Dublin Core entry XML elements.
|
|
||||||
type DublinCoreItemElement struct {
|
type DublinCoreItemElement struct {
|
||||||
DublinCoreTitle string `xml:"http://purl.org/dc/elements/1.1/ title"`
|
DublinCoreTitle string `xml:"http://purl.org/dc/elements/1.1/ title"`
|
||||||
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
|
||||||
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`
|
||||||
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (item *DublinCoreItemElement) GetSanitizedCreator() string {
|
|
||||||
return strings.TrimSpace(sanitizer.StripTags(item.DublinCoreCreator))
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,115 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: Apache-2.0
|
||||||
|
|
||||||
|
package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||||
|
|
||||||
|
import (
|
||||||
|
"html"
|
||||||
|
"log/slog"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"miniflux.app/v2/internal/crypto"
|
||||||
|
"miniflux.app/v2/internal/model"
|
||||||
|
"miniflux.app/v2/internal/reader/date"
|
||||||
|
"miniflux.app/v2/internal/reader/sanitizer"
|
||||||
|
"miniflux.app/v2/internal/urllib"
|
||||||
|
)
|
||||||
|
|
||||||
|
type RDFAdapter struct {
|
||||||
|
rdf *RDF
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewRDFAdapter(rdf *RDF) *RDFAdapter {
|
||||||
|
return &RDFAdapter{rdf}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *RDFAdapter) BuildFeed(feedURL string) *model.Feed {
|
||||||
|
feed := &model.Feed{
|
||||||
|
Title: stripTags(r.rdf.Channel.Title),
|
||||||
|
FeedURL: feedURL,
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Title == "" {
|
||||||
|
feed.Title = feedURL
|
||||||
|
}
|
||||||
|
|
||||||
|
if siteURL, err := urllib.AbsoluteURL(feedURL, r.rdf.Channel.Link); err != nil {
|
||||||
|
feed.SiteURL = r.rdf.Channel.Link
|
||||||
|
} else {
|
||||||
|
feed.SiteURL = siteURL
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range r.rdf.Items {
|
||||||
|
entry := model.NewEntry()
|
||||||
|
itemLink := strings.TrimSpace(item.Link)
|
||||||
|
|
||||||
|
// Populate the entry URL.
|
||||||
|
if itemLink == "" {
|
||||||
|
entry.URL = feed.SiteURL // Fallback to the feed URL if the entry URL is empty.
|
||||||
|
} else if entryURL, err := urllib.AbsoluteURL(feed.SiteURL, itemLink); err == nil {
|
||||||
|
entry.URL = entryURL
|
||||||
|
} else {
|
||||||
|
entry.URL = itemLink
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate the entry title.
|
||||||
|
for _, title := range []string{item.Title, item.DublinCoreTitle} {
|
||||||
|
title = strings.TrimSpace(title)
|
||||||
|
if title != "" {
|
||||||
|
entry.Title = html.UnescapeString(title)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the entry title is empty, we use the entry URL as a fallback.
|
||||||
|
if entry.Title == "" {
|
||||||
|
entry.Title = entry.URL
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate the entry content.
|
||||||
|
if item.DublinCoreContent != "" {
|
||||||
|
entry.Content = item.DublinCoreContent
|
||||||
|
} else {
|
||||||
|
entry.Content = item.Description
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate the entry hash.
|
||||||
|
hashValue := itemLink
|
||||||
|
if hashValue == "" {
|
||||||
|
hashValue = item.Title + item.Description // Fallback to the title and description if the link is empty.
|
||||||
|
}
|
||||||
|
|
||||||
|
entry.Hash = crypto.Hash(hashValue)
|
||||||
|
|
||||||
|
// Populate the entry date.
|
||||||
|
entry.Date = time.Now()
|
||||||
|
if item.DublinCoreDate != "" {
|
||||||
|
if itemDate, err := date.Parse(item.DublinCoreDate); err != nil {
|
||||||
|
slog.Debug("Unable to parse date from RDF feed",
|
||||||
|
slog.String("date", item.DublinCoreDate),
|
||||||
|
slog.String("link", itemLink),
|
||||||
|
slog.Any("error", err),
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
entry.Date = itemDate
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Populate the entry author.
|
||||||
|
switch {
|
||||||
|
case item.DublinCoreCreator != "":
|
||||||
|
entry.Author = stripTags(item.DublinCoreCreator)
|
||||||
|
case r.rdf.Channel.DublinCoreCreator != "":
|
||||||
|
entry.Author = stripTags(r.rdf.Channel.DublinCoreCreator)
|
||||||
|
}
|
||||||
|
|
||||||
|
feed.Entries = append(feed.Entries, entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
return feed
|
||||||
|
}
|
||||||
|
|
||||||
|
func stripTags(value string) string {
|
||||||
|
return strings.TrimSpace(sanitizer.StripTags(value))
|
||||||
|
}
|
|
@ -13,10 +13,10 @@ import (
|
||||||
|
|
||||||
// Parse returns a normalized feed struct from a RDF feed.
|
// Parse returns a normalized feed struct from a RDF feed.
|
||||||
func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
|
func Parse(baseURL string, data io.ReadSeeker) (*model.Feed, error) {
|
||||||
feed := new(rdfFeed)
|
xmlFeed := new(RDF)
|
||||||
if err := xml.NewXMLDecoder(data).Decode(feed); err != nil {
|
if err := xml.NewXMLDecoder(data).Decode(xmlFeed); err != nil {
|
||||||
return nil, fmt.Errorf("rdf: unable to parse feed: %w", err)
|
return nil, fmt.Errorf("rdf: unable to parse feed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return feed.Transform(baseURL), nil
|
return NewRDFAdapter(xmlFeed).BuildFeed(baseURL), nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -228,63 +228,87 @@ func TestParseRDFSampleWithDublinCore(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
|
func TestParseRDFFeedWithEmptyTitle(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
|
||||||
<rdf:RDF
|
<rdf:RDF
|
||||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
xmlns="http://purl.org/rss/1.0/">
|
||||||
xmlns="http://purl.org/rss/1.0/"
|
|
||||||
>
|
|
||||||
|
|
||||||
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
|
||||||
<title>Meerkat</title>
|
|
||||||
<link>http://meerkat.oreillynet.com</link>
|
|
||||||
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
|
||||||
</channel>
|
|
||||||
|
|
||||||
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
|
||||||
<title>XML: A Disruptive Technology</title>
|
|
||||||
<link>http://c.moreover.com/click/here.pl?r123</link>
|
|
||||||
<dc:description>
|
|
||||||
XML is placing increasingly heavy loads on the existing technical
|
|
||||||
infrastructure of the Internet.
|
|
||||||
</dc:description>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
|
|
||||||
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseItemRelativeURL(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
|
||||||
<channel>
|
<channel>
|
||||||
<title>Example</title>
|
<link>http://example.org/item</link>
|
||||||
<link>http://example.org</link>
|
|
||||||
</channel>
|
</channel>
|
||||||
|
|
||||||
<item>
|
<item>
|
||||||
<title>Title</title>
|
<title>Example</title>
|
||||||
|
<link>http://example.org/item</link>
|
||||||
<description>Test</description>
|
<description>Test</description>
|
||||||
<link>something.html</link>
|
|
||||||
</item>
|
</item>
|
||||||
</rdf:RDF>`
|
</rdf:RDF>`
|
||||||
|
|
||||||
feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
|
feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if feed.Entries[0].URL != "http://example.org/something.html" {
|
if feed.Title != "http://example.org/feed" {
|
||||||
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
|
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRDFFeedWithEmptyLink(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
</channel>
|
||||||
|
<item>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org/item</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://example.org/feed" {
|
||||||
|
t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.FeedURL != "http://example.org/feed" {
|
||||||
|
t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRDFFeedWithRelativeLink(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link>/test/index.html</link>
|
||||||
|
</channel>
|
||||||
|
<item>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org/item</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.SiteURL != "http://example.org/test/index.html" {
|
||||||
|
t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.FeedURL != "http://example.org/feed" {
|
||||||
|
t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -321,63 +345,7 @@ func TestParseItemWithoutLink(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseItemWithDublicCoreDate(t *testing.T) {
|
func TestParseItemRelativeURL(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
|
||||||
<channel>
|
|
||||||
<title>Example</title>
|
|
||||||
<link>http://example.org</link>
|
|
||||||
</channel>
|
|
||||||
|
|
||||||
<item>
|
|
||||||
<title>Title</title>
|
|
||||||
<description>Test</description>
|
|
||||||
<link>http://example.org/test.html</link>
|
|
||||||
<dc:creator>Tester</dc:creator>
|
|
||||||
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
|
|
||||||
if !feed.Entries[0].Date.Equal(expectedDate) {
|
|
||||||
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
|
||||||
<channel>
|
|
||||||
<title>Example</title>
|
|
||||||
<link>http://example.org</link>
|
|
||||||
</channel>
|
|
||||||
|
|
||||||
<item>
|
|
||||||
<title>Title</title>
|
|
||||||
<description>Test</description>
|
|
||||||
<link>http://example.org/test.html</link>
|
|
||||||
<dc:creator><a href="http://example.org/author1">Author 1</a> (University 1), <a href="http://example.org/author2">Author 2</a> (University 2)</dc:creator>
|
|
||||||
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
|
|
||||||
if feed.Entries[0].Author != expectedAuthor {
|
|
||||||
t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseItemWithoutDate(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
<channel>
|
<channel>
|
||||||
|
@ -388,90 +356,17 @@ func TestParseItemWithoutDate(t *testing.T) {
|
||||||
<item>
|
<item>
|
||||||
<title>Title</title>
|
<title>Title</title>
|
||||||
<description>Test</description>
|
<description>Test</description>
|
||||||
<link>http://example.org/test.html</link>
|
<link>something.html</link>
|
||||||
</item>
|
</item>
|
||||||
</rdf:RDF>`
|
</rdf:RDF>`
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
expectedDate := time.Now().In(time.Local)
|
if feed.Entries[0].URL != "http://example.org/something.html" {
|
||||||
diff := expectedDate.Sub(feed.Entries[0].Date)
|
t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL)
|
||||||
if diff > time.Second {
|
|
||||||
t.Errorf("Incorrect entry date, got: %v", diff)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
|
||||||
<channel>
|
|
||||||
<title>Example</title>
|
|
||||||
<link>http://example.org</link>
|
|
||||||
</channel>
|
|
||||||
|
|
||||||
<item>
|
|
||||||
<title>AT&amp;T</title>
|
|
||||||
<description>Test</description>
|
|
||||||
<link>http://example.org/test.html</link>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if feed.Entries[0].Title != `AT&T` {
|
|
||||||
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseInvalidXml(t *testing.T) {
|
|
||||||
data := `garbage`
|
|
||||||
_, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err == nil {
|
|
||||||
t.Fatal("Parse should returns an error")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseFeedWithHTMLEntity(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
|
||||||
<channel>
|
|
||||||
<title>Example Feed</title>
|
|
||||||
<link>http://example.org</link>
|
|
||||||
</channel>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if feed.Title != "Example \u00a0 Feed" {
|
|
||||||
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
|
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
|
||||||
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
|
||||||
<channel>
|
|
||||||
<title>Example Feed</title>
|
|
||||||
<link>http://example.org/a&b</link>
|
|
||||||
</channel>
|
|
||||||
</rdf:RDF>`
|
|
||||||
|
|
||||||
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
|
||||||
if err != nil {
|
|
||||||
t.Fatal(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if feed.SiteURL != "http://example.org/a&b" {
|
|
||||||
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -539,6 +434,130 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseRDFItemWitEmptyTitleElement(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
</channel>
|
||||||
|
<item>
|
||||||
|
<title> </title>
|
||||||
|
<link>http://example.org/item</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries) != 1 {
|
||||||
|
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := `http://example.org/item`
|
||||||
|
result := feed.Entries[0].Title
|
||||||
|
if result != expected {
|
||||||
|
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
</channel>
|
||||||
|
<item>
|
||||||
|
<dc:title>Dublin Core Title</dc:title>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries) != 1 {
|
||||||
|
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := `Dublin Core Title`
|
||||||
|
result := feed.Entries[0].Title
|
||||||
|
if result != expected {
|
||||||
|
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF
|
||||||
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
||||||
|
<channel>
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
</channel>
|
||||||
|
<item>
|
||||||
|
<title>Item Title</title>
|
||||||
|
<dc:title/>
|
||||||
|
<link>http://example.org/</link>
|
||||||
|
<description>Test</description>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(feed.Entries) != 1 {
|
||||||
|
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
||||||
|
}
|
||||||
|
|
||||||
|
expected := `Item Title`
|
||||||
|
result := feed.Entries[0].Title
|
||||||
|
if result != expected {
|
||||||
|
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithEncodedHTMLTitle(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>AT&amp;T</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if feed.Entries[0].Title != `AT&T` {
|
||||||
|
t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseRDFWithContentEncoded(t *testing.T) {
|
func TestParseRDFWithContentEncoded(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF
|
<rdf:RDF
|
||||||
|
@ -605,101 +624,194 @@ func TestParseRDFWithEncodedHTMLDescription(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) {
|
func TestParseItemWithoutDate(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedDate := time.Now().In(time.Local)
|
||||||
|
diff := expectedDate.Sub(feed.Entries[0].Date)
|
||||||
|
if diff > time.Second {
|
||||||
|
t.Errorf("Incorrect entry date, got: %v", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithDublicCoreDate(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
<dc:creator>Tester</dc:creator>
|
||||||
|
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC)
|
||||||
|
if !feed.Entries[0].Date.Equal(expectedDate) {
|
||||||
|
t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithInvalidDublicCoreDate(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
<dc:creator>Tester</dc:creator>
|
||||||
|
<dc:date>20-04-10T05:00:00+00:00</dc:date>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedDate := time.Now().In(time.Local)
|
||||||
|
diff := expectedDate.Sub(feed.Entries[0].Date)
|
||||||
|
if diff > time.Second {
|
||||||
|
t.Errorf("Incorrect entry date, got: %v", diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) {
|
||||||
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
|
||||||
|
<channel>
|
||||||
|
<title>Example</title>
|
||||||
|
<link>http://example.org</link>
|
||||||
|
</channel>
|
||||||
|
|
||||||
|
<item>
|
||||||
|
<title>Title</title>
|
||||||
|
<description>Test</description>
|
||||||
|
<link>http://example.org/test.html</link>
|
||||||
|
<dc:creator><a href="http://example.org/author1">Author 1</a> (University 1), <a href="http://example.org/author2">Author 2</a> (University 2)</dc:creator>
|
||||||
|
<dc:date>2018-04-10T05:00:00+00:00</dc:date>
|
||||||
|
</item>
|
||||||
|
</rdf:RDF>`
|
||||||
|
|
||||||
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedAuthor := "Author 1 (University 1), Author 2 (University 2)"
|
||||||
|
if feed.Entries[0].Author != expectedAuthor {
|
||||||
|
t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseItemWithOnlyFeedAuthor(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF
|
<rdf:RDF
|
||||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||||
|
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||||
xmlns="http://purl.org/rss/1.0/"
|
xmlns="http://purl.org/rss/1.0/"
|
||||||
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
>
|
||||||
<channel>
|
|
||||||
<title>Example Feed</title>
|
<channel rdf:about="http://meerkat.oreillynet.com/?_fl=rss1.0">
|
||||||
<link>http://example.org/</link>
|
<title>Meerkat</title>
|
||||||
|
<link>http://meerkat.oreillynet.com</link>
|
||||||
|
<dc:creator>Rael Dornfest (mailto:rael@oreilly.com)</dc:creator>
|
||||||
</channel>
|
</channel>
|
||||||
<item>
|
|
||||||
<title>Item Title</title>
|
<item rdf:about="http://c.moreover.com/click/here.pl?r123">
|
||||||
<dc:title/>
|
<title>XML: A Disruptive Technology</title>
|
||||||
<link>http://example.org/</link>
|
<link>http://c.moreover.com/click/here.pl?r123</link>
|
||||||
<description>Test</description>
|
<dc:description>
|
||||||
|
XML is placing increasingly heavy loads on the existing technical
|
||||||
|
infrastructure of the Internet.
|
||||||
|
</dc:description>
|
||||||
</item>
|
</item>
|
||||||
</rdf:RDF>`
|
</rdf:RDF>`
|
||||||
|
|
||||||
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(feed.Entries) != 1 {
|
if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" {
|
||||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
|
||||||
}
|
|
||||||
|
|
||||||
expected := `Item Title`
|
|
||||||
result := feed.Entries[0].Title
|
|
||||||
if result != expected {
|
|
||||||
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) {
|
func TestParseInvalidXml(t *testing.T) {
|
||||||
|
data := `garbage`
|
||||||
|
_, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("Parse should returns an error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseFeedWithHTMLEntity(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
||||||
xmlns="http://purl.org/rss/1.0/"
|
|
||||||
xmlns:dc="http://purl.org/dc/elements/1.1/">
|
|
||||||
<channel>
|
<channel>
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
<link>http://example.org/</link>
|
<link>http://example.org</link>
|
||||||
</channel>
|
</channel>
|
||||||
<item>
|
|
||||||
<dc:title>Dublin Core Title</dc:title>
|
|
||||||
<link>http://example.org/</link>
|
|
||||||
<description>Test</description>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
</rdf:RDF>`
|
||||||
|
|
||||||
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(feed.Entries) != 1 {
|
if feed.Title != "Example \u00a0 Feed" {
|
||||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
t.Errorf(`Incorrect title, got: %q`, feed.Title)
|
||||||
}
|
|
||||||
|
|
||||||
expected := `Dublin Core Title`
|
|
||||||
result := feed.Entries[0].Title
|
|
||||||
if result != expected {
|
|
||||||
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestParseRDFItemWitEmptyTitleElement(t *testing.T) {
|
func TestParseFeedWithInvalidCharacterEntity(t *testing.T) {
|
||||||
data := `<?xml version="1.0" encoding="utf-8"?>
|
data := `<?xml version="1.0" encoding="utf-8"?>
|
||||||
<rdf:RDF
|
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/">
|
||||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
|
||||||
xmlns="http://purl.org/rss/1.0/">
|
|
||||||
<channel>
|
<channel>
|
||||||
<title>Example Feed</title>
|
<title>Example Feed</title>
|
||||||
<link>http://example.org/</link>
|
<link>http://example.org/a&b</link>
|
||||||
</channel>
|
</channel>
|
||||||
<item>
|
|
||||||
<title> </title>
|
|
||||||
<link>http://example.org/item</link>
|
|
||||||
<description>Test</description>
|
|
||||||
</item>
|
|
||||||
</rdf:RDF>`
|
</rdf:RDF>`
|
||||||
|
|
||||||
feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data)))
|
feed, err := Parse("http://example.org", bytes.NewReader([]byte(data)))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(feed.Entries) != 1 {
|
if feed.SiteURL != "http://example.org/a&b" {
|
||||||
t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
|
t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL)
|
||||||
}
|
|
||||||
|
|
||||||
expected := `http://example.org/item`
|
|
||||||
result := feed.Entries[0].Title
|
|
||||||
if result != expected {
|
|
||||||
t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,130 +5,27 @@ package rdf // import "miniflux.app/v2/internal/reader/rdf"
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"html"
|
|
||||||
"log/slog"
|
|
||||||
"strings"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"miniflux.app/v2/internal/crypto"
|
|
||||||
"miniflux.app/v2/internal/model"
|
|
||||||
"miniflux.app/v2/internal/reader/date"
|
|
||||||
"miniflux.app/v2/internal/reader/dublincore"
|
"miniflux.app/v2/internal/reader/dublincore"
|
||||||
"miniflux.app/v2/internal/reader/sanitizer"
|
|
||||||
"miniflux.app/v2/internal/urllib"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type rdfFeed struct {
|
// RDF specs: https://web.resource.org/rss/1.0/spec
|
||||||
XMLName xml.Name `xml:"RDF"`
|
type RDF struct {
|
||||||
Title string `xml:"channel>title"`
|
XMLName xml.Name `xml:"http://www.w3.org/1999/02/22-rdf-syntax-ns# RDF"`
|
||||||
Link string `xml:"channel>link"`
|
Channel RDFChannel `xml:"channel"`
|
||||||
Items []rdfItem `xml:"item"`
|
Items []RDFItem `xml:"item"`
|
||||||
dublincore.DublinCoreFeedElement
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfFeed) Transform(baseURL string) *model.Feed {
|
type RDFChannel struct {
|
||||||
var err error
|
Title string `xml:"title"`
|
||||||
feed := new(model.Feed)
|
Link string `xml:"link"`
|
||||||
feed.Title = sanitizer.StripTags(r.Title)
|
Description string `xml:"description"`
|
||||||
feed.FeedURL = baseURL
|
dublincore.DublinCoreChannelElement
|
||||||
feed.SiteURL, err = urllib.AbsoluteURL(baseURL, r.Link)
|
|
||||||
if err != nil {
|
|
||||||
feed.SiteURL = r.Link
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, item := range r.Items {
|
type RDFItem struct {
|
||||||
entry := item.Transform()
|
|
||||||
if entry.Author == "" && r.DublinCoreCreator != "" {
|
|
||||||
entry.Author = r.GetSanitizedCreator()
|
|
||||||
}
|
|
||||||
|
|
||||||
if entry.URL == "" {
|
|
||||||
entry.URL = feed.SiteURL
|
|
||||||
} else {
|
|
||||||
entryURL, err := urllib.AbsoluteURL(feed.SiteURL, entry.URL)
|
|
||||||
if err == nil {
|
|
||||||
entry.URL = entryURL
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
feed.Entries = append(feed.Entries, entry)
|
|
||||||
}
|
|
||||||
|
|
||||||
return feed
|
|
||||||
}
|
|
||||||
|
|
||||||
type rdfItem struct {
|
|
||||||
Title string `xml:"http://purl.org/rss/1.0/ title"`
|
Title string `xml:"http://purl.org/rss/1.0/ title"`
|
||||||
Link string `xml:"link"`
|
Link string `xml:"link"`
|
||||||
Description string `xml:"description"`
|
Description string `xml:"description"`
|
||||||
dublincore.DublinCoreItemElement
|
dublincore.DublinCoreItemElement
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *rdfItem) Transform() *model.Entry {
|
|
||||||
entry := model.NewEntry()
|
|
||||||
entry.Title = r.entryTitle()
|
|
||||||
entry.Author = r.entryAuthor()
|
|
||||||
entry.URL = r.entryURL()
|
|
||||||
entry.Content = r.entryContent()
|
|
||||||
entry.Hash = r.entryHash()
|
|
||||||
entry.Date = r.entryDate()
|
|
||||||
|
|
||||||
if entry.Title == "" {
|
|
||||||
entry.Title = entry.URL
|
|
||||||
}
|
|
||||||
return entry
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryTitle() string {
|
|
||||||
for _, title := range []string{r.Title, r.DublinCoreTitle} {
|
|
||||||
title = strings.TrimSpace(title)
|
|
||||||
if title != "" {
|
|
||||||
return html.UnescapeString(title)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryContent() string {
|
|
||||||
switch {
|
|
||||||
case r.DublinCoreContent != "":
|
|
||||||
return r.DublinCoreContent
|
|
||||||
default:
|
|
||||||
return r.Description
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryAuthor() string {
|
|
||||||
return r.GetSanitizedCreator()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryURL() string {
|
|
||||||
return strings.TrimSpace(r.Link)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryDate() time.Time {
|
|
||||||
if r.DublinCoreDate != "" {
|
|
||||||
result, err := date.Parse(r.DublinCoreDate)
|
|
||||||
if err != nil {
|
|
||||||
slog.Debug("Unable to parse date from RDF feed",
|
|
||||||
slog.String("date", r.DublinCoreDate),
|
|
||||||
slog.String("link", r.Link),
|
|
||||||
slog.Any("error", err),
|
|
||||||
)
|
|
||||||
return time.Now()
|
|
||||||
}
|
|
||||||
|
|
||||||
return result
|
|
||||||
}
|
|
||||||
|
|
||||||
return time.Now()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r *rdfItem) entryHash() string {
|
|
||||||
value := r.Link
|
|
||||||
if value == "" {
|
|
||||||
value = r.Title + r.Description
|
|
||||||
}
|
|
||||||
|
|
||||||
return crypto.Hash(value)
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue