Improve Dublin Core support for RDF feeds

This commit is contained in:
Frédéric Guillot 2019-12-23 14:39:54 -08:00
parent 1b33bb3d1c
commit 200b1c304b
3 changed files with 86 additions and 15 deletions

17
reader/rdf/dublincore.go Normal file
View File

@ -0,0 +1,17 @@
// Copyright 2019 Frédéric Guillot. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.
package rdf // import "miniflux.app/reader/rdf"
// DublinCoreFeedElement represents Dublin Core feed XML elements.
//
// The field path starts at the <channel> child, so the struct has to be
// decoded from (or embedded in the struct decoded from) the feed's root
// element.
type DublinCoreFeedElement struct {
	// DublinCoreCreator holds the text of the <creator> element nested in
	// <channel> (dc:creator in RDF feeds).
	//
	// The tag deliberately carries no namespace: encoding/xml checks a tag
	// namespace against every element of an "a>b" path, and <channel> lives
	// in the RSS 1.0 namespace rather than the Dublin Core one, so a
	// namespaced path would never match a regular RDF feed.
	DublinCoreCreator string `xml:"channel>creator"`
}
// DublinCoreEntryElement groups the per-entry metadata elements decoded
// from an RDF item.
type DublinCoreEntryElement struct {
	// DublinCoreDate is the raw text of the Dublin Core <date> element.
	DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`

	// DublinCoreCreator is the raw text of the Dublin Core <creator> element.
	DublinCoreCreator string `xml:"http://purl.org/dc/elements/1.1/ creator"`

	// DublinCoreContent is the raw text of the <encoded> element. Despite the
	// field name, its tag uses the RSS 1.0 content module namespace, not the
	// Dublin Core one.
	DublinCoreContent string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
}

View File

@ -486,3 +486,36 @@ func TestParseFeedWithURLWrappedInSpaces(t *testing.T) {
t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL)
}
}
// TestParseRDFWithContentEncoded checks that the text of an item's
// <content:encoded> element ends up in the parsed entry's Content field.
func TestParseRDFWithContentEncoded(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://purl.org/rss/1.0/"
xmlns:content="http://purl.org/rss/1.0/modules/content/">
<channel>
<title>Example Feed</title>
<link>http://example.org/</link>
</channel>
<item>
<title>Item Title</title>
<link>http://example.org/</link>
<content:encoded><![CDATA[<p>Test</p>]]></content:encoded>
</item>
</rdf:RDF>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	if len(feed.Entries) != 1 {
		t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries))
	}

	expected := `<p>Test</p>`
	result := feed.Entries[0].Content
	if result != expected {
		// The assertion is about the entry content, so the message says so
		// (it previously reported an "entry URL" mismatch).
		t.Errorf(`Unexpected entry content, got %q instead of %q`, result, expected)
	}
}

View File

@ -21,8 +21,8 @@ type rdfFeed struct {
XMLName xml.Name `xml:"RDF"`
Title string `xml:"channel>title"`
Link string `xml:"channel>link"`
Creator string `xml:"channel>creator"`
Items []rdfItem `xml:"item"`
DublinCoreFeedElement
}
func (r *rdfFeed) Transform() *model.Feed {
@ -32,9 +32,10 @@ func (r *rdfFeed) Transform() *model.Feed {
for _, item := range r.Items {
entry := item.Transform()
if entry.Author == "" && r.Creator != "" {
entry.Author = sanitizer.StripTags(r.Creator)
if entry.Author == "" && r.DublinCoreCreator != "" {
entry.Author = strings.TrimSpace(r.DublinCoreCreator)
}
entry.Author = sanitizer.StripTags(entry.Author)
if entry.URL == "" {
entry.URL = feed.SiteURL
@ -55,24 +56,44 @@ type rdfItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
Creator string `xml:"creator"`
Date string `xml:"date"`
DublinCoreEntryElement
}
// Transform converts the RDF item into a model.Entry, filling in the
// entry's title, author, URL, content, hash and date.
func (r *rdfItem) Transform() *model.Entry {
entry := new(model.Entry)
entry.Title = strings.TrimSpace(r.Title)
entry.Author = strings.TrimSpace(r.Creator)
entry.URL = strings.TrimSpace(r.Link)
entry.Content = r.Description
entry.Hash = getHash(r)
entry.Date = getDate(r)
entry.Title = r.entryTitle()
entry.Author = r.entryAuthor()
entry.URL = r.entryURL()
entry.Content = r.entryContent()
entry.Hash = r.entryHash()
entry.Date = r.entryDate()
return entry
}
func getDate(r *rdfItem) time.Time {
if r.Date != "" {
result, err := date.Parse(r.Date)
// entryTitle returns the item title with leading and trailing white space
// removed.
func (r *rdfItem) entryTitle() string {
	title := r.Title
	return strings.TrimSpace(title)
}
// entryContent returns the item's DublinCoreContent value when it is not
// empty, and falls back to Description otherwise. Neither value is trimmed
// or otherwise modified.
func (r *rdfItem) entryContent() string {
	if r.DublinCoreContent == "" {
		return r.Description
	}
	return r.DublinCoreContent
}
// entryAuthor returns the item's DublinCoreCreator value with leading and
// trailing white space removed.
func (r *rdfItem) entryAuthor() string {
	creator := r.DublinCoreCreator
	return strings.TrimSpace(creator)
}
// entryURL returns the item link with leading and trailing white space
// removed.
func (r *rdfItem) entryURL() string {
	link := r.Link
	return strings.TrimSpace(link)
}
func (r *rdfItem) entryDate() time.Time {
if r.DublinCoreDate != "" {
result, err := date.Parse(r.DublinCoreDate)
if err != nil {
logger.Error("rdf: %v", err)
return time.Now()
@ -84,7 +105,7 @@ func getDate(r *rdfItem) time.Time {
return time.Now()
}
func getHash(r *rdfItem) string {
func (r *rdfItem) entryHash() string {
value := r.Link
if value == "" {
value = r.Title + r.Description