Handle RSS author elements with inner HTML

This commit is contained in:
Frédéric Guillot 2018-03-18 11:57:46 -07:00
parent 34cdffda88
commit 6ea4da3bce
2 changed files with 30 additions and 3 deletions

View File

@ -230,6 +230,31 @@ func TestParseFeedURLWithAtomLink(t *testing.T) {
}
}
// TestParseEntryWithAuthorAndInnerHTML verifies that an RSS <author> element
// whose content includes inline HTML markup is reported as plain text: the
// fixture's "by <a ...>Foo Bar</a>" must yield the author "by Foo Bar".
func TestParseEntryWithAuthorAndInnerHTML(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<title>Example</title>
<link>https://example.org/</link>
<atom:link href="https://example.org/rss" type="application/rss+xml" rel="self"></atom:link>
<item>
<title>Test</title>
<link>https://example.org/item</link>
<author>by <a itemprop="url" class="author" rel="author" href="/author/foobar">Foo Bar</a></author>
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop immediately: the assertions below dereference feed, which
		// cannot be relied on once Parse has reported an error.
		t.Fatal(err)
	}

	// Guard the index access so a feed without entries fails the test with a
	// clear message instead of panicking.
	if len(feed.Entries) == 0 {
		t.Fatal("Expected at least one entry, got none")
	}

	if feed.Entries[0].Author != "by Foo Bar" {
		t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author)
	}
}
func TestParseEntryWithAtomAuthor(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">

View File

@ -15,6 +15,7 @@ import (
"github.com/miniflux/miniflux/logger"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
"github.com/miniflux/miniflux/reader/sanitizer"
"github.com/miniflux/miniflux/url"
)
@ -56,6 +57,7 @@ type rssAuthor struct {
XMLName xml.Name
Data string `xml:",chardata"`
Name string `xml:"name"`
Inner string `xml:",innerxml"`
}
type rssEnclosure struct {
@ -100,7 +102,7 @@ func (r *rssFeed) Transform() *model.Feed {
if entry.Author == "" && r.ItunesAuthor != "" {
entry.Author = r.ItunesAuthor
}
entry.Author = strings.TrimSpace(entry.Author)
entry.Author = strings.TrimSpace(sanitizer.StripTags(entry.Author))
if entry.URL == "" {
entry.URL = feed.SiteURL
@ -146,8 +148,8 @@ func (r *rssItem) GetAuthor() string {
return element.Name
}
if element.Data != "" {
return element.Data
if element.Inner != "" {
return element.Inner
}
}