Handle Atom feeds with HTML title

This commit is contained in:
Frédéric Guillot 2018-02-17 12:21:58 -08:00
parent 3ba280e10c
commit 9292d5d604
2 changed files with 92 additions and 2 deletions

View File

@ -14,6 +14,7 @@ import (
"github.com/miniflux/miniflux/logger"
"github.com/miniflux/miniflux/model"
"github.com/miniflux/miniflux/reader/date"
"github.com/miniflux/miniflux/reader/sanitizer"
"github.com/miniflux/miniflux/url"
)
@ -28,7 +29,7 @@ type atomFeed struct {
type atomEntry struct {
ID string `xml:"id"`
Title string `xml:"title"`
Title atomContent `xml:"title"`
Updated string `xml:"updated"`
Links []atomLink `xml:"link"`
Summary string `xml:"summary"`
@ -97,7 +98,7 @@ func (a *atomEntry) Transform() *model.Entry {
entry.Author = getAuthor(a.Author)
entry.Hash = getHash(a)
entry.Content = getContent(a)
entry.Title = strings.TrimSpace(a.Title)
entry.Title = getTitle(a)
entry.Enclosures = getEnclosures(a)
return entry
}
@ -160,6 +161,17 @@ func getContent(a *atomEntry) string {
return ""
}
// getTitle returns the plain-text title of an Atom entry.
//
// The raw value is taken from the XML field when the title type is "xhtml"
// and from the Data field for every other type. Tags are then removed with
// sanitizer.StripTags and surrounding whitespace is trimmed.
func getTitle(a *atomEntry) string {
	raw := a.Title.Data
	if a.Title.Type == "xhtml" {
		raw = a.Title.XML
	}

	stripped := sanitizer.StripTags(raw)
	return strings.TrimSpace(stripped)
}
func getHash(a *atomEntry) string {
for _, value := range []string{a.ID, getURL(a.Links)} {
if value != "" {

View File

@ -206,6 +206,84 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) {
}
}
// TestParseEntryTitleWithHTMLAndCDATA checks that an entry title declared as
// type="html" and wrapped in a CDATA section has its numeric character
// references decoded into the corresponding characters.
func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<entry>
<title type="html"><![CDATA[Test &#8220;Test&#8221;]]></title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the feed value must not be dereferenced after a parse failure.
		t.Fatal(err)
	}

	// Guard the index below so a wrong entry count fails the test instead of panicking.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Test “Test”" {
		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
	}
}
// TestParseEntryTitleWithHTML checks that an entry title declared as
// type="html" with entity-escaped markup is returned without the tags.
func TestParseEntryTitleWithHTML(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<entry>
<title type="html">&lt;code&gt;Test&lt;/code&gt; Test</title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the feed value must not be dereferenced after a parse failure.
		t.Fatal(err)
	}

	// Guard the index below so a wrong entry count fails the test instead of panicking.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Test Test" {
		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
	}
}
// TestParseEntryTitleWithXHTML checks that an entry title declared as
// type="xhtml" with inline markup is returned without the tags.
func TestParseEntryTitleWithXHTML(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<link href="http://example.org/"/>
<entry>
<title type="xhtml"><code>Test</code> Test</title>
<link href="http://example.org/2003/12/13/atom03"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
</entry>
</feed>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		// Stop here: the feed value must not be dereferenced after a parse failure.
		t.Fatal(err)
	}

	// Guard the index below so a wrong entry count fails the test instead of panicking.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Test Test" {
		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
	}
}
func TestParseEntryWithAuthorName(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">