From 9292d5d6049de7a876c5428039c63e7c7720452b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Sat, 17 Feb 2018 12:21:58 -0800 Subject: [PATCH] Handle Atom feeds with HTML title --- reader/atom/atom.go | 16 +++++++- reader/atom/parser_test.go | 78 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/reader/atom/atom.go b/reader/atom/atom.go index 68a3903c..46971259 100644 --- a/reader/atom/atom.go +++ b/reader/atom/atom.go @@ -14,6 +14,7 @@ import ( "github.com/miniflux/miniflux/logger" "github.com/miniflux/miniflux/model" "github.com/miniflux/miniflux/reader/date" + "github.com/miniflux/miniflux/reader/sanitizer" "github.com/miniflux/miniflux/url" ) @@ -28,7 +29,7 @@ type atomFeed struct { type atomEntry struct { ID string `xml:"id"` - Title string `xml:"title"` + Title atomContent `xml:"title"` Updated string `xml:"updated"` Links []atomLink `xml:"link"` Summary string `xml:"summary"` @@ -97,7 +98,7 @@ func (a *atomEntry) Transform() *model.Entry { entry.Author = getAuthor(a.Author) entry.Hash = getHash(a) entry.Content = getContent(a) - entry.Title = strings.TrimSpace(a.Title) + entry.Title = getTitle(a) entry.Enclosures = getEnclosures(a) return entry } @@ -160,6 +161,17 @@ func getContent(a *atomEntry) string { return "" } +func getTitle(a *atomEntry) string { + title := "" + if a.Title.Type == "xhtml" { + title = a.Title.XML + } else { + title = a.Title.Data + } + + return strings.TrimSpace(sanitizer.StripTags(title)) +} + func getHash(a *atomEntry) string { for _, value := range []string{a.ID, getURL(a.Links)} { if value != "" { diff --git a/reader/atom/parser_test.go b/reader/atom/parser_test.go index be07383c..ec9186c4 100644 --- a/reader/atom/parser_test.go +++ b/reader/atom/parser_test.go @@ -206,6 +206,84 @@ func TestParseEntryTitleWithWhitespaces(t *testing.T) { } } +func TestParseEntryTitleWithHTMLAndCDATA(t *testing.T) { + data := ` + + Example Feed + + + + <![CDATA[Test 
“Test”]]> + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test “Test”" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + +func TestParseEntryTitleWithXHTML(t *testing.T) { + data := ` + + Example Feed + + + + <code>Test</code> Test + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + 2003-12-13T18:30:02Z + Some text. + + + ` + + feed, err := Parse(bytes.NewBufferString(data)) + if err != nil { + t.Error(err) + } + + if feed.Entries[0].Title != "Test Test" { + t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) + } +} + func TestParseEntryWithAuthorName(t *testing.T) { data := `