From 96f3e888cf0754c0ef4053598bb03399f2284956 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Fri, 19 Mar 2021 18:39:44 -0700 Subject: [PATCH] Handle RDF feed with HTML encoded entry title Example: http://rss.slashdot.org/Slashdot/slashdotMain --- reader/rdf/parser_test.go | 58 +++++++++++++++++++++++++++++++++++++++ reader/rdf/rdf.go | 3 +- 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/reader/rdf/parser_test.go b/reader/rdf/parser_test.go index 9383fb03..c1bc67cd 100644 --- a/reader/rdf/parser_test.go +++ b/reader/rdf/parser_test.go @@ -377,6 +377,31 @@ func TestParseItemWithoutDate(t *testing.T) { } } +func TestParseItemWithEncodedHTMLTitle(t *testing.T) { + data := ` + + + Example + http://example.org + + + + AT&amp;T + Test + http://example.org/test.html + + ` + + feed, err := Parse("http://example.org", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if feed.Entries[0].Title != `AT&T` { + t.Errorf("Incorrect entry title, got: %v", feed.Entries[0].Title) + } +} + func TestParseInvalidXml(t *testing.T) { data := `garbage` _, err := Parse("http://example.org", bytes.NewBufferString(data)) @@ -519,3 +544,36 @@ func TestParseRDFWithContentEncoded(t *testing.T) { t.Errorf(`Unexpected entry URL, got %q instead of %q`, result, expected) } } + +func TestParseRDFWithEncodedHTMLDescription(t *testing.T) { + data := ` + + + Example Feed + http://example.org/ + + + Item Title + http://example.org/ + AT&amp;T <img src="https://example.org/img.png"></a> + + ` + + feed, err := Parse("http://example.org/", bytes.NewBufferString(data)) + if err != nil { + t.Fatal(err) + } + + if len(feed.Entries) != 1 { + t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) + } + + expected := `AT&T ` + result := feed.Entries[0].Content + if result != expected { + t.Errorf(`Unexpected entry URL, got %v instead of %v`, result, expected) + } +} diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index 1710897a..9a86bff7 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -6,6 +6,7 @@ package rdf // import "miniflux.app/reader/rdf" import ( "encoding/xml" + "html" "strings" "time" @@ -75,7 +76,7 @@ func (r *rdfItem) Transform() *model.Entry { } func (r *rdfItem) entryTitle() string { - return strings.TrimSpace(r.Title) + return html.UnescapeString(strings.TrimSpace(r.Title)) } func (r *rdfItem) entryContent() string {