Add workaround for entry title with double encoded entities

Example: a title delivered as `&amp;#39;Text&amp;#39;` should be displayed as `'Text'`.
This commit is contained in:
Frédéric Guillot 2021-02-13 13:26:55 -08:00 committed by fguillot
parent 793f475edd
commit 5043749b9f
2 changed files with 49 additions and 2 deletions

View File

@ -989,7 +989,53 @@ func TestParseItemTitleWithHTMLEntity(t *testing.T) {
}
if feed.Entries[0].Title != "</example>" {
t.Errorf(`Incorrect title, got: %q`, feed.Title)
t.Errorf(`Incorrect title, got: %q`, feed.Entries[0].Title)
}
}
// TestParseItemTitleWithNumericCharacterReference checks that an RSS item
// title written with a decimal (&#931;) and a hexadecimal (&#xDF;) numeric
// character reference is exposed as the characters "Σ ß".
func TestParseItemTitleWithNumericCharacterReference(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<link>https://example.org/</link>
<title>Example</title>
<item>
<title>&#931; &#xDF;</title>
<link>http://www.example.org/article.html</link>
</item>
</channel>
</rss>`

	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Stop here with a readable failure instead of panicking on the index
	// expression below when the parser does not return the single item.
	if len(feed.Entries) != 1 {
		t.Fatalf(`Incorrect number of entries, got: %d`, len(feed.Entries))
	}

	if feed.Entries[0].Title != "Σ ß" {
		t.Errorf(`Incorrect title, got: %q`, feed.Entries[0].Title)
	}
}
// TestParseItemTitleWithDoubleEncodedEntities checks that an RSS item title
// whose apostrophes were escaped twice (&amp;#39;) is exposed as the plain
// text 'Text' rather than with a leftover &#39; sequence.
func TestParseItemTitleWithDoubleEncodedEntities(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:slash="http://purl.org/rss/1.0/modules/slash/">
<channel>
<link>https://example.org/</link>
<title>Example</title>
<item>
<title>&amp;#39;Text&amp;#39;</title>
<link>http://www.example.org/article.html</link>
</item>
</channel>
</rss>`

	feed, err := Parse("https://example.org/", bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Stop here with a readable failure instead of panicking on the index
	// expression below when the parser does not return the single item.
	if len(feed.Entries) != 1 {
		t.Fatalf(`Incorrect number of entries, got: %d`, len(feed.Entries))
	}

	if feed.Entries[0].Title != "'Text'" {
		t.Errorf(`Incorrect title, got: %q`, feed.Entries[0].Title)
	}
}

View File

@ -6,6 +6,7 @@ package rss // import "miniflux.app/reader/rss"
import (
"encoding/xml"
"html"
"path"
"strconv"
"strings"
@ -257,7 +258,7 @@ func (r *rssItem) entryTitle() string {
}
}
return strings.TrimSpace(title)
return html.UnescapeString(strings.TrimSpace(title))
}
func (r *rssItem) entryContent() string {