Ignore <media:title> in RSS 2.0 feeds

In the vast majority of cases, the default entry title is correct.

Ignoring <media:title> avoids overriding the default title when the two differ.
This commit is contained in:
Frédéric Guillot 2020-06-29 18:08:19 -07:00
parent c70bebb2aa
commit 1d6b0491a7
2 changed files with 70 additions and 2 deletions

View File

@ -136,6 +136,51 @@ func TestParseEntryWithoutTitle(t *testing.T) {
}
}
// TestParseEntryWithMediaTitle checks that an item carrying both a plain
// <title> and a <media:title> keeps the plain title as the entry title.
func TestParseEntryWithMediaTitle(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
<channel>
<link>https://example.org/</link>
<item>
<title>Entry Title</title>
<link>https://example.org/item</link>
<media:title>Media Title</media:title>
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Stop here if the item was not parsed: indexing an empty slice below
	// would panic and hide the real failure behind a runtime error.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Entry Title" {
		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
	}
}
// TestParseEntryWithDCTitleOnly checks that an item whose only title is a
// Dublin Core <dc:title> uses that value as the entry title.
func TestParseEntryWithDCTitleOnly(t *testing.T) {
	data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/" xmlns:dc="http://purl.org/dc/elements/1.1/">
<channel>
<link>https://example.org/</link>
<item>
<dc:title>Entry Title</dc:title>
<link>https://example.org/item</link>
</item>
</channel>
</rss>`

	feed, err := Parse(bytes.NewBufferString(data))
	if err != nil {
		t.Fatal(err)
	}

	// Stop here if the item was not parsed: indexing an empty slice below
	// would panic and hide the real failure behind a runtime error.
	if len(feed.Entries) != 1 {
		t.Fatalf("Incorrect number of entries, got: %d", len(feed.Entries))
	}

	if feed.Entries[0].Title != "Entry Title" {
		t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title)
	}
}
func TestParseEntryWithoutLink(t *testing.T) {
data := `<?xml version="1.0" encoding="utf-8"?>
<rss version="2.0">

View File

@ -122,6 +122,12 @@ type rssAuthor struct {
Inner string `xml:",innerxml"`
}
// rssTitle holds one <title> element of an RSS item together with its
// qualified XML name, so callers can tell apart titles that share the local
// name "title" but live in different namespaces.
type rssTitle struct {
// XMLName is filled by encoding/xml with the element's namespace and local name.
XMLName xml.Name
// Data is the character data of the element.
Data string `xml:",chardata"`
// Inner is the raw, unparsed XML nested inside the element.
Inner string `xml:",innerxml"`
}
type rssEnclosure struct {
URL string `xml:"url,attr"`
Type string `xml:"type,attr"`
@ -138,7 +144,7 @@ func (enclosure *rssEnclosure) Size() int64 {
type rssItem struct {
GUID string `xml:"guid"`
Title string `xml:"title"`
Title []rssTitle `xml:"title"`
Links []rssLink `xml:"link"`
Description string `xml:"description"`
PubDate string `xml:"pubDate"`
@ -223,7 +229,24 @@ func (r *rssItem) entryHash() string {
}
// entryTitle picks the title to use for the item and returns it after passing
// it through sanitizer.StripTags and strings.TrimSpace.
//
// It walks r.Title in slice order and keeps the character data of the first
// element whose value is non-empty and whose namespace is not Media RSS
// ("http://search.yahoo.com/mrss/"). If no such element exists, the result is
// the cleaned-up empty string.
func (r *rssItem) entryTitle() string {
// NOTE(review): the next line appears to be the pre-change body retained by
// the diff view (it passes r.Title directly to StripTags, while the loop below
// ranges over r.Title as a slice) — confirm which lines are live before
// treating this listing as compilable code.
return strings.TrimSpace(sanitizer.StripTags(r.Title))
var title string
for _, rssTitle := range r.Title {
switch rssTitle.XMLName.Space {
case "http://search.yahoo.com/mrss/":
// Ignore title in media namespace
case "http://purl.org/dc/elements/1.1/":
// Dublin Core title: accepted, same handling as the default case.
title = rssTitle.Data
default:
// No namespace, or any other namespace: accepted.
title = rssTitle.Data
}
// This check sits after the switch, so the break leaves the for loop and the
// first non-empty title wins.
// NOTE(review): the comparison happens before trimming, so a whitespace-only
// title counts as non-empty here and stops the search.
if title != "" {
break
}
}
return strings.TrimSpace(sanitizer.StripTags(title))
}
func (r *rssItem) entryContent() string {