// SPDX-FileCopyrightText: Copyright The Miniflux Authors. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package rdf // import "miniflux.app/v2/internal/reader/rdf" import ( "bytes" "strings" "testing" "time" ) func TestParseRDFSample(t *testing.T) { data := ` XML.com http://xml.com/pub XML.com features a rich mix of information and services for the XML community. XML.com http://www.xml.com http://xml.com/universal/images/xml_tiny.gif Processing Inclusions with XSLT http://xml.com/pub/2000/08/09/xslt/xslt.html Processing document inclusions with general XML tools can be problematic. This article proposes a way of preserving inclusion information through SAX-based processing. Putting RDF to Work http://xml.com/pub/2000/08/09/rdfdb/index.html Tool and API support for the Resource Description Framework is slowly coming of age. Edd Dumbill takes a look at RDFDB, one of the most exciting new RDF toolkits. Search XML.com Search XML.com's XML collection s http://search.xml.com ` feed, err := Parse("http://xml.com/pub/rdf.xml", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Title != "XML.com" { t.Errorf("Incorrect title, got: %s", feed.Title) } if feed.FeedURL != "http://xml.com/pub/rdf.xml" { t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) } if feed.SiteURL != "http://xml.com/pub" { t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) } if len(feed.Entries) != 2 { t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) } if feed.Entries[1].Hash != "8aaeee5d3ab50351422fbded41078ee88c73bf1441085b16a8c09fd90a7db321" { t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) } if feed.Entries[1].URL != "http://xml.com/pub/2000/08/09/rdfdb/index.html" { t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) } if feed.Entries[1].Title != "Putting RDF to Work" { t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) } if strings.HasSuffix(feed.Entries[1].Content, "Tool and API support") { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } if feed.Entries[1].Date.Year() != time.Now().Year() { t.Errorf("Entry date should not be empty") } } func TestParseRDFSampleWithDublinCore(t *testing.T) { data := ` Meerkat http://meerkat.oreillynet.com Meerkat: An Open Wire Service The O'Reilly Network Rael Dornfest (mailto:rael@oreilly.com) Copyright © 2000 O'Reilly & Associates, Inc. 2000-01-01T12:00+00:00 hourly 2 2000-01-01T12:00+00:00 Meerkat Powered! http://meerkat.oreillynet.com/icons/meerkat-powered.jpg http://meerkat.oreillynet.com XML: A Disruptive Technology http://c.moreover.com/click/here.pl?r123 XML is placing increasingly heavy loads on the existing technical infrastructure of the Internet. The O'Reilly Network Simon St.Laurent (mailto:simonstl@simonstl.com) Copyright © 2000 O'Reilly & Associates, Inc. XML XML.com NASDAQ XML Search Meerkat Search Meerkat's RSS Database... s http://meerkat.oreillynet.com/ search regex ` feed, err := Parse("http://meerkat.oreillynet.com/feed.rdf", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Title != "Meerkat" { t.Errorf("Incorrect title, got: %s", feed.Title) } if feed.FeedURL != "http://meerkat.oreillynet.com/feed.rdf" { t.Errorf("Incorrect feed URL, got: %s", feed.FeedURL) } if feed.SiteURL != "http://meerkat.oreillynet.com" { t.Errorf("Incorrect site URL, got: %s", feed.SiteURL) } if len(feed.Entries) != 1 { t.Errorf("Incorrect number of entries, got: %d", len(feed.Entries)) } if feed.Entries[0].Hash != "fa4ef7c300b175ca66f92f226b5dba5caa2a9619f031101bf56e5b884b02cd97" { t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) } if feed.Entries[0].URL != "http://c.moreover.com/click/here.pl?r123" { t.Errorf("Incorrect entry URL, got: %s", feed.Entries[0].URL) } if feed.Entries[0].Title != "XML: A Disruptive Technology" { t.Errorf("Incorrect entry title, got: %s", feed.Entries[0].Title) } if strings.HasSuffix(feed.Entries[0].Content, "XML is placing increasingly") { t.Errorf("Incorrect entry content, got: %s", feed.Entries[0].Content) } if feed.Entries[0].Author != "Simon St.Laurent (mailto:simonstl@simonstl.com)" { t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) } } func TestParseRDFFeedWithEmptyTitle(t *testing.T) { data := ` http://example.org/item Example http://example.org/item Test ` feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Title != "http://example.org/feed" { t.Errorf(`Incorrect title, got: %q`, feed.Title) } } func TestParseRDFFeedWithEmptyLink(t *testing.T) { data := ` Example Feed Example http://example.org/item Test ` feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.SiteURL != "http://example.org/feed" { t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL) } if feed.FeedURL != "http://example.org/feed" { t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL) } } func TestParseRDFFeedWithRelativeLink(t *testing.T) { data := ` Example Feed /test/index.html Example http://example.org/item Test ` feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.SiteURL != "http://example.org/test/index.html" { t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL) } if feed.FeedURL != "http://example.org/feed" { t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL) } } func TestParseRDFFeedSiteURLWithTrailingSpace(t *testing.T) { data := ` Example Feed http://example.org/test/index.html Example http://example.org/item Test ` feed, err := Parse("http://example.org/feed", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.SiteURL != "http://example.org/test/index.html" { t.Errorf(`Incorrect SiteURL, got: %q`, feed.SiteURL) } if feed.FeedURL != "http://example.org/feed" { t.Errorf(`Incorrect FeedURL, got: %q`, feed.FeedURL) } } func TestParseItemWithoutLink(t *testing.T) { data := ` Meerkat http://meerkat.oreillynet.com Title Test ` feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Entries[0].Hash != "37f5223ebd58639aa62a49afbb61df960efb7dc5db5181dfb3cedd9a49ad34c6" { t.Errorf("Incorrect entry hash, got: %s", feed.Entries[0].Hash) } if feed.Entries[0].URL != "http://meerkat.oreillynet.com" { t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL) } } func TestParseItemRelativeURL(t *testing.T) { data := ` Example http://example.org Title Test something.html ` feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Entries[0].URL != "http://example.org/something.html" { t.Errorf("Incorrect entry url, got: %s", feed.Entries[0].URL) } } func TestParseFeedWithURLWrappedInSpaces(t *testing.T) { data := ` bioRxiv Subject Collection: Bioengineering http://biorxiv.org This feed contains articles for bioRxiv Subject Collection "Bioengineering" bioRxiv bioRxiv http://biorxiv.org <![CDATA[ Microscale Collagen and Fibroblast Interactions Enhance Primary Human Hepatocyte Functions in 3-Dimensional Models ]]> http://biorxiv.org/cgi/content/short/857789v1?rss=1 2019-11-29 doi:10.1101/857789 Cold Spring Harbor Laboratory 2019-11-29 ` feed, err := Parse("http://biorxiv.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.SiteURL != "http://biorxiv.org" { t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } if feed.Entries[0].URL != `http://biorxiv.org/cgi/content/short/857789v1?rss=1` { t.Errorf(`Unexpected entry URL, got %q`, feed.Entries[0].URL) } } func TestParseRDFItemWitEmptyTitleElement(t *testing.T) { data := ` Example Feed http://example.org/ http://example.org/item Test ` feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } expected := `http://example.org/item` result := feed.Entries[0].Title if result != expected { t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) } } func TestParseRDFItemWithDublinCoreTitleElement(t *testing.T) { data := ` Example Feed http://example.org/ Dublin Core Title http://example.org/ Test ` feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } expected := `Dublin Core Title` result := feed.Entries[0].Title if result != expected { t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) } } func TestParseRDFItemWithDuplicateTitleElement(t *testing.T) { data := ` Example Feed http://example.org/ Item Title http://example.org/ Test ` feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } expected := `Item Title` result := feed.Entries[0].Title if result != expected { t.Errorf(`Unexpected entry title, got %q instead of %q`, result, expected) } } func TestParseItemWithEncodedHTMLTitle(t *testing.T) { data := ` Example http://example.org AT&amp;T Test http://example.org/test.html ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Entries[0].Title != `AT&T` { t.Errorf("Incorrect entry title, got: %q", feed.Entries[0].Title) } } func TestParseRDFWithContentEncoded(t *testing.T) { data := ` Example Feed http://example.org/ Item Title http://example.org/ Test

]]>
` feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } expected := `

Test

` result := feed.Entries[0].Content if result != expected { t.Errorf(`Unexpected entry content, got %q instead of %q`, result, expected) } } func TestParseRDFWithEncodedHTMLDescription(t *testing.T) { data := ` Example Feed http://example.org/ Item Title http://example.org/ AT&amp;T <img src="https://example.org/img.png"></a> ` feed, err := Parse("http://example.org/", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if len(feed.Entries) != 1 { t.Fatalf(`Unexpected number of entries, got %d`, len(feed.Entries)) } expected := `AT&T ` result := feed.Entries[0].Content if result != expected { t.Errorf(`Unexpected entry content, got %v instead of %v`, result, expected) } } func TestParseItemWithoutDate(t *testing.T) { data := ` Example http://example.org Title Test http://example.org/test.html ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } expectedDate := time.Now().In(time.Local) diff := expectedDate.Sub(feed.Entries[0].Date) if diff > time.Second { t.Errorf("Incorrect entry date, got: %v", diff) } } func TestParseItemWithDublicCoreDate(t *testing.T) { data := ` Example http://example.org Title Test http://example.org/test.html Tester 2018-04-10T05:00:00+00:00 ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } expectedDate := time.Date(2018, time.April, 10, 5, 0, 0, 0, time.UTC) if !feed.Entries[0].Date.Equal(expectedDate) { t.Errorf("Incorrect entry date, got: %v, want: %v", feed.Entries[0].Date, expectedDate) } } func TestParseItemWithInvalidDublicCoreDate(t *testing.T) { data := ` Example http://example.org Title Test http://example.org/test.html Tester 20-04-10T05:00:00+00:00 ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } expectedDate := time.Now().In(time.Local) diff := expectedDate.Sub(feed.Entries[0].Date) if diff > time.Second { t.Errorf("Incorrect entry date, got: %v", diff) } } func TestParseItemWithEncodedHTMLInDCCreatorField(t *testing.T) { data := ` Example http://example.org Title Test http://example.org/test.html <a href="http://example.org/author1">Author 1</a> (University 1), <a href="http://example.org/author2">Author 2</a> (University 2) 2018-04-10T05:00:00+00:00 ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } expectedAuthor := "Author 1 (University 1), Author 2 (University 2)" if feed.Entries[0].Author != expectedAuthor { t.Errorf("Incorrect entry author, got: %s, want: %s", feed.Entries[0].Author, expectedAuthor) } } func TestParseItemWithOnlyFeedAuthor(t *testing.T) { data := ` Meerkat http://meerkat.oreillynet.com Rael Dornfest (mailto:rael@oreilly.com) XML: A Disruptive Technology http://c.moreover.com/click/here.pl?r123 XML is placing increasingly heavy loads on the existing technical infrastructure of the Internet. ` feed, err := Parse("http://meerkat.oreillynet.com", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Entries[0].Author != "Rael Dornfest (mailto:rael@oreilly.com)" { t.Errorf("Incorrect entry author, got: %s", feed.Entries[0].Author) } } func TestParseInvalidXml(t *testing.T) { data := `garbage` _, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err == nil { t.Fatal("Parse should returns an error") } } func TestParseFeedWithHTMLEntity(t *testing.T) { data := ` Example   Feed http://example.org ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.Title != "Example \u00a0 Feed" { t.Errorf(`Incorrect title, got: %q`, feed.Title) } } func TestParseFeedWithInvalidCharacterEntity(t *testing.T) { data := ` Example Feed http://example.org/a&b ` feed, err := Parse("http://example.org", bytes.NewReader([]byte(data))) if err != nil { t.Fatal(err) } if feed.SiteURL != "http://example.org/a&b" { t.Errorf(`Incorrect URL, got: %q`, feed.SiteURL) } }