Add scraper rules for version2.dk and ing.dk

This commit is contained in:
Frédéric Guillot 2017-12-27 19:44:23 -08:00
parent d4839b5597
commit c454f67037
2 changed files with 3 additions and 1 deletion

View File

@@ -129,7 +129,7 @@ func (c *Client) buildClient() http.Client {
func (c *Client) buildHeaders() http.Header {
headers := make(http.Header)
headers.Add("User-Agent", userAgent)
headers.Add("Accept", "text/html,application/xhtml+xml,application/xml,application/json,image/*")
headers.Add("Accept", "*/*")
if c.etagHeader != "" {
headers.Add("If-None-Match", c.etagHeader)

View File

@@ -10,6 +10,7 @@ var predefinedRules = map[string]string{
"cbc.ca": ".story-content",
"github.com": "article.entry-content",
"igen.fr": "section.corps",
"ing.dk": "section.body",
"lapresse.ca": ".amorce, .entry",
"lemonde.fr": "div#articleBody",
"lesjoiesducode.fr": ".blog-post-content img",
@@ -20,5 +21,6 @@ var predefinedRules = map[string]string{
"phoronix.com": "div.content",
"techcrunch.com": "div.article-entry",
"theregister.co.uk": "#body",
"version2.dk": "section.body",
"wired.com": "main figure, article",
}