diff --git a/api/feed.go b/api/feed.go
index e303f580..17bb73dc 100644
--- a/api/feed.go
+++ b/api/feed.go
@@ -47,6 +47,7 @@ func (c *Controller) CreateFeed(w http.ResponseWriter, r *http.Request) {
 		feedInfo.CategoryID,
 		feedInfo.FeedURL,
 		feedInfo.Crawler,
+		feedInfo.UserAgent,
 		feedInfo.Username,
 		feedInfo.Password,
 	)
diff --git a/api/payload.go b/api/payload.go
index ee1e41b0..5acf0bb4 100644
--- a/api/payload.go
+++ b/api/payload.go
@@ -26,15 +26,17 @@ type entriesResponse struct {
 type feedCreation struct {
 	FeedURL    string `json:"feed_url"`
 	CategoryID int64  `json:"category_id"`
+	UserAgent  string `json:"user_agent"`
 	Username   string `json:"username"`
 	Password   string `json:"password"`
 	Crawler    bool   `json:"crawler"`
 }
 
 type subscriptionDiscovery struct {
-	URL      string `json:"url"`
-	Username string `json:"username"`
-	Password string `json:"password"`
+	URL       string `json:"url"`
+	UserAgent string `json:"user_agent"`
+	Username  string `json:"username"`
+	Password  string `json:"password"`
 }
 
 type feedModification struct {
@@ -44,6 +46,7 @@ type feedModification struct {
 	ScraperRules *string `json:"scraper_rules"`
 	RewriteRules *string `json:"rewrite_rules"`
 	Crawler      *bool   `json:"crawler"`
+	UserAgent    *string `json:"user_agent"`
 	Username     *string `json:"username"`
 	Password     *string `json:"password"`
 	CategoryID   *int64  `json:"category_id"`
@@ -74,6 +77,10 @@ func (f *feedModification) Update(feed *model.Feed) {
 		feed.Crawler = *f.Crawler
 	}
 
+	if f.UserAgent != nil {
+		feed.UserAgent = *f.UserAgent
+	}
+
 	if f.Username != nil {
 		feed.Username = *f.Username
 	}
diff --git a/api/subscription.go b/api/subscription.go
index 603b9321..ff4c7cf8 100644
--- a/api/subscription.go
+++ b/api/subscription.go
@@ -22,6 +22,7 @@ func (c *Controller) GetSubscriptions(w http.ResponseWriter, r *http.Request) {
 	subscriptions, err := subscription.FindSubscriptions(
 		subscriptionInfo.URL,
+		subscriptionInfo.UserAgent,
 		subscriptionInfo.Username,
 		subscriptionInfo.Password,
 	)
diff --git a/client/core.go b/client/core.go
index 437af511..53d1e13f 100644
--- a/client/core.go
+++ b/client/core.go
@@ -91,6 +91,7 @@ type Feed struct {
 	ScraperRules string    `json:"scraper_rules"`
 	RewriteRules string    `json:"rewrite_rules"`
 	Crawler      bool      `json:"crawler"`
+	UserAgent    string    `json:"user_agent"`
 	Username     string    `json:"username"`
 	Password     string    `json:"password"`
 	Category     *Category `json:"category,omitempty"`
@@ -105,6 +106,7 @@ type FeedModification struct {
 	ScraperRules *string `json:"scraper_rules"`
 	RewriteRules *string `json:"rewrite_rules"`
 	Crawler      *bool   `json:"crawler"`
+	UserAgent    *string `json:"user_agent"`
 	Username     *string `json:"username"`
 	Password     *string `json:"password"`
 	CategoryID   *int64  `json:"category_id"`
diff --git a/database/migration.go b/database/migration.go
index 31797059..39d15b78 100644
--- a/database/migration.go
+++ b/database/migration.go
@@ -12,7 +12,7 @@ import (
 	"miniflux.app/logger"
 )
 
-const schemaVersion = 20
+const schemaVersion = 21
 
 // Migrate executes database migrations.
 func Migrate(db *sql.DB) {
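Note for API consumers: the new `user_agent` field rides along in the existing JSON payloads. A minimal sketch of creating a feed with a custom User-Agent through the REST API; the host, credentials, and feed URL are placeholders, and HTTP Basic Auth is assumed:

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	// Mirrors the feedCreation payload above; user_agent is the new field.
	body, _ := json.Marshal(map[string]interface{}{
		"feed_url":    "https://example.org/feed.xml",
		"category_id": 1,
		"user_agent":  "MyReader/1.0", // per-feed override; omit to keep the default
		"crawler":     false,
	})

	req, _ := http.NewRequest(http.MethodPost, "https://miniflux.example.org/v1/feeds", bytes.NewReader(body))
	req.SetBasicAuth("admin", "password") // placeholder credentials
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println(resp.Status) // expect 201 Created on success
}
```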
diff --git a/database/sql.go b/database/sql.go
index 34fecd87..be033fb0 100644
--- a/database/sql.go
+++ b/database/sql.go
@@ -144,6 +144,7 @@ create index users_extra_idx on users using gin(extra);
 	"schema_version_20": `alter table entries add column document_vectors tsvector;
 update entries set document_vectors = to_tsvector(title || ' ' || coalesce(content, ''));
 create index document_vectors_idx on entries using gin(document_vectors);`,
+	"schema_version_21": `alter table feeds add column user_agent text default '';`,
 	"schema_version_3": `create table tokens (
     id text not null,
     value text not null,
@@ -192,6 +193,7 @@ var SqlMapChecksums = map[string]string{
 	"schema_version_19": "a83f77b41cc213d282805a5b518f15abbf96331599119f0ef4aca4be037add7b",
 	"schema_version_2":  "e8e9ff32478df04fcddad10a34cba2e8bb1e67e7977b5bd6cdc4c31ec94282b4",
 	"schema_version_20": "6c4e9b2c5bccdc3243c239c390fb1caa5e15624e669b2c07e14c126f6d2e2cd6",
+	"schema_version_21": "77da01ee38918ff4fe33985fbb20ed3276a717a7584c2ca9ebcf4d4ab6cb6910",
 	"schema_version_3":  "a54745dbc1c51c000f74d4e5068f1e2f43e83309f023415b1749a47d5c1e0f12",
 	"schema_version_4":  "216ea3a7d3e1704e40c797b5dc47456517c27dbb6ca98bf88812f4f63d74b5d9",
 	"schema_version_5":  "46397e2f5f2c82116786127e9f6a403e975b14d2ca7b652a48cd1ba843e6a27c",
diff --git a/database/sql/schema_version_21.sql b/database/sql/schema_version_21.sql
new file mode 100644
index 00000000..2c96d764
--- /dev/null
+++ b/database/sql/schema_version_21.sql
@@ -0,0 +1 @@
+alter table feeds add column user_agent text default '';
\ No newline at end of file
diff --git a/http/client/client.go b/http/client/client.go
index 201b136c..2dce15c3 100644
--- a/http/client/client.go
+++ b/http/client/client.go
@@ -33,6 +33,9 @@ const (
 )
 
 var (
+	// DefaultUserAgent is the default User-Agent header used for any request made by Miniflux.
+	DefaultUserAgent = "Mozilla/5.0 (compatible; Miniflux/" + version.Version + "; +https://miniflux.app)"
+
 	errInvalidCertificate        = "Invalid SSL certificate (original error: %q)"
 	errTemporaryNetworkOperation = "This website is temporarily unreachable (original error: %q)"
 	errPermanentNetworkOperation = "This website is permanently unreachable (original error: %q)"
@@ -47,6 +50,7 @@ type Client struct {
 	authorizationHeader string
 	username            string
 	password            string
+	userAgent           string
 	Insecure            bool
 }
@@ -72,6 +76,14 @@ func (c *Client) WithCacheHeaders(etagHeader, lastModifiedHeader string) *Client
 	return c
 }
 
+// WithUserAgent defines the User-Agent header to use for outgoing requests.
+func (c *Client) WithUserAgent(userAgent string) *Client {
+	if userAgent != "" {
+		c.userAgent = userAgent
+	}
+	return c
+}
+
 // Get executes a GET HTTP request.
 func (c *Client) Get() (*Response, error) {
 	request, err := c.buildRequest(http.MethodGet, nil)
@@ -212,7 +224,7 @@ func (c *Client) buildClient() http.Client {
 func (c *Client) buildHeaders() http.Header {
 	headers := make(http.Header)
-	headers.Add("User-Agent", "Mozilla/5.0 (compatible; Miniflux/"+version.Version+"; +https://miniflux.app)")
+	headers.Add("User-Agent", c.userAgent)
 	headers.Add("Accept", "*/*")
 
 	if c.etagHeader != "" {
@@ -233,5 +245,5 @@
 // New returns a new HTTP client.
 func New(url string) *Client {
-	return &Client{url: url, Insecure: false}
+	return &Client{url: url, userAgent: DefaultUserAgent, Insecure: false}
 }
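The override semantics here are easy to get wrong: `New` seeds the client with `DefaultUserAgent`, `WithUserAgent` ignores empty strings, and `buildHeaders` now trusts `c.userAgent` as-is, so a `Client` must be constructed via `New` to carry any User-Agent at all. A test sketch (not part of this change) that pins the behavior down:

```go
package client_test

import (
	"net/http"
	"net/http/httptest"
	"testing"

	"miniflux.app/http/client"
)

func TestUserAgentOverride(t *testing.T) {
	var got string
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		got = r.Header.Get("User-Agent") // record what the client actually sent
	}))
	defer ts.Close()

	// Empty override: the default set by New() must survive.
	if _, err := client.New(ts.URL).WithUserAgent("").Get(); err != nil {
		t.Fatal(err)
	}
	if got != client.DefaultUserAgent {
		t.Errorf("got %q, want DefaultUserAgent", got)
	}

	// Non-empty override: the custom value must win.
	if _, err := client.New(ts.URL).WithUserAgent("MyBot/1.0").Get(); err != nil {
		t.Fatal(err)
	}
	if got != "MyBot/1.0" {
		t.Errorf(`got %q, want "MyBot/1.0"`, got)
	}
}
```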
diff --git a/model/feed.go b/model/feed.go
index d6ce0d16..0ace098f 100644
--- a/model/feed.go
+++ b/model/feed.go
@@ -24,6 +24,7 @@ type Feed struct {
 	ScraperRules string    `json:"scraper_rules"`
 	RewriteRules string    `json:"rewrite_rules"`
 	Crawler      bool      `json:"crawler"`
+	UserAgent    string    `json:"user_agent"`
 	Username     string    `json:"username"`
 	Password     string    `json:"password"`
 	Category     *Category `json:"category,omitempty"`
diff --git a/reader/feed/handler.go b/reader/feed/handler.go
index fa09cb65..252d178c 100644
--- a/reader/feed/handler.go
+++ b/reader/feed/handler.go
@@ -37,7 +37,7 @@ type Handler struct {
 }
 
 // CreateFeed fetches, parses, and stores a new feed.
-func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, username, password string) (*model.Feed, error) {
+func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool, userAgent, username, password string) (*model.Feed, error) {
 	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Handler:CreateFeed] feedUrl=%s", url))
 
 	if !h.store.CategoryExists(userID, categoryID) {
@@ -46,6 +46,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 
 	clt := client.New(url)
 	clt.WithCredentials(username, password)
+	clt.WithUserAgent(userAgent)
 	response, err := clt.Get()
 	if err != nil {
 		if _, ok := err.(*errors.LocalizedError); ok {
@@ -87,6 +88,7 @@ func (h *Handler) CreateFeed(userID, categoryID int64, url string, crawler bool,
 	subscription.FeedURL = response.EffectiveURL
 	subscription.UserID = userID
 	subscription.Crawler = crawler
+	subscription.UserAgent = userAgent
 	subscription.Username = username
 	subscription.Password = password
 
@@ -136,6 +138,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	clt := client.New(originalFeed.FeedURL)
 	clt.WithCredentials(originalFeed.Username, originalFeed.Password)
 	clt.WithCacheHeaders(originalFeed.EtagHeader, originalFeed.LastModifiedHeader)
+	clt.WithUserAgent(originalFeed.UserAgent)
 	response, err := clt.Get()
 	if err != nil {
 		var customErr errors.LocalizedError
@@ -196,6 +199,7 @@ func (h *Handler) RefreshFeed(userID, feedID int64) error {
 	feedProcessor := processor.NewFeedProcessor(userID, h.store, subscription)
 	feedProcessor.WithScraperRules(originalFeed.ScraperRules)
+	feedProcessor.WithUserAgent(originalFeed.UserAgent)
 	feedProcessor.WithRewriteRules(originalFeed.RewriteRules)
 	feedProcessor.WithCrawler(originalFeed.Crawler)
 	feedProcessor.Process()
diff --git a/reader/processor/processor.go b/reader/processor/processor.go
index 002f7e88..f57e6cd7 100644
--- a/reader/processor/processor.go
+++ b/reader/processor/processor.go
@@ -21,6 +21,7 @@ type FeedProcessor struct {
 	scraperRules string
 	rewriteRules string
 	crawler      bool
+	userAgent    string
 }
 
 // WithCrawler enables the crawler.
@@ -33,6 +34,11 @@ func (f *FeedProcessor) WithScraperRules(rules string) {
 	f.scraperRules = rules
 }
 
+// WithUserAgent sets the User-Agent header for fetching article content.
+func (f *FeedProcessor) WithUserAgent(userAgent string) {
+	f.userAgent = userAgent
+}
+
 // WithRewriteRules adds rewrite rules to the processing.
 func (f *FeedProcessor) WithRewriteRules(rules string) {
 	f.rewriteRules = rules
@@ -45,7 +51,7 @@ func (f *FeedProcessor) Process() {
 		if f.store.EntryURLExists(f.userID, entry.URL) {
 			logger.Debug(`[FeedProcessor] Do not crawl existing entry URL: "%s"`, entry.URL)
 		} else {
-			content, err := scraper.Fetch(entry.URL, f.scraperRules)
+			content, err := scraper.Fetch(entry.URL, f.scraperRules, f.userAgent)
 			if err != nil {
 				logger.Error("[FeedProcessor] %v", err)
 			} else {
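Every `Handler.CreateFeed` caller gains the extra `userAgent` argument (the API controller above is one, the UI handlers are the others). A hypothetical helper showing the new call shape; the feed URL and User-Agent values are illustrative only:

```go
package example

import (
	"miniflux.app/model"
	"miniflux.app/reader/feed"
)

// subscribe is a hypothetical helper illustrating the new CreateFeed signature.
func subscribe(h *feed.Handler, userID, categoryID int64) (*model.Feed, error) {
	return h.CreateFeed(
		userID,
		categoryID,
		"https://example.org/feed.xml",
		true,           // crawler: also download full article content
		"MyReader/1.0", // per-feed User-Agent; "" keeps DefaultUserAgent
		"",             // username (no HTTP Basic Auth)
		"",             // password
	)
}
```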
diff --git a/reader/scraper/scraper.go b/reader/scraper/scraper.go
index d2cccdbb..7aa70841 100644
--- a/reader/scraper/scraper.go
+++ b/reader/scraper/scraper.go
@@ -19,8 +19,12 @@ import (
 )
 
 // Fetch downloads a web page and returns its relevant contents.
-func Fetch(websiteURL, rules string) (string, error) {
+func Fetch(websiteURL, rules, userAgent string) (string, error) {
 	clt := client.New(websiteURL)
+	if userAgent != "" {
+		clt.WithUserAgent(userAgent)
+	}
+
 	response, err := clt.Get()
 	if err != nil {
 		return "", err
diff --git a/reader/subscription/finder.go b/reader/subscription/finder.go
index 8be6f73e..027e8106 100644
--- a/reader/subscription/finder.go
+++ b/reader/subscription/finder.go
@@ -29,11 +29,12 @@ var (
 )
 
 // FindSubscriptions downloads and tries to find one or more subscriptions from a URL.
-func FindSubscriptions(websiteURL, username, password string) (Subscriptions, error) {
+func FindSubscriptions(websiteURL, userAgent, username, password string) (Subscriptions, error) {
 	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[FindSubscriptions] url=%s", websiteURL))
 
 	clt := client.New(websiteURL)
 	clt.WithCredentials(username, password)
+	clt.WithUserAgent(userAgent)
 	response, err := clt.Get()
 	if err != nil {
 		if _, ok := err.(errors.LocalizedError); ok {
diff --git a/storage/entry_query_builder.go b/storage/entry_query_builder.go
index 18cf7350..192f5157 100644
--- a/storage/entry_query_builder.go
+++ b/storage/entry_query_builder.go
@@ -192,7 +192,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 		e.id, e.user_id, e.feed_id, e.hash, e.published_at at time zone u.timezone,
 		e.title, e.url, e.comments_url, e.author, e.content, e.status, e.starred,
 		f.title as feed_title, f.feed_url, f.site_url, f.checked_at,
-		f.category_id, c.title as category_title, f.scraper_rules, f.rewrite_rules, f.crawler,
+		f.category_id, c.title as category_title, f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
 		fi.icon_id,
 		u.timezone
 		FROM entries e
@@ -247,6 +247,7 @@ func (e *EntryQueryBuilder) GetEntries() (model.Entries, error) {
 			&entry.Feed.ScraperRules,
 			&entry.Feed.RewriteRules,
 			&entry.Feed.Crawler,
+			&entry.Feed.UserAgent,
 			&iconID,
 			&tz,
 		)
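External callers of `scraper.Fetch` get the same treatment: a third argument where `""` keeps the default, both through `Fetch`'s explicit guard and through the client's own empty-string check. A sketch using a hypothetical wrapper:

```go
package example

import (
	"miniflux.app/model"
	"miniflux.app/reader/scraper"
)

// scrapeEntry is a hypothetical wrapper around the new Fetch signature.
func scrapeEntry(entryURL string, feed *model.Feed) (string, error) {
	// The third argument is new; "" keeps DefaultUserAgent.
	return scraper.Fetch(entryURL, feed.ScraperRules, feed.UserAgent)
}
```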
diff --git a/storage/feed.go b/storage/feed.go
index be312ac1..c60c11c3 100644
--- a/storage/feed.go
+++ b/storage/feed.go
@@ -66,7 +66,7 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
 		f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
 		f.user_id, f.checked_at at time zone u.timezone,
 		f.parsing_error_count, f.parsing_error_msg,
-		f.scraper_rules, f.rewrite_rules, f.crawler,
+		f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
 		f.username, f.password,
 		f.category_id, c.title as category_title,
 		fi.icon_id,
@@ -104,6 +104,7 @@ func (s *Storage) Feeds(userID int64) (model.Feeds, error) {
 			&feed.ScraperRules,
 			&feed.RewriteRules,
 			&feed.Crawler,
+			&feed.UserAgent,
 			&feed.Username,
 			&feed.Password,
 			&feed.Category.ID,
@@ -141,7 +142,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
 		f.id, f.feed_url, f.site_url, f.title, f.etag_header, f.last_modified_header,
 		f.user_id, f.checked_at at time zone u.timezone,
 		f.parsing_error_count, f.parsing_error_msg,
-		f.scraper_rules, f.rewrite_rules, f.crawler,
+		f.scraper_rules, f.rewrite_rules, f.crawler, f.user_agent,
 		f.username, f.password,
 		f.category_id, c.title as category_title,
 		fi.icon_id,
@@ -166,6 +167,7 @@ func (s *Storage) FeedByID(userID, feedID int64) (*model.Feed, error) {
 		&feed.ScraperRules,
 		&feed.RewriteRules,
 		&feed.Crawler,
+		&feed.UserAgent,
 		&feed.Username,
 		&feed.Password,
 		&feed.Category.ID,
@@ -194,8 +196,8 @@ func (s *Storage) CreateFeed(feed *model.Feed) error {
 	defer timer.ExecutionTime(time.Now(), fmt.Sprintf("[Storage:CreateFeed] feedURL=%s", feed.FeedURL))
 	sql := `
 		INSERT INTO feeds
-		(feed_url, site_url, title, category_id, user_id, etag_header, last_modified_header, crawler, username, password)
-		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)
+		(feed_url, site_url, title, category_id, user_id, etag_header, last_modified_header, crawler, user_agent, username, password)
+		VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
 		RETURNING id
 	`
@@ -209,6 +211,7 @@ func (s *Storage) CreateFeed(feed *model.Feed) error {
 		feed.EtagHeader,
 		feed.LastModifiedHeader,
 		feed.Crawler,
+		feed.UserAgent,
 		feed.Username,
 		feed.Password,
 	).Scan(&feed.ID)
@@ -234,9 +237,9 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
 	query := `UPDATE feeds SET
 		feed_url=$1, site_url=$2, title=$3, category_id=$4, etag_header=$5, last_modified_header=$6, checked_at=$7,
-		parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10, rewrite_rules=$11, crawler=$12,
-		username=$13, password=$14
-		WHERE id=$15 AND user_id=$16`
+		parsing_error_msg=$8, parsing_error_count=$9, scraper_rules=$10, rewrite_rules=$11, crawler=$12, user_agent=$13,
+		username=$14, password=$15
+		WHERE id=$16 AND user_id=$17`
 
 	_, err = s.db.Exec(query,
 		feed.FeedURL,
@@ -251,6 +254,7 @@ func (s *Storage) UpdateFeed(feed *model.Feed) (err error) {
 		feed.ScraperRules,
 		feed.RewriteRules,
 		feed.Crawler,
+		feed.UserAgent,
 		feed.Username,
 		feed.Password,
 		feed.ID,
diff --git a/template/html/add_subscription.html b/template/html/add_subscription.html
index 5b465496..7ccbc20f 100644
--- a/template/html/add_subscription.html
+++ b/template/html/add_subscription.html
@@ -41,6 +41,9 @@
+
+
+
diff --git a/template/html/choose_subscription.html b/template/html/choose_subscription.html
index ad1c38ad..2b053a15 100644
--- a/template/html/choose_subscription.html
+++ b/template/html/choose_subscription.html
@@ -19,6 +19,7 @@
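Once deployed, schema version 21 can be sanity-checked by reading the new column directly. A sketch assuming a standard `database/sql` connection with the `lib/pq` driver Miniflux already uses; the DSN is a placeholder:

```go
package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/lib/pq" // the PostgreSQL driver Miniflux already uses
)

func main() {
	// Placeholder DSN; point it at the Miniflux database.
	db, err := sql.Open("postgres", "postgres://miniflux:secret@localhost/miniflux?sslmode=disable")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	var userAgent string
	// Fails if schema_version_21 has not been applied
	// (or with sql.ErrNoRows when no feeds exist yet).
	if err := db.QueryRow(`SELECT user_agent FROM feeds ORDER BY id LIMIT 1`).Scan(&userAgent); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("first feed uses User-Agent %q (empty string means the default)\n", userAgent)
}
```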