From 3060946cc142130fa3dfb9027a7c3d529fe4e0b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kier=C3=A1n=20Meinhardt?= Date: Sat, 18 Mar 2023 11:13:58 +0100 Subject: [PATCH] Use Odysee video duration as read time This feature works by scraping the Odysee website. To enable it, set the FETCH_ODYSEE_WATCH_TIME environment variable to 1. --- config/config_test.go | 18 ++++++++++++++ config/options.go | 10 ++++++++ config/parser.go | 2 ++ miniflux.1 | 6 +++++ reader/processor/processor.go | 46 +++++++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+) diff --git a/config/config_test.go b/config/config_test.go index f492f4f5..2e1ae401 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -1598,6 +1598,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) { } } +func TestFetchOdyseeWatchTime(t *testing.T) { + os.Clearenv() + os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1") + + parser := NewParser() + opts, err := parser.ParseEnvironmentVariables() + if err != nil { + t.Fatalf(`Parsing failure: %v`, err) + } + + expected := true + result := opts.FetchOdyseeWatchTime() + + if result != expected { + t.Fatalf(`Unexpected FETCH_ODYSEE_WATCH_TIME value, got %v instead of %v`, result, expected) + } +} + func TestFetchYouTubeWatchTime(t *testing.T) { os.Clearenv() os.Setenv("FETCH_YOUTUBE_WATCH_TIME", "1") diff --git a/config/options.go b/config/options.go index 52a6ef10..7147a754 100644 --- a/config/options.go +++ b/config/options.go @@ -49,6 +49,7 @@ const ( defaultProxyOption = "http-only" defaultProxyMediaTypes = "image" defaultProxyUrl = "" + defaultFetchOdyseeWatchTime = false defaultFetchYouTubeWatchTime = false defaultYouTubeEmbedUrlOverride = "https://www.youtube-nocookie.com/embed/" defaultCreateAdmin = false @@ -126,6 +127,7 @@ type Options struct { proxyOption string proxyMediaTypes []string proxyUrl string + fetchOdyseeWatchTime bool fetchYouTubeWatchTime bool youTubeEmbedUrlOverride string oauth2UserCreationAllowed bool @@ -196,6 +198,7 @@ func 
NewOptions() *Options { proxyOption: defaultProxyOption, proxyMediaTypes: []string{defaultProxyMediaTypes}, proxyUrl: defaultProxyUrl, + fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime, fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime, youTubeEmbedUrlOverride: defaultYouTubeEmbedUrlOverride, oauth2UserCreationAllowed: defaultOAuth2UserCreation, @@ -436,6 +439,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string { return o.youTubeEmbedUrlOverride } +// FetchOdyseeWatchTime returns true if the Odysee video duration +// should be fetched and used as a reading time. +func (o *Options) FetchOdyseeWatchTime() bool { + return o.fetchOdyseeWatchTime +} + // ProxyOption returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy. func (o *Options) ProxyOption() string { return o.proxyOption @@ -581,6 +590,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option { "DISABLE_HTTP_SERVICE": !o.httpService, "DISABLE_SCHEDULER_SERVICE": !o.schedulerService, "FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime, + "FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime, "HTTPS": o.HTTPS, "HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize, "HTTP_CLIENT_PROXY": o.httpClientProxy, diff --git a/config/parser.go b/config/parser.go index a76e02c0..41d81d3c 100644 --- a/config/parser.go +++ b/config/parser.go @@ -213,6 +213,8 @@ func (p *Parser) parseLines(lines []string) (err error) { p.opts.metricsPassword = parseString(value, defaultMetricsPassword) case "METRICS_PASSWORD_FILE": p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword) + case "FETCH_ODYSEE_WATCH_TIME": + p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime) case "FETCH_YOUTUBE_WATCH_TIME": p.opts.fetchYouTubeWatchTime = parseBool(value, defaultFetchYouTubeWatchTime) case "YOUTUBE_EMBED_URL_OVERRIDE": diff --git a/miniflux.1 b/miniflux.1 index b5c2837e..1d66a8c4 100644 --- a/miniflux.1 +++ b/miniflux.1 @@ -118,6 +118,12 @@ Set the value to 1 to 
enable debug logs\&. .br Disabled by default\&. .TP +.B FETCH_ODYSEE_WATCH_TIME +Set the value to 1 to scrape video duration from Odysee website and +use it as a reading time\&. +.br +Disabled by default\&. +.TP .B FETCH_YOUTUBE_WATCH_TIME Set the value to 1 to scrape video duration from YouTube website and use it as a reading time\&. diff --git a/reader/processor/processor.go b/reader/processor/processor.go index 03765e76..e720e1cf 100644 --- a/reader/processor/processor.go +++ b/reader/processor/processor.go @@ -32,6 +32,7 @@ import ( var ( youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`) + odyseeRegex = regexp.MustCompile(`^https://odysee\.com/`) iso8601Regex = regexp.MustCompile(`^P((?P\d+)Y)?((?P\d+)M)?((?P\d+)W)?((?P\d+)D)?(T((?P\d+)H)?((?P\d+)M)?((?P\d+)S)?)?$`) customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`) ) @@ -207,6 +208,17 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod } } + if shouldFetchOdyseeWatchTime(entry) { + if entryIsNew { + watchTime, err := fetchOdyseeWatchTime(entry.URL) + if err != nil { + logger.Error("[Processor] Unable to fetch Odysee watch time: %q => %v", entry.URL, err) + } + entry.ReadingTime = watchTime + } else { + entry.ReadingTime = store.GetReadTime(entry, feed) + } + } // Handle YT error case and non-YT entries. 
if entry.ReadingTime == 0 { entry.ReadingTime = calculateReadingTime(entry.Content, user) @@ -222,6 +234,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool { return urlMatchesYouTubePattern } +func shouldFetchOdyseeWatchTime(entry *model.Entry) bool { + if !config.Opts.FetchOdyseeWatchTime() { + return false + } + // MatchString avoids building a submatch slice just to test for a match. + return odyseeRegex.MatchString(entry.URL) +} + func fetchYouTubeWatchTime(url string) (int, error) { clt := client.NewClientWithConfig(url, config.Opts) response, browserErr := browser.Exec(clt) @@ -247,6 +267,32 @@ func fetchYouTubeWatchTime(url string) (int, error) { return int(dur.Minutes()), nil } +func fetchOdyseeWatchTime(url string) (int, error) { + clt := client.NewClientWithConfig(url, config.Opts) + response, browserErr := browser.Exec(clt) + if browserErr != nil { + return 0, browserErr + } + + doc, docErr := goquery.NewDocumentFromReader(response.Body) + if docErr != nil { + return 0, docErr + } + + durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content") + // durs holds the og:video:duration value: the video watch time in seconds. + if !exists { + return 0, errors.New("duration not found") + } + + dur, err := strconv.ParseInt(durs, 10, 64) + if err != nil { + return 0, fmt.Errorf("unable to parse duration %q: %w", durs, err) + } + + return int(dur / 60), nil +} + // parseISO8601 parses an ISO 8601 duration string. func parseISO8601(from string) (time.Duration, error) { var match []string