Use Odysee video duration as read time

This feature works by scraping the Odysee website.

To enable it, set the FETCH_ODYSEE_WATCH_TIME environment variable to
1.
This commit is contained in:
Kierán Meinhardt 2023-03-18 11:13:58 +01:00 committed by Frédéric Guillot
parent 859b4466ab
commit 3060946cc1
5 changed files with 82 additions and 0 deletions

View File

@ -1598,6 +1598,24 @@ func TestAuthProxyUserCreationAdmin(t *testing.T) {
}
}
// TestFetchOdyseeWatchTime verifies that setting FETCH_ODYSEE_WATCH_TIME to
// "1" in the environment turns on the Odysee watch time option.
func TestFetchOdyseeWatchTime(t *testing.T) {
	// Start from an empty environment so that only the variable under test is set.
	os.Clearenv()
	os.Setenv("FETCH_ODYSEE_WATCH_TIME", "1")

	opts, err := NewParser().ParseEnvironmentVariables()
	if err != nil {
		t.Fatalf(`Parsing failure: %v`, err)
	}

	const want = true
	if got := opts.FetchOdyseeWatchTime(); got != want {
		t.Fatalf(`Unexpected FETCH_ODYSEE_WATCH_TIME value, got %v instead of %v`, got, want)
	}
}
func TestFetchYouTubeWatchTime(t *testing.T) {
os.Clearenv()
os.Setenv("FETCH_YOUTUBE_WATCH_TIME", "1")

View File

@ -49,6 +49,7 @@ const (
defaultProxyOption = "http-only"
defaultProxyMediaTypes = "image"
defaultProxyUrl = ""
defaultFetchOdyseeWatchTime = false
defaultFetchYouTubeWatchTime = false
defaultYouTubeEmbedUrlOverride = "https://www.youtube-nocookie.com/embed/"
defaultCreateAdmin = false
@ -126,6 +127,7 @@ type Options struct {
proxyOption string
proxyMediaTypes []string
proxyUrl string
fetchOdyseeWatchTime bool
fetchYouTubeWatchTime bool
youTubeEmbedUrlOverride string
oauth2UserCreationAllowed bool
@ -196,6 +198,7 @@ func NewOptions() *Options {
proxyOption: defaultProxyOption,
proxyMediaTypes: []string{defaultProxyMediaTypes},
proxyUrl: defaultProxyUrl,
fetchOdyseeWatchTime: defaultFetchOdyseeWatchTime,
fetchYouTubeWatchTime: defaultFetchYouTubeWatchTime,
youTubeEmbedUrlOverride: defaultYouTubeEmbedUrlOverride,
oauth2UserCreationAllowed: defaultOAuth2UserCreation,
@ -436,6 +439,12 @@ func (o *Options) YouTubeEmbedUrlOverride() string {
return o.youTubeEmbedUrlOverride
}
// FetchOdyseeWatchTime reports whether the Odysee video duration should be
// fetched and used as the reading time of an entry.
//
// It returns the value of the fetchOdyseeWatchTime option, which is read from
// the FETCH_ODYSEE_WATCH_TIME setting and is false by default.
func (o *Options) FetchOdyseeWatchTime() bool {
return o.fetchOdyseeWatchTime
}
// ProxyOption returns "none" to never proxy, "http-only" to proxy non-HTTPS, "all" to always proxy.
func (o *Options) ProxyOption() string {
return o.proxyOption
@ -581,6 +590,7 @@ func (o *Options) SortedOptions(redactSecret bool) []*Option {
"DISABLE_HTTP_SERVICE": !o.httpService,
"DISABLE_SCHEDULER_SERVICE": !o.schedulerService,
"FETCH_YOUTUBE_WATCH_TIME": o.fetchYouTubeWatchTime,
"FETCH_ODYSEE_WATCH_TIME": o.fetchOdyseeWatchTime,
"HTTPS": o.HTTPS,
"HTTP_CLIENT_MAX_BODY_SIZE": o.httpClientMaxBodySize,
"HTTP_CLIENT_PROXY": o.httpClientProxy,

View File

@ -213,6 +213,8 @@ func (p *Parser) parseLines(lines []string) (err error) {
p.opts.metricsPassword = parseString(value, defaultMetricsPassword)
case "METRICS_PASSWORD_FILE":
p.opts.metricsPassword = readSecretFile(value, defaultMetricsPassword)
case "FETCH_ODYSEE_WATCH_TIME":
p.opts.fetchOdyseeWatchTime = parseBool(value, defaultFetchOdyseeWatchTime)
case "FETCH_YOUTUBE_WATCH_TIME":
p.opts.fetchYouTubeWatchTime = parseBool(value, defaultFetchYouTubeWatchTime)
case "YOUTUBE_EMBED_URL_OVERRIDE":

View File

@ -118,6 +118,12 @@ Set the value to 1 to enable debug logs\&.
.br
Disabled by default\&.
.TP
.B FETCH_ODYSEE_WATCH_TIME
Set the value to 1 to scrape the video duration from the Odysee website and
use it as the reading time\&.
.br
Disabled by default\&.
.TP
.B FETCH_YOUTUBE_WATCH_TIME
Set the value to 1 to scrape the video duration from the YouTube website and
use it as the reading time\&.

View File

@ -32,6 +32,7 @@ import (
// Precompiled regular expressions shared by the functions in this file.
var (
// youtubeRegex matches a YouTube watch URL and captures everything that
// follows "v=" in its single group.
youtubeRegex = regexp.MustCompile(`youtube\.com/watch\?v=(.*)`)
// odyseeRegex matches any URL that starts with "https://odysee.com". Only
// the beginning of the string is anchored, so this is a prefix match.
odyseeRegex = regexp.MustCompile(`^https://odysee\.com`)
// iso8601Regex matches a whole ISO 8601 duration string (e.g. "PT1H2M3S").
// Every component is optional and is exposed through a named group: year,
// month, week, day, hour, minute and second.
iso8601Regex = regexp.MustCompile(`^P((?P<year>\d+)Y)?((?P<month>\d+)M)?((?P<week>\d+)W)?((?P<day>\d+)D)?(T((?P<hour>\d+)H)?((?P<minute>\d+)M)?((?P<second>\d+)S)?)?$`)
// customReplaceRuleRegex matches a rule written as rewrite("..."|"...")
// and captures the two quoted arguments.
customReplaceRuleRegex = regexp.MustCompile(`rewrite\("(.*)"\|"(.*)"\)`)
)
@ -207,6 +208,17 @@ func updateEntryReadingTime(store *storage.Storage, feed *model.Feed, entry *mod
}
}
if shouldFetchOdyseeWatchTime(entry) {
if entryIsNew {
watchTime, err := fetchOdyseeWatchTime(entry.URL)
if err != nil {
logger.Error("[Processor] Unable to fetch Odysee watch time: %q => %v", entry.URL, err)
}
entry.ReadingTime = watchTime
} else {
entry.ReadingTime = store.GetReadTime(entry, feed)
}
}
// Handle YouTube/Odysee error cases and entries from other sources.
if entry.ReadingTime == 0 {
entry.ReadingTime = calculateReadingTime(entry.Content, user)
@ -222,6 +234,14 @@ func shouldFetchYouTubeWatchTime(entry *model.Entry) bool {
return urlMatchesYouTubePattern
}
// shouldFetchOdyseeWatchTime reports whether the watch time of the given entry
// should be scraped from Odysee. This is the case when the feature is enabled
// in the configuration and the entry URL matches odyseeRegex.
func shouldFetchOdyseeWatchTime(entry *model.Entry) bool {
	if !config.Opts.FetchOdyseeWatchTime() {
		return false
	}

	// Only a yes/no answer is needed, so use MatchString rather than
	// building a submatch slice that would be thrown away.
	return odyseeRegex.MatchString(entry.URL)
}
func fetchYouTubeWatchTime(url string) (int, error) {
clt := client.NewClientWithConfig(url, config.Opts)
response, browserErr := browser.Exec(clt)
@ -247,6 +267,32 @@ func fetchYouTubeWatchTime(url string) (int, error) {
return int(dur.Minutes()), nil
}
// fetchOdyseeWatchTime downloads the Odysee page located at url and returns
// the video duration in whole minutes (rounded down).
//
// The duration is read from the content attribute of the first
// og:video:duration meta tag of the page, which holds the watch time in
// seconds.
//
// An error is returned when the page cannot be fetched or parsed, when the
// meta tag is missing, or when its content is not a non-negative integer.
func fetchOdyseeWatchTime(url string) (int, error) {
	clt := client.NewClientWithConfig(url, config.Opts)
	response, browserErr := browser.Exec(clt)
	if browserErr != nil {
		return 0, browserErr
	}

	doc, docErr := goquery.NewDocumentFromReader(response.Body)
	if docErr != nil {
		return 0, docErr
	}

	durs, exists := doc.Find(`meta[property="og:video:duration"]`).First().Attr("content")
	if !exists {
		return 0, errors.New("duration not found")
	}

	// The value comes from a scraped page: quote it in error messages and
	// keep the underlying parse error in the chain for errors.Is/As.
	seconds, err := strconv.ParseInt(durs, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("unable to parse duration %q: %w", durs, err)
	}

	// A negative duration is meaningless and would be stored as a negative
	// reading time, so report it instead.
	if seconds < 0 {
		return 0, fmt.Errorf("invalid negative duration %q", durs)
	}

	return int(seconds / 60), nil
}
// parseISO8601 parses an ISO 8601 duration string.
func parseISO8601(from string) (time.Duration, error) {
var match []string