From b1c99777114f877d446a6b45688f60dba2b03f08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Guillot?= Date: Tue, 17 Nov 2020 17:01:59 -0800 Subject: [PATCH] Handle more invalid dates --- reader/date/parser.go | 38 ++++++++++++++++++++++++++++---------- reader/date/parser_test.go | 6 ++++++ reader/rdf/rdf.go | 2 +- 3 files changed, 35 insertions(+), 11 deletions(-) diff --git a/reader/date/parser.go b/reader/date/parser.go index eb78f490..6f959528 100644 --- a/reader/date/parser.go +++ b/reader/date/parser.go @@ -23,6 +23,7 @@ var dateFormats = []string{ time.RFC1123Z, time.RFC1123, time.ANSIC, + "Mon, 02 Jan 2006 15:04:05 MST -07:00", "Mon, January 2, 2006, 3:04 PM MST", "Mon, January 2 2006 15:04:05 -0700", "Mon, January 02, 2006, 15:04:05 MST", @@ -38,6 +39,7 @@ var dateFormats = []string{ "Mon Jan 02, 2006 3:04 pm", "Mon, Jan 02,2006 15:04:05 MST", "Mon Jan 02 2006 15:04:05 -0700", + "Mon, 02/01/2006", "Monday, 2. January 2006 - 15:04", "Monday 02 January 2006", "Monday, January 2, 2006 15:04:05 MST", @@ -206,11 +208,15 @@ var dateFormats = []string{ "01/02/2006", "01-02-2006", "Jan. 2006", + "Jan. 2, 2006, 03:04 p.m.", + "2006-01-02 15:04:05 -07:00", + "2 January, 2006", } var invalidTimezoneReplacer = strings.NewReplacer( "Europe/Brussels", "CET", "GMT+0000 (Coordinated Universal Time)", "GMT", + "GMT-", "GMT -", ) var invalidLocalizedDateReplacer = strings.NewReplacer( @@ -246,22 +252,30 @@ var invalidLocalizedDateReplacer = strings.NewReplacer( "Vendredi,", "Friday,", "Samedi,", "Saturday,", "Dimanche,", "Sunday,", - "avr ", "Apr ", - "mai ", "May ", - "jui ", "Jun ", - "juin ", "June ", "jan.", "January ", "feb.", "February ", "mars.", "March ", "avril.", "April ", "mai.", "May ", "juin.", "June ", - "juil.", "july", - "août.", "august", - "sept.", "september", - "oct.", "october", - "nov.", "november", - "dec.", "december", + "juil.", "July", + "août.", "August", + "sept.", "September", + "oct.", "October", + "nov.", "November", + "dec.", "December", + "janvier ", "January ", + "février ", "February ", + "mars ", "March ", + "avril ", "April ", + "mai ", "May ", + "juin ", "June ", + "juillet ", "July", + "août ", "August", + "septembre ", "September", + "octobre ", "October", + "november ", "November", + "décembre ", "December", "Janvier", "January", "Février", "February", "Mars", "March", @@ -274,6 +288,10 @@ var invalidLocalizedDateReplacer = strings.NewReplacer( "Octobre", "October", "Novembre", "November", "Décembre", "December", + "avr ", "Apr ", + "mai ", "May ", + "jui ", "Jun ", + "juin ", "June ", ) // Parse parses a given date string using a large diff --git a/reader/date/parser_test.go b/reader/date/parser_test.go index 31f9029b..f39e8dd8 100644 --- a/reader/date/parser_test.go +++ b/reader/date/parser_test.go @@ -143,6 +143,12 @@ func TestParseWeirdDateFormat(t *testing.T) { "Mon, 16th Nov 2020 13:16:28 GMT", "Nov. 2020", "ven., 03 juil. 2020 15:09:58 +0000", + "Fri, 26/06/2020", + "Thu, 29 Oct 2020 07:36:03 GMT-07:00", + "jeu., 02 avril 2020 00:00:00 +0200", + "Jan. 4, 2016, 12:37 p.m.", + "2018-10-23 04:07:42 +00:00", + "5 August, 2019", } for _, date := range dates { diff --git a/reader/rdf/rdf.go b/reader/rdf/rdf.go index df4296b9..73c3801b 100644 --- a/reader/rdf/rdf.go +++ b/reader/rdf/rdf.go @@ -95,7 +95,7 @@ func (r *rdfItem) entryDate() time.Time { if r.DublinCoreDate != "" { result, err := date.Parse(r.DublinCoreDate) if err != nil { - logger.Error("rdf: %v", err) + logger.Error("rdf: %v (entry link = %s)", err, r.Link) return time.Now() }