Handle some non-English date formats

This commit is contained in:
Frédéric Guillot 2018-04-09 21:27:15 -07:00
parent 20f874399d
commit 02ba735ba9
2 changed files with 37 additions and 1 deletions

View File

@ -71,6 +71,8 @@ var dateFormats = []string{
"Mon, 2 Jan 2006 15:04:05 -0700",
"Mon, 2 Jan 2006 15:04:05",
"Mon, 2 Jan 2006 15:04",
"Mon, 02 Jan 2006, 15:04",
"Mon, 2 Jan 2006, 15:04",
"Mon,2 Jan 2006",
"Mon, 2 Jan 2006",
"Mon, 2 Jan 15:04:05 MST",
@ -192,6 +194,7 @@ var dateFormats = []string{
// Parse parses a given date string using a large
// list of commonly found feed date formats.
func Parse(ds string) (t time.Time, err error) {
ds = replaceNonEnglishWords(ds)
d := strings.TrimSpace(ds)
if d == "" {
return t, errors.New("date parser: empty value")
@ -211,3 +214,32 @@ func Parse(ds string) (t time.Time, err error) {
err = fmt.Errorf(`date parser: failed to parse date "%s"`, ds)
return
}
// nonEnglishDateReplacer maps German and French weekday and month
// abbreviations to their English equivalents.
//
// It is built once at package initialization: a strings.Replacer is safe for
// concurrent use, so there is no need to construct a new one for every date
// that gets parsed.
//
// Weekday keys include the trailing comma and month keys include the trailing
// space, which keeps the patterns from matching the English abbreviations
// themselves (for example "Mo," does not occur in "Mon,").
var nonEnglishDateReplacer = strings.NewReplacer(
	// German weekdays.
	"Mo,", "Mon,",
	"Di,", "Tue,",
	"Mi,", "Wed,",
	"Do,", "Thu,",
	"Fr,", "Fri,",
	"Sa,", "Sat,",
	"So,", "Sun,",
	// German months that differ from the English abbreviation.
	"Mär ", "Mar ",
	"Mai ", "May ",
	"Okt ", "Oct ",
	"Dez ", "Dec ",
	// French weekdays.
	"lun,", "Mon,",
	"mar,", "Tue,",
	"mer,", "Wed,",
	"jeu,", "Thu,",
	"ven,", "Fri,",
	"sam,", "Sat,",
	"dim,", "Sun,",
	// French months.
	"avr ", "Apr ",
	"mai ", "May ",
	"jui ", "Jun ",
)

// replaceNonEnglishWords rewrites the German and French weekday and month
// abbreviations known to nonEnglishDateReplacer into English and returns the
// resulting string.
//
// Matching is case-sensitive and purely textual: every occurrence of a known
// abbreviation (including its trailing comma or space) is replaced wherever it
// appears in ds. Input that contains none of the abbreviations is returned
// with the same content.
func replaceNonEnglishWords(ds string) string {
	return nonEnglishDateReplacer.Replace(ds)
}

View File

@ -47,11 +47,15 @@ func TestParseWeirdDateFormat(t *testing.T) {
"Friday, December 22, 2017 - 3:09pm",
"Friday, December 8, 2017 - 3:07pm",
"Thu, 25 Feb 2016 00:00:00 Europe/Brussels",
"Mon, 09 Apr 2018, 16:04",
"Di, 23 Jan 2018 00:00:00 +0100",
"Do, 29 Mär 2018 00:00:00 +0200",
"mer, 9 avr 2018 00:00:00 +0200",
}
for _, date := range dates {
if _, err := Parse(date); err != nil {
t.Fatalf(`Unable to parse date: "%s"`, date)
t.Fatalf(`Unable to parse date: %q`, date)
}
}
}