From e4d82becb71b1b8f9c843a5f0f8657bc6b93c67b Mon Sep 17 00:00:00 2001
From: Omar Polo
Date: Fri, 15 Jan 2021 09:27:42 +0000
Subject: [PATCH] normalize host name when parsing the IRI

RFC3986 3.2.2 "Host" says that

> Although host is case-insensitive, producers and normalizers should
> use lowercase for registered names and hexadecimal addresses for the
> sake of uniformity, while only using uppercase letters for
> percent-encodings.

so we cope with that.

Only ASCII bytes are lowercased.  The byte is converted to unsigned
char before the range check and before calling tolower(), so bytes
>= 0x80 are left untouched and tolower() never receives a negative
value (which would be undefined behaviour where char is signed).
---
 iri.c      | 6 +++++-
 iri_test.c | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/iri.c b/iri.c
index 1b997dd..d64afe8 100644
--- a/iri.c
+++ b/iri.c
@@ -146,8 +146,12 @@ parse_authority(struct parser *p)
 
 	while (unreserved(*p->iri)
 	    || sub_delimiters(*p->iri)
-	    || parse_pct_encoded(p))
+	    || parse_pct_encoded(p)) {
+		/* normalize the host name (ASCII only). */
+		if ((unsigned char)*p->iri < 0x7F)
+			*p->iri = tolower((unsigned char)*p->iri);
 		p->iri++;
+	}
 
 	if (p->err != NULL)
 		return 0;
diff --git a/iri_test.c b/iri_test.c
index e322813..81ec0b4 100644
--- a/iri_test.c
+++ b/iri_test.c
@@ -123,6 +123,10 @@ main(void)
 	    FAIL,
 	    empty,
 	    "FAIL with invalid port number");
+	TEST("gemini://OmArPoLo.CoM",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "host is case-insensitive");
 
 	/* path */
 	TEST("gemini://omarpolo.com/foo/bar/baz",