initial punycode support

This commit is contained in:
Omar Polo 2021-01-27 10:47:49 +00:00
parent 390a611893
commit 3300cbe06a
9 changed files with 133 additions and 11 deletions

1
.gitignore vendored
View File

@ -17,4 +17,5 @@ regress/*.pem
regress/reg.conf
regress/fill-file
regress/iri_test
regress/puny-test
regress/*.o

View File

@ -1,3 +1,7 @@
2021-01-26 Omar Polo <op@omarpolo.com>
* puny.c (puny_decode): initial punycode support!
2021-01-25 Omar Polo <op@omarpolo.com>
* gmid.1: manpage improvements (example and usage)

View File

@ -13,7 +13,7 @@ lex.yy.c: lex.l y.tab.c
y.tab.c: parse.y
${YACC} -b y -d parse.y
SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c
SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c
OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
gmid: ${OBJS}

9
gmid.c
View File

@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l)
/* serialize the IRI */
strlcpy(b, c->iri.schema, sizeof(b));
strlcat(b, "://", sizeof(b));
strlcat(b, c->iri.host, sizeof(b));
/* log the decoded host name, but if it was invalid
* use the raw one. */
if (*c->domain != '\0')
strlcat(b, c->domain, sizeof(b));
else
strlcat(b, c->iri.host, sizeof(b));
strlcat(b, "/", sizeof(b));
strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */
if (*c->iri.query != '\0') { /* TODO: sanitize UTF8 */

8
gmid.h
View File

@ -54,6 +54,9 @@
#define HOSTSLEN 64
#define LOCLEN 32
/* RFC1034 limits each label to 63 octets (a full name may be up to 255); 63+1 for the NUL-terminator */
#define DOMAIN_NAME_LEN (63+1)
#define LOGE(c, fmt, ...) logs(LOG_ERR, c, fmt, __VA_ARGS__)
#define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__)
#define LOGN(c, fmt, ...) logs(LOG_NOTICE, c, fmt, __VA_ARGS__)
@ -134,6 +137,7 @@ struct client {
struct tls *ctx;
char req[GEMINI_URL_LEN];
struct iri iri;
char domain[DOMAIN_NAME_LEN];
int state, next;
int code;
const char *meta;
@ -236,9 +240,13 @@ void sandbox(void);
/* utf8.c */
int valid_multibyte_utf8(struct parser*);
char *utf8_nth(char*, size_t);
/* iri.c */
int parse_iri(char*, struct iri*, const char**);
int trim_req_iri(char*);
/* puny.c */
int puny_decode(char*, char*, size_t);
#endif

View File

@ -2,11 +2,16 @@ include ../Makefile.local
.PHONY: all clean runtime
all: iri_test runtime
all: puny-test testdata iri_test cert.pem
./puny-test
./runtime
./iri_test
puny-test: puny-test.o ../puny.o ../utf8.o
${CC} puny-test.o ../puny.o ../utf8.o -o puny-test
iri_test: iri_test.o ../iri.o ../utf8.o
${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS}
${CC} iri_test.o ../iri.o ../utf8.o -o iri_test
fill-file: fill-file.o
${CC} fill-file.o -o fill-file
@ -38,5 +43,5 @@ testdata: fill-file
cp hello testdata/dir
cp testdata/index.gmi testdata/dir/foo.gmi
runtime: testdata cert.pem
runtime: testdata
./runtime

78
regress/puny-test.c Normal file
View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2021 Omar Polo <op@omarpolo.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdio.h>
#include <string.h>
#include "../gmid.h"
/*
 * Table of punycode decoding test cases, terminated by a {NULL, NULL}
 * sentinel.
 *
 *   src - host name handed to puny_decode
 *   res - UTF-8 string the decoded output is compared against
 */
struct suite {
	const char	*src;
	const char	*res;
} t[] = {
	/* inputs whose expected output equals the input */
	{"foo", "foo"},
	{"xn-invalid", "xn-invalid"},
	{"naïve", "naïve"},

	/* 2 bytes */
	{"xn--8ca", "è"},
	{"xn--caff-8oa", "caffè"},
	{"xn--nave-6pa", "naïve"},
	{"xn--e-0mbbc", "τeστ"},
	{"xn--8ca67lbac", "τèστ"},
	{"xn--28j2a3ar1p", "こんにちは"},
	{"xn--hello--ur7iy09x", "hello-世界"},
	{"xn--hi--hi-rr7iy09x", "hi-世界-hi"},

	/* only the first label is encoded */
	{"xn--caf-8la.foo.org", "cafè.foo.org"},

	/* 3 bytes: "j6h" decodes to U+2668 (E2 99 A8 in UTF-8) */
	{"xn--j6h", "♨"},

	/* 4 bytes */
	{"xn--x73l", "𩸽"},
	{"xn--x73laaa", "𩸽𩸽𩸽𩸽"},

	{NULL, NULL}
};
/*
 * Run every entry of the t[] table through puny_decode and compare the
 * decoded output with the expected string.  One line is printed per
 * case.
 *
 * Returns 0 when every case matched and 1 if any case failed to decode,
 * decoded to something unexpected, or if the test could not allocate
 * memory; the regress Makefile relies on the exit status.
 */
int
main(int argc, char **argv)
{
	struct suite	*i;
	int		 failed;
	char		*hostname;
	char		 buf[64]; /* name len */

	(void)argc;
	(void)argv;

	failed = 0;
	for (i = t; i->src != NULL; ++i) {
		/*
		 * puny_decode takes a non-const char *, so hand it a
		 * private copy rather than the string literal.
		 */
		if ((hostname = strdup(i->src)) == NULL) {
			/* an allocation failure must not look like a pass */
			perror("strdup");
			return 1;
		}

		memset(buf, 0, sizeof(buf));

		/* a zero return from puny_decode is treated as failure */
		if (!puny_decode(hostname, buf, sizeof(buf))) {
			printf("decode: failure with %s\n", i->src);
			failed = 1;
		} else if (strcmp(buf, i->res)) {
			printf("ERR: expected \"%s\", got \"%s\"\n",
			    i->res, buf);
			failed = 1;
		} else
			printf("OK: %s => %s\n", i->src, buf);

		/* released on every path, including the failing ones */
		free(hostname);
	}

	return failed;
}

View File

@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c)
}
servname = tls_conn_servername(c->ctx);
puny_decode(servname, c->domain, sizeof(c->domain));
for (h = hosts; h->domain != NULL; ++h) {
if (!strcmp(h->domain, "*"))
break;
if (servname != NULL && !fnmatch(h->domain, servname, 0))
if (!fnmatch(h->domain, c->domain, 0))
break;
}
@ -290,6 +288,7 @@ void
handle_open_conn(struct pollfd *fds, struct client *c)
{
const char *parse_err = "invalid request";
char decoded[DOMAIN_NAME_LEN];
bzero(c->req, sizeof(c->req));
bzero(&c->iri, sizeof(c->iri));
@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c)
return;
}
/* XXX: we should check that the SNI matches the requested host */
if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) {
puny_decode(c->iri.host, decoded, sizeof(decoded));
if (c->iri.port_no != conf.port
|| strcmp(c->iri.schema, "gemini")
|| strcmp(decoded, c->domain)) {
start_reply(fds, c, PROXY_REFUSED, "won't proxy request");
return;
}

17
utf8.c
View File

@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p)
}
return 1;
}
/*
 * Walk the NUL-terminated string s and return a pointer to the byte
 * that follows its first n complete code points, i.e. the start of the
 * code point with zero-based index n.
 *
 * A code point is counted each time utf8_decode returns 0.  NULL is
 * returned when the string ends before n code points were seen, or
 * when the decoder state is not UTF8_ACCEPT once the walk stops.
 */
char *
utf8_nth(char *s, size_t n)
{
	uint32_t	codepoint = 0, st = 0;
	size_t		seen = 0;

	while (*s != '\0' && seen < n) {
		if (utf8_decode(&st, &codepoint, *s) == 0)
			seen++;
		s++;
	}

	if (st != UTF8_ACCEPT || seen != n)
		return NULL;
	return s;
}