mirror of https://github.com/omar-polo/gmid.git
implement a valid RFC3986 (URI) parser
Up until now I used a "poor man's" approach: the URI parser was barely a parser; it tried to extract the path from the request, with some minor checking, and that was all. This is obviously not RFC3986-compliant. The new RFC3986 (URI) parser should be fully compliant. It may accept some invalid URIs, but it shouldn't reject or mis-parse valid ones (in particular, the rule for the path is far more relaxed in this parser than it is in the RFC text). One difference from RFC3986 is that we don't even try to parse the (optional) userinfo part of a URI: following the Gemini spec, we treat it as an error. A further caveat is that %2F in the path part of the URI is indistinguishable from a literal '/': this is NOT conforming, but given the scope and use of gmid, I don't see how else to treat a %2F sequence in the path (reject the URI?).
This commit is contained in:
parent
d5aba4c791
commit
33d32d1fd6
|
@ -2,5 +2,6 @@ cert.pem
|
|||
key.pem
|
||||
TAGS
|
||||
gmid
|
||||
uri_test
|
||||
*.o
|
||||
docs
|
||||
|
|
18
Makefile
18
Makefile
|
@ -2,18 +2,24 @@ CC = cc
|
|||
CFLAGS = -Wall -Wextra -g
|
||||
LDFLAGS = -ltls
|
||||
|
||||
.PHONY: all clean
|
||||
.PHONY: all clean test
|
||||
|
||||
all: gmid TAGS README.md
|
||||
|
||||
gmid: gmid.o
|
||||
${CC} gmid.o -o gmid ${LDFLAGS}
|
||||
gmid: gmid.o uri.o
|
||||
${CC} gmid.o uri.o -o gmid ${LDFLAGS}
|
||||
|
||||
TAGS: gmid.c
|
||||
-etags gmid.c || true
|
||||
TAGS: gmid.c uri.c
|
||||
-etags gmid.c uri.c || true
|
||||
|
||||
README.md: gmid.1
|
||||
mandoc -Tmarkdown gmid.1 | sed -e '1d' -e '$$d' > README.md
|
||||
|
||||
clean:
|
||||
rm -f gmid.o gmid
|
||||
rm -f *.o gmid
|
||||
|
||||
uri_test: uri_test.o uri.o
|
||||
${CC} uri_test.o uri.o -o uri_test ${LDFLAGS}
|
||||
|
||||
test: uri_test
|
||||
./uri_test
|
||||
|
|
155
gmid.c
155
gmid.c
|
@ -34,6 +34,28 @@ int port;
|
|||
int foreground;
|
||||
int connected_clients;
|
||||
|
||||
/*
 * File extension to MIME type table.  Each entry pairs a MIME type with
 * one extension (spelled without a dot); the list is closed by an
 * all-NULL sentinel entry.
 */
struct etm {
	const char	*mime;	/* MIME type string */
	const char	*ext;	/* file extension */
} filetypes[] = {
	/* documents */
	{ .mime = "application/pdf",	.ext = "pdf" },

	/* images */
	{ .mime = "image/gif",		.ext = "gif" },
	{ .mime = "image/jpeg",		.ext = "jpg" },
	{ .mime = "image/jpeg",		.ext = "jpeg" },
	{ .mime = "image/png",		.ext = "png" },
	{ .mime = "image/svg+xml",	.ext = "svg" },

	/* text */
	{ .mime = "text/gemini",	.ext = "gemini" },
	{ .mime = "text/gemini",	.ext = "gmi" },
	{ .mime = "text/markdown",	.ext = "markdown" },
	{ .mime = "text/markdown",	.ext = "md" },
	{ .mime = "text/plain",		.ext = "txt" },
	{ .mime = "text/xml",		.ext = "xml" },

	/* sentinel */
	{ .mime = NULL,			.ext = NULL }
};
|
||||
|
||||
void
|
||||
siginfo_handler(int sig)
|
||||
{
|
||||
|
@ -51,102 +73,6 @@ starts_with(const char *str, const char *prefix)
|
|||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Skip the protocol prefix of a request URL.
 *
 * Returns:
 *  - url itself when it contains no "//" marker (treated as relative);
 *  - a pointer just past the "//" when url starts with "//" (gemini is
 *    implied) or with "gemini://";
 *  - NULL when a "//" is present but is not preceded by exactly
 *    "gemini:".
 */
char *
url_after_proto(char *url)
{
	const char *proto = "gemini:";
	const char *marker = "//";
	char *s;

	/* a relative URL */
	if ((s = strstr(url, marker)) == NULL)
		return url;

	/*
	 * if a protocol is not specified, gemini should be implied:
	 * this handles the case of //example.com
	 */
	if (s == url)
		return s + strlen(marker);

	/*
	 * Compare offsets instead of computing `s - strlen(proto)': when
	 * the marker sits fewer than strlen(proto) bytes into the string,
	 * that subtraction would form a pointer before the start of url.
	 */
	if ((size_t)(s - url) != strlen(proto))
		return NULL;

	if (!starts_with(url, proto))
		return NULL;

	return s + strlen(marker);
}
|
||||
|
||||
/*
 * Locate the request part of a URL: whatever url_after_proto() leaves,
 * minus the leading host.  Returns NULL when url_after_proto() rejects
 * the URL, the URL itself when nothing was skipped, otherwise a pointer
 * to the first '/' after the skipped prefix (or to the terminating NUL
 * when there is none).
 */
char *
url_start_of_request(char *url)
{
	char *rest, *slash;

	rest = url_after_proto(url);
	if (rest == NULL)
		return NULL;

	/* nothing was skipped: non-absolute URL */
	if (rest == url)
		return rest;

	slash = strchr(rest, '/');
	if (slash != NULL)
		return slash;

	/* no path at all: hand back the empty string at the end */
	return rest + strlen(rest);
}
|
||||
|
||||
/*
 * Cut the request at its "\r\n" terminator.  Returns 0 when no
 * terminator is found, or when a non-NUL byte follows it (in which case
 * an error is logged for client c); returns 1 otherwise.  Both bytes of
 * the terminator are overwritten with NUL in either of the last two
 * cases.
 */
int
url_trim(struct client *c, char *url)
{
	char *eol = strstr(url, "\r\n");

	if (eol == NULL)
		return 0;

	/* wipe both bytes of the terminator */
	eol[0] = '\0';
	eol[1] = '\0';

	if (eol[2] == '\0')
		return 1;

	LOGE(c, "%s", "request longer than 1024 bytes");
	return 0;
}
|
||||
|
||||
/*
 * Split the query off `path' and strip ".." components, in place.
 *
 *  - the first '?' (if any) is replaced by NUL and a pointer to the
 *    text after it is returned; NULL is returned when there is no '?';
 *  - a trailing "/.." is reduced to "/";
 *  - a path that is exactly ".." becomes the empty string;
 *  - every remaining "../" substring is deleted.
 */
char *
adjust_path(char *path)
{
	char *s, *query;
	size_t len;

	if ((query = strchr(path, '?')) != NULL)
		*query++ = '\0';

	/* /.. -> / */
	len = strlen(path);
	if (len >= 3 && strcmp(&path[len - 3], "/..") == 0)
		path[len - 2] = '\0';

	/* if the path is only `..` trim out and exit */
	if (strcmp(path, "..") == 0) {
		path[0] = '\0';
		return query;
	}

	/*
	 * remove every ../ in the path.  The amount copied is the length
	 * of the tail that starts after the "../" plus its NUL; measuring
	 * from `s' instead would read three bytes past the terminator.
	 */
	while ((s = strstr(path, "../")) != NULL)
		memmove(s, s + 3, strlen(s + 3) + 1);

	return query;
}
|
||||
|
||||
int
|
||||
start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
|
||||
{
|
||||
|
@ -224,7 +150,7 @@ check_path(struct client *c, const char *path, int *fd)
|
|||
struct stat sb;
|
||||
|
||||
assert(path != NULL);
|
||||
if ((*fd = openat(dirfd, path,
|
||||
if ((*fd = openat(dirfd, *path ? path : ".",
|
||||
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
|
||||
return FILE_MISSING;
|
||||
}
|
||||
|
@ -288,16 +214,8 @@ err:
|
|||
|
||||
|
||||
int
|
||||
open_file(char *path, char *query, struct pollfd *fds, struct client *c)
|
||||
open_file(char *fpath, char *query, struct pollfd *fds, struct client *c)
|
||||
{
|
||||
char fpath[PATHBUF];
|
||||
|
||||
bzero(fpath, sizeof(fpath));
|
||||
|
||||
if (*path != '.')
|
||||
fpath[0] = '.';
|
||||
strlcat(fpath, path, PATHBUF);
|
||||
|
||||
switch (check_path(c, fpath, &c->fd)) {
|
||||
case FILE_EXECUTABLE:
|
||||
/* +2 to skip the ./ */
|
||||
|
@ -578,8 +496,8 @@ void
|
|||
handle(struct pollfd *fds, struct client *client)
|
||||
{
|
||||
char buf[GEMINI_URL_LEN];
|
||||
char *path;
|
||||
char *query;
|
||||
const char *parse_err;
|
||||
struct uri uri;
|
||||
|
||||
switch (client->state) {
|
||||
case S_OPEN:
|
||||
|
@ -599,26 +517,19 @@ handle(struct pollfd *fds, struct client *client)
|
|||
return;
|
||||
}
|
||||
|
||||
if (!url_trim(client, buf)) {
|
||||
if (!start_reply(fds, client, BAD_REQUEST, "bad request"))
|
||||
if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) {
|
||||
if (!start_reply(fds, client, BAD_REQUEST, parse_err))
|
||||
return;
|
||||
goodbye(fds, client);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((path = url_start_of_request(buf)) == NULL) {
|
||||
if (!start_reply(fds, client, BAD_REQUEST, "bad request"))
|
||||
return;
|
||||
goodbye(fds, client);
|
||||
return;
|
||||
}
|
||||
LOGI(client, "GET %s%s%s",
|
||||
*uri.path ? uri.path : "/",
|
||||
*uri.query ? "?" : "",
|
||||
*uri.query ? uri.query : "");
|
||||
|
||||
query = adjust_path(path);
|
||||
LOGI(client, "GET %s%s%s", path,
|
||||
query ? "?" : "",
|
||||
query ? query : "");
|
||||
|
||||
send_file(path, query, fds, client);
|
||||
send_file(uri.path, uri.query, fds, client);
|
||||
break;
|
||||
|
||||
case S_INITIALIZING:
|
||||
|
|
42
gmid.h
42
gmid.h
|
@ -107,6 +107,17 @@ struct client {
|
|||
struct in_addr addr;
|
||||
};
|
||||
|
||||
|
||||
/*
 * Components of a parsed URI, as filled in by parse_uri().  The string
 * members point into the buffer handed to parse_uri(), which is split
 * in place by overwriting delimiters with NUL.
 */
struct uri {
|
||||
/* scheme name (the text before the first ':') */
char *schema;
|
||||
/* host part of the authority */
char *host;
|
||||
/* textual port, empty string when absent */
char *port;
|
||||
/* numeric value of `port' (0 when absent) */
uint16_t port_no;
|
||||
/* decoded and cleaned path, empty string when absent */
char *path;
|
||||
/* text after '?', empty string when absent */
char *query;
|
||||
/* text after '#', empty string when absent */
char *fragment;
|
||||
};
|
||||
|
||||
enum {
|
||||
FILE_EXISTS,
|
||||
FILE_EXECUTABLE,
|
||||
|
@ -114,35 +125,10 @@ enum {
|
|||
FILE_MISSING,
|
||||
};
|
||||
|
||||
/*
 * File extension to MIME type table.  Each entry pairs a MIME type with
 * one extension (spelled without a dot); the list is closed by an
 * all-NULL sentinel entry.
 */
struct etm {
	const char	*mime;	/* MIME type string */
	const char	*ext;	/* file extension */
} filetypes[] = {
	/* documents */
	{ .mime = "application/pdf",	.ext = "pdf" },

	/* images */
	{ .mime = "image/gif",		.ext = "gif" },
	{ .mime = "image/jpeg",		.ext = "jpg" },
	{ .mime = "image/jpeg",		.ext = "jpeg" },
	{ .mime = "image/png",		.ext = "png" },
	{ .mime = "image/svg+xml",	.ext = "svg" },

	/* text */
	{ .mime = "text/gemini",	.ext = "gemini" },
	{ .mime = "text/gemini",	.ext = "gmi" },
	{ .mime = "text/markdown",	.ext = "markdown" },
	{ .mime = "text/markdown",	.ext = "md" },
	{ .mime = "text/plain",		.ext = "txt" },
	{ .mime = "text/xml",		.ext = "xml" },

	/* sentinel */
	{ .mime = NULL,			.ext = NULL }
};
|
||||
|
||||
/* gmid.c */
|
||||
void siginfo_handler(int);
|
||||
int starts_with(const char*, const char*);
|
||||
|
||||
char *url_after_proto(char*);
|
||||
char *url_start_of_request(char*);
|
||||
int url_trim(struct client*, char*);
|
||||
char *adjust_path(char*);
|
||||
ssize_t filesize(int);
|
||||
|
||||
int start_reply(struct pollfd*, struct client*, int, const char*);
|
||||
|
@ -167,4 +153,8 @@ void loop(struct tls*, int);
|
|||
|
||||
void usage(const char*);
|
||||
|
||||
/* uri.c */
|
||||
int parse_uri(char*, struct uri*, const char**);
|
||||
int trim_req_uri(char*);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,413 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Omar Polo <op@omarpolo.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gmid.h"
|
||||
|
||||
/*
|
||||
* Notes from RFC3986
|
||||
*
|
||||
* => gemini://tanso.net/rfc/rfc3986.txt
|
||||
*
|
||||
*
|
||||
* ABNF
|
||||
* ====
|
||||
*
|
||||
* pct-encoded = "%" HEXDIG HEXDIG
|
||||
*
|
||||
* reserved = gen-delims / sub-delims
|
||||
* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
|
||||
* sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
|
||||
* / "*" / "+" / "," / ";" / "="
|
||||
*
|
||||
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
|
||||
*
|
||||
* URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
|
||||
*
|
||||
* hier-part = "//" authority path-abempty
|
||||
* / path-absolute
|
||||
* / path-rootless
|
||||
* / path-empty
|
||||
*
|
||||
* scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
|
||||
*
|
||||
* authority = [ userinfo "@" ] host [ ":" port ]
|
||||
*
|
||||
* (note that userinfo isn't used for Gemini URL)
|
||||
*
|
||||
* host = IP-literal / IPv4address / reg-name
|
||||
* reg-name = *( unreserved / pct-encoded / sub-delims )
|
||||
*
|
||||
* port = *DIGIT
|
||||
*
|
||||
* path = path-abempty ; begins with "/" or is empty
|
||||
* / path-absolute ; begins with "/" but not "//"
|
||||
* / path-noscheme ; begins with a non-colon segment
|
||||
* / path-rootless ; begins with a segment
|
||||
* / path-empty ; zero characters
|
||||
*
|
||||
* path-abempty = *( "/" segment )
|
||||
* path-absolute = "/" [ segment-nz *( "/" segment ) ]
|
||||
* path-noscheme = ; not used
|
||||
* path-rootless = ; not used
|
||||
* path-empty = ; not used
|
||||
*
|
||||
* segment = *pchar
|
||||
* segment-nz = 1*pchar
|
||||
* segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
|
||||
* pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
|
||||
*
|
||||
* query = *( pchar / "/" / "?" )
|
||||
*
|
||||
* fragment = *( pchar / "/" / "?" )
|
||||
*
|
||||
*
|
||||
* EXAMPLE
|
||||
* =======
|
||||
*
|
||||
* foo://example.com:8042/over/there?name=ferret#nose
|
||||
* \_/ \______________/\_________/ \_________/ \__/
|
||||
* | | | | |
|
||||
* scheme authority path query fragment
|
||||
*
|
||||
*/
|
||||
|
||||
/*
 * State threaded through the parse_* routines of this file.
 */
struct parser {
|
||||
/* current position in the buffer being parsed; advanced (and the
 * buffer modified in place) as components are recognised */
char *uri;
|
||||
/* output structure whose members are filled in along the way */
struct uri *parsed;
|
||||
/* description of the last error recorded, NULL when none was */
const char *err;
|
||||
};
|
||||
|
||||
/*
 * Character classes from the RFC3986 ABNF:
 *
 *	unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
 *	sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
 *		   / "*" / "+" / "," / ";" / "="
 *
 * Both evaluate to non-zero when the character belongs to the class.
 *
 * XXX: these macros expand their argument multiple times, so never
 * pass an expression with side effects.  The argument is parenthesised
 * on every use, and converted to unsigned char before reaching
 * isalnum(), whose behaviour is undefined for negative values other
 * than EOF (i.e. for bytes >= 0x80 where char is signed).
 */

#define UNRESERVED(p)				\
	(isalnum((unsigned char)(p))		\
	    || (p) == '-'			\
	    || (p) == '.'			\
	    || (p) == '_'			\
	    || (p) == '~')

#define SUB_DELIMITERS(p)			\
	((p) == '!'				\
	    || (p) == '$'			\
	    || (p) == '&'			\
	    || (p) == '\''			\
	    || (p) == '('			\
	    || (p) == ')'			\
	    || (p) == '*'			\
	    || (p) == '+'			\
	    || (p) == ','			\
	    || (p) == ';'			\
	    || (p) == '=')
|
||||
|
||||
static int
|
||||
parse_pct_encoded(struct parser *p)
|
||||
{
|
||||
if (*p->uri != '%')
|
||||
return 0;
|
||||
|
||||
if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) {
|
||||
p->err = "illegal percent-encoding";
|
||||
return 0;
|
||||
}
|
||||
|
||||
sscanf(p->uri+1, "%2hhx", p->uri);
|
||||
memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://" */
|
||||
static int
|
||||
parse_scheme(struct parser *p)
|
||||
{
|
||||
p->parsed->schema = p->uri;
|
||||
|
||||
if (!isalpha(*p->uri)) {
|
||||
p->err = "illegal character in scheme";
|
||||
return 0;
|
||||
}
|
||||
|
||||
p->uri++;
|
||||
while (isalnum(*p->uri)
|
||||
|| *p->uri == '+'
|
||||
|| *p->uri == '-'
|
||||
|| *p->uri == '.')
|
||||
p->uri++;
|
||||
|
||||
if (*p->uri != ':') {
|
||||
p->err = "illegal character in scheme";
|
||||
return 0;
|
||||
}
|
||||
|
||||
*p->uri = '\0';
|
||||
if (*(++p->uri) != '/' || *(++p->uri) != '/') {
|
||||
p->err = "invalid marker after scheme";
|
||||
return 0;
|
||||
}
|
||||
|
||||
p->uri++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* *DIGIT */
|
||||
static int
|
||||
parse_port(struct parser *p)
|
||||
{
|
||||
uint32_t i = 0;
|
||||
|
||||
p->parsed->port = p->uri;
|
||||
|
||||
for (; isdigit(*p->uri); p->uri++) {
|
||||
i = i * 10 + *p->uri - '0';
|
||||
if (i > UINT16_MAX) {
|
||||
p->err = "port number too large";
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (*p->uri != '/' && *p->uri != '\0') {
|
||||
p->err = "illegal character in port number";
|
||||
return 0;
|
||||
}
|
||||
|
||||
p->parsed->port_no = i;
|
||||
|
||||
if (*p->uri != '\0') {
|
||||
*p->uri = '\0';
|
||||
p->uri++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* TODO: add support for ip-literal and ipv4addr ? */
|
||||
/* *( unreserved / sub-delims / pct-encoded ) */
|
||||
static int
|
||||
parse_authority(struct parser *p)
|
||||
{
|
||||
p->parsed->host = p->uri;
|
||||
|
||||
while (UNRESERVED(*p->uri)
|
||||
|| SUB_DELIMITERS(*p->uri)
|
||||
|| parse_pct_encoded(p))
|
||||
p->uri++;
|
||||
|
||||
if (*p->uri == ':') {
|
||||
*p->uri = '\0';
|
||||
p->uri++;
|
||||
return parse_port(p);
|
||||
}
|
||||
|
||||
if (*p->uri == '/') {
|
||||
*p->uri = '\0';
|
||||
p->uri++;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (*p->uri == '\0')
|
||||
return 1;
|
||||
|
||||
p->err = "illegal character in authority section";
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Normalise `path' in place, with an algorithm similar to the one
 * implemented in Go's path.Clean:
 *
 *  1. Replace multiple slashes with a single slash
 *  2. Eliminate each . path name element
 *  3. Eliminate each .. element along with the non-.. element that
 *     precedes it
 *  4. Fail if a .. has no preceding element to cancel (go would only
 *     discard it)
 *
 * Unlike path.Clean:
 *  - leading slashes are dropped, so the result is always a relative
 *    path, and it is the empty string when the original path is
 *    equivalent to "/";
 *  - a trailing slash is preserved ("foo/" stays "foo/").
 *
 * The path is processed one whole segment at a time, so "." and ".."
 * are only recognised when they are a complete element: names such as
 * "..foo", "foo..", "..." or "a." are ordinary segments and are left
 * alone.  (The previous substring-based implementation, together with
 * its path_elide_dotdot() helper, matched ".." anywhere and could walk
 * outside of the path buffer.)
 *
 * Returns 1 on success, 0 if the path would escape its root.  The
 * content of `path' is unspecified after a failure.
 */
static int
path_clean(char *path)
{
	char	*src = path;	/* read cursor */
	char	*dst = path;	/* write cursor, never ahead of src */
	size_t	 len;

	/* a leading slash would make the result absolute */
	while (*src == '/')
		src++;

	while (*src != '\0') {
		len = strcspn(src, "/");

		if (len == 1 && src[0] == '.') {
			/* 2. "." element: drop it */
		} else if (len == 2 && src[0] == '.' && src[1] == '.') {
			/* 3./4. "..": cancel the previous element */
			if (dst == path)
				return 0;
			/*
			 * Every element already written was followed by
			 * a slash (otherwise it would have been the last
			 * one), so dst[-1] is '/': step over it, then
			 * back to the start of that element.
			 */
			dst--;
			while (dst > path && dst[-1] != '/')
				dst--;
		} else {
			/* ordinary element: keep it and its slash */
			memmove(dst, src, len);
			dst += len;
			if (src[len] == '/')
				*dst++ = '/';
		}

		/* 1. skip the element and any run of slashes after it */
		src += len;
		while (*src == '/')
			src++;
	}

	*dst = '\0';
	return 1;
}
|
||||
|
||||
static int
|
||||
parse_query(struct parser *p)
|
||||
{
|
||||
p->parsed->query = p->uri;
|
||||
if (*p->uri == '\0')
|
||||
return 1;
|
||||
|
||||
while (UNRESERVED(*p->uri)
|
||||
|| SUB_DELIMITERS(*p->uri)
|
||||
|| *p->uri == '/'
|
||||
|| *p->uri == '?'
|
||||
|| parse_pct_encoded(p))
|
||||
p->uri++;
|
||||
|
||||
if (*p->uri != '\0' && *p->uri != '#') {
|
||||
p->err = "illegal character in query";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (*p->uri != '\0') {
|
||||
*p->uri = '\0';
|
||||
p->uri++;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* don't even bother */
|
||||
static int
|
||||
parse_fragment(struct parser *p)
|
||||
{
|
||||
p->parsed->fragment = p->uri;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* XXX: is it too broad? */
|
||||
/* *(pchar / "/") */
|
||||
static int
|
||||
parse_path(struct parser *p)
|
||||
{
|
||||
char c;
|
||||
|
||||
p->parsed->path = p->uri;
|
||||
if (*p->uri == '\0') {
|
||||
p->parsed->query = p->parsed->fragment = p->uri;
|
||||
return 1;
|
||||
}
|
||||
|
||||
while (UNRESERVED(*p->uri)
|
||||
|| SUB_DELIMITERS(*p->uri)
|
||||
|| *p->uri == '/'
|
||||
|| parse_pct_encoded(p))
|
||||
p->uri++;
|
||||
|
||||
if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {
|
||||
p->err = "illegal character in path";
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (*p->uri != '\0') {
|
||||
c = *p->uri;
|
||||
*p->uri = '\0';
|
||||
p->uri++;
|
||||
|
||||
if (c == '#') {
|
||||
if (!parse_fragment(p))
|
||||
return 0;
|
||||
} else
|
||||
if (!parse_query(p) || !parse_fragment(p))
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!path_clean(p->parsed->path)) {
|
||||
p->err = "illegal path";
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
parse_uri(char *uri, struct uri *ret, const char **err_ret)
|
||||
{
|
||||
char *end;
|
||||
struct parser p = {uri, ret, NULL};
|
||||
|
||||
bzero(ret, sizeof(*ret));
|
||||
|
||||
/* initialize optional stuff to the empty string */
|
||||
end = uri + strlen(uri);
|
||||
p.parsed->port = end;
|
||||
p.parsed->path = end;
|
||||
p.parsed->query = end;
|
||||
p.parsed->fragment = end;
|
||||
|
||||
if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
|
||||
*err_ret = p.err;
|
||||
return 0;
|
||||
}
|
||||
|
||||
*err_ret = NULL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
 * Terminate the request at the first "\r\n" by overwriting the '\r'
 * with NUL.  Returns 1 if the sequence was found, 0 (leaving the
 * string untouched) otherwise.
 */
int
trim_req_uri(char *uri)
{
	char *crlf = strstr(uri, "\r\n");

	if (crlf != NULL) {
		*crlf = '\0';
		return 1;
	}

	return 0;
}
|
|
@ -0,0 +1,184 @@
|
|||
/*
|
||||
* Copyright (c) 2020 Omar Polo <op@omarpolo.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <err.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "gmid.h"
|
||||
|
||||
/*
 * Run one test case; when it doesn't go as expected, print where it was
 * declared together with its description and exit.
 *
 * Kept as a bare `if' block rather than a do/while(0) so that
 * invocations written without a trailing semicolon keep compiling.
 */
#define TEST(uri, fail, exp, descr)					\
	if (!run_test((uri), (fail), (exp))) {				\
		fprintf(stderr, "%s:%d: error: %s\n",			\
		    __FILE__, __LINE__, (descr));			\
		exit(1);						\
	}

/* Build a struct uri from its string components; port_no is set to 0. */
#define URI(schema, host, port, path, query, frag)			\
	((struct uri){(schema), (host), (port), 0, (path), (query), (frag)})

/*
 * Compare one string member of two structures; on a mismatch (a NULL
 * on either side counts as one) print both values and make the
 * enclosing function return 0.  NULL is printed as "(null)" instead of
 * being handed to %s.
 */
#define DIFF(wanted, got, field)					\
	if ((wanted)->field == NULL || (got)->field == NULL ||		\
	    strcmp((wanted)->field, (got)->field)) {			\
		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
		    (got)->field != NULL ? (got)->field : "(null)",	\
		    (wanted)->field != NULL ? (wanted)->field : "(null)"); \
		return 0;						\
	}

/* values for the `fail' argument of TEST */
#define PASS 0
#define FAIL 1
|
||||
|
||||
int
|
||||
diff_uri(struct uri *p, struct uri *exp)
|
||||
{
|
||||
DIFF(p, exp, schema);
|
||||
DIFF(p, exp, host);
|
||||
DIFF(p, exp, port);
|
||||
DIFF(p, exp, path);
|
||||
DIFF(p, exp, query);
|
||||
DIFF(p, exp, fragment);
|
||||
return 1;
|
||||
}
|
||||
|
||||
int
|
||||
run_test(const char *uri, int should_fail, struct uri expected)
|
||||
{
|
||||
int failed, ok = 1;
|
||||
char *uri_copy;
|
||||
struct uri parsed;
|
||||
const char *error;
|
||||
|
||||
if ((uri_copy = strdup(uri)) == NULL)
|
||||
err(1, "strdup");
|
||||
|
||||
fprintf(stderr, "=> %s\n", uri);
|
||||
failed = !parse_uri(uri_copy, &parsed, &error);
|
||||
|
||||
if (failed && should_fail)
|
||||
goto done;
|
||||
|
||||
if (error != NULL)
|
||||
fprintf(stderr, "> %s\n", error);
|
||||
|
||||
ok = !failed && !should_fail;
|
||||
if (ok)
|
||||
ok = diff_uri(&expected, &parsed);
|
||||
|
||||
done:
|
||||
free(uri_copy);
|
||||
return ok;
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
struct uri empty = {"", "", "", PASS, "", "", ""};
|
||||
|
||||
TEST("http://omarpolo.com",
|
||||
PASS,
|
||||
URI("http", "omarpolo.com", "", "", "", ""),
|
||||
"can parse uri with empty path");
|
||||
|
||||
/* schema */
|
||||
TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
|
||||
TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
|
||||
TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
|
||||
TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
|
||||
|
||||
/* authority */
|
||||
TEST("gemini://omarpolo.com",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "", "", ""),
|
||||
"can parse authority with empty path");
|
||||
TEST("gemini://omarpolo.com/",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "", "", ""),
|
||||
"can parse authority with empty path (alt)")
|
||||
TEST("gemini://omarpolo.com:1965",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "1965", "", "", ""),
|
||||
"can parse with port and empty path");
|
||||
TEST("gemini://omarpolo.com:1965/",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "1965", "", "", ""),
|
||||
"can parse with port and empty path")
|
||||
TEST("gemini://omarpolo.com:196s",
|
||||
FAIL,
|
||||
empty,
|
||||
"FAIL with invalid port number");
|
||||
|
||||
/* path */
|
||||
TEST("gemini://omarpolo.com/foo/bar/baz",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
|
||||
"parse simple paths");
|
||||
TEST("gemini://omarpolo.com/foo//bar///baz",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
|
||||
"parse paths with multiple slashes");
|
||||
TEST("gemini://omarpolo.com/foo/./bar/./././baz",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
|
||||
"parse paths with . elements");
|
||||
TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
|
||||
"parse paths with .. elements");
|
||||
TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
|
||||
"parse paths with multiple .. elements");
|
||||
TEST("gemini://omarpolo.com/foo/..",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "", "", ""),
|
||||
"parse paths with a trailing ..");
|
||||
TEST("gemini://omarpolo.com/foo/../",
|
||||
PASS,
|
||||
URI("gemini", "omarpolo.com", "", "", "", ""),
|
||||
"parse paths with a trailing ..");
|
||||
TEST("gemini://omarpolo.com/foo/../..",
|
||||
FAIL,
|
||||
empty,
|
||||
"reject paths that would escape the root");
|
||||
|
||||
/* query */
|
||||
TEST("foo://example.com/foo/?gne",
|
||||
PASS,
|
||||
URI("foo", "example.com", "", "foo/", "gne", ""),
|
||||
"parse query strings");
|
||||
TEST("foo://example.com/foo/?gne&foo",
|
||||
PASS,
|
||||
URI("foo", "example.com", "", "foo/", "gne&foo", ""),
|
||||
"parse query strings");
|
||||
TEST("foo://example.com/foo/?gne%2F",
|
||||
PASS,
|
||||
URI("foo", "example.com", "", "foo/", "gne/", ""),
|
||||
"parse query strings");
|
||||
|
||||
/* fragment */
|
||||
TEST("foo://bar.co/#foo",
|
||||
PASS,
|
||||
URI("foo", "bar.co", "", "", "", "foo"),
|
||||
"can recognize fragments");
|
||||
|
||||
/* percent encoding */
|
||||
TEST("foo://bar.com/caf%C3%A8.gmi",
|
||||
PASS,
|
||||
URI("foo", "bar.com", "", "cafè.gmi", "", ""),
|
||||
"can decode");
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue