[cgi] added support for path parameters

Enhance the CGI scripting support so that scripts can take path
parameters.  That is, a script at /cgi/foo is called when the request
path is /cgi/foo/bar/...

This commit also introduces some backward-incompatible changes, as the
default env variables set for the CGI script changed.
This commit is contained in:
Omar Polo 2020-11-10 14:07:36 +01:00
parent 92e66347ed
commit a5d310bc0d
No known key found for this signature in database
GPG Key ID: 35F98C96A1786F0D
4 changed files with 234 additions and 115 deletions

View File

@ -1,3 +1,8 @@
2020-11-10 Omar Polo <op@omarpolo.com>
* gmid.c (open_file): added support for path parameters for CGI
scripts
2020-11-06 Omar Polo <op@omarpolo.com>
* gmid.1: great improvements to the documentation

View File

@ -24,10 +24,7 @@ will strip any sequence of
*../*
or trailing
*..*
in the requests made by clients, so it's impossible to serve content
outside the
*docs*
directory by mistake, and will also refuse to follow symlinks.
in the requests made by clients and will refuse to follow symlinks.
Furthermore, on
OpenBSD,
pledge(2)
@ -50,7 +47,6 @@ If a user request path is a directory,
will try to serve a
*index.gmi*
file inside that directory.
If not found, it will return an error 51 (not found) to the user.
The options are as follows:
@ -63,7 +59,8 @@ The options are as follows:
> The root directory to serve.
> **gmid**
> won't serve any file that is outside that directory, by default
> won't serve any file that is outside that directory.
> The default is
> *docs*.
**-h**
@ -97,31 +94,61 @@ with these additional variables set:
> "gmid"
`SERVER_PROTOCOL`
> "gemini"
`SERVER_PORT`
> "1965"
`PATH_INFO`
`SCRIPT_NAME`
> the request path
> The (public) path to the script.
`PATH_TRANSLATED`
`SCRIPT_EXECUTABLE`
> the full path: the concatenation of the document root and the request
> path
> The full path to the executable.
`REQUEST_URI`
> The user request (without the query parameters.)
`REQUEST_RELATIVE`
> The request relative to the script.
`QUERY_STRING`
> the query string if present in the request URL, otherwise it
> won't be set.
> The query parameters.
`REMOTE_ADDR`
`REMOTE_HOST`
> the IP address of the client in dot notation
> The remote IP address.
`DOCUMENT_ROOT`
> The root directory being served, the one provided with the
> **-d**
> parameter to
> **gmid**.
Let's say you have a script in
*/cgi-bin/script*
and the user request is
*/cgi-bin/script/foo/bar?quux*.
Then
`SCRIPT_NAME`
will be
*/cgi-bin/script*,
`SCRIPT_EXECUTABLE`
will be
*$DOCUMENT\_ROOT/cgi-bin/script*,
`REQUEST_URI`
will be
*/cgi-bin/script/foo/bar*,
`REQUEST_RELATIVE`
will be
*foo/bar*
and
`QUERY_STRING`
will be
*quux*.
# EXAMPLES
@ -157,7 +184,7 @@ option is
*cgi-bin*
and not
*docs/cgi-bin*,
since it&#8217;s relative to the document root.
since it's relative to the document root.
# CAVEATS

61
gmid.1
View File

@ -37,10 +37,7 @@ will strip any sequence of
.Pa ../
or trailing
.Pa ..
in the requests made by clients, so it's impossible to serve content
outside the
.Pa docs
directory by mistake, and will also refuse to follow symlinks.
in the requests made by clients and will refuse to follow symlinks.
Furthermore, on
.Ox ,
.Xr pledge 2
@ -63,7 +60,6 @@ If a user request path is a directory,
will try to serve a
.Pa index.gmi
file inside that directory.
If not found, it will return an error 51 (not found) to the user.
.Pp
The options are as follows:
.Bl -tag -width 12m
@ -73,7 +69,8 @@ The certificate to use, by default is
.It Fl d Ar docs
The root directory to serve.
.Nm
won't serve any file that is outside that directory, by default
won't serve any file that is outside that directory.
The default is
.Pa docs .
.It Fl h
Print the usage and exit.
@ -93,24 +90,50 @@ executable file will execute it and fed its output to the client.
The CGI scripts will inherit the environment from
.Nm
with these additional variables set:
.Bl -tag -width 15m
.Bl -tag -width 18m
.It Ev SERVER_SOFTWARE
"gmid"
.It Ev SERVER_PROTOCOL
"gemini"
.It Ev SERVER_PORT
"1965"
.It Ev PATH_INFO
the request path
.It Ev PATH_TRANSLATED
the full path: the concatenation of the document root and the request
path
.It Ev SCRIPT_NAME
The (public) path to the script.
.It Ev SCRIPT_EXECUTABLE
The full path to the executable.
.It Ev REQUEST_URI
The user request (without the query parameters.)
.It Ev REQUEST_RELATIVE
The request relative to the script.
.It Ev QUERY_STRING
the query string if present in the request URL, otherwise it
won't be set.
.It Ev REMOTE_ADDR
the IP address of the client in dot notation
The query parameters.
.It Ev REMOTE_HOST
The remote IP address.
.It Ev DOCUMENT_ROOT
The root directory being served, the one provided with the
.Fl d
parameter to
.Nm .
.El
.Pp
Let's say you have a script in
.Pa /cgi-bin/script
and the user request is
.Pa /cgi-bin/script/foo/bar?quux .
Then
.Ev SCRIPT_NAME
will be
.Pa /cgi-bin/script ,
.Ev SCRIPT_EXECUTABLE
will be
.Pa $DOCUMENT_ROOT/cgi-bin/script ,
.Ev REQUEST_URI
will be
.Pa /cgi-bin/script/foo/bar ,
.Ev REQUEST_RELATIVE
will be
.Pa foo/bar
and
.Ev QUERY_STRING
will be
.Ar quux .
.Sh EXAMPLES
To quickly get started
.Bd -literal -offset indent
@ -146,7 +169,7 @@ option is
.Pa cgi-bin
and not
.Pa docs/cgi-bin ,
since its relative to the document root.
since it's relative to the document root.
.Sh CAVEATS
.Bl -bullet
.It

216
gmid.c
View File

@ -77,6 +77,13 @@ struct client {
struct in_addr addr;
};
/*
 * Classification of a path as reported by check_path(), which opens the
 * path relative to dirfd and inspects it with fstat().
 */
enum {
FILE_EXISTS,		/* opened; not a directory and S_IXUSR is not set */
FILE_EXECUTABLE,	/* opened; not a directory and S_IXUSR is set */
FILE_DIRECTORY,		/* opened; S_ISDIR() is true */
FILE_MISSING,		/* openat() or fstat() failed */
};
struct etm { /* file extension to mime */
const char *mime;
const char *ext;
@ -118,14 +125,15 @@ char *url_after_proto(char*);
char *url_start_of_request(char*);
int url_trim(struct client*, char*);
char *adjust_path(char*);
int path_isdir(char*);
ssize_t filesize(int);
int start_reply(struct pollfd*, struct client*, int, const char*);
const char *path_ext(const char*);
const char *mime(const char*);
int check_path(const char*, int*);
int check_for_cgi(char *, char*, struct pollfd*, struct client*);
int open_file(char*, char*, struct pollfd*, struct client*);
void start_cgi(const char*, const char*, struct pollfd*, struct client*);
int start_cgi(const char*, const char*, const char*, struct pollfd*, struct client*);
void cgi_setpoll_on_child(struct pollfd*, struct client*);
void cgi_setpoll_on_client(struct pollfd*, struct client*);
void handle_cgi(struct pollfd*, struct client*);
@ -244,14 +252,6 @@ adjust_path(char *path)
}
}
/*
 * Tell whether a request path designates a directory.
 *
 * The empty path and any path whose last character is '/' count as
 * directories.  Returns 1 for a directory, 0 otherwise.
 */
int
path_isdir(char *path)
{
	size_t len;

	len = strlen(path);
	if (len == 0)
		return 1;

	if (path[len - 1] == '/')
		return 1;
	return 0;
}
int
start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
{
@ -324,12 +324,78 @@ mime(const char *path)
}
int
open_file(char *path, char *query, struct pollfd *fds, struct client *c)
check_path(const char *path, int *fd)
{
char fpath[PATHBUF];
struct stat sb;
assert(path != NULL);
if ((*fd = openat(dirfd, path,
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
return FILE_MISSING;
}
if (fstat(*fd, &sb) == -1) {
dprintf(logfd, "failed stat for %s\n", path);
return FILE_MISSING;
}
if (S_ISDIR(sb.st_mode))
return FILE_DIRECTORY;
if (sb.st_mode & S_IXUSR)
return FILE_EXECUTABLE;
return FILE_EXISTS;
}
/*
 * Look for a CGI script among the ancestors of path.
 *
 * Starting from the end of path, strip one trailing component at a
 * time and probe what is left with check_path(), until either an
 * executable is found or the path is exhausted.  The inverse approach,
 * i.e. starting at path + strlen(cgi) and checking each component in
 * turn, should be faster, but it's tedious to write.
 *
 * path is modified in place while scanning: each candidate is formed
 * by temporarily overwriting a '/' with NUL.  When an executable is
 * found, path is left truncated at the script and the stripped
 * remainder is passed to start_cgi() as the path relative to it.
 *
 * Returns whatever start_cgi() returns when a script is found.
 * Otherwise (nothing executable found, or a stripped path resolves to
 * something that is not an executable) replies NOT_FOUND and returns 0.
 */
int
check_for_cgi(char *path, char *query, struct pollfd *fds, struct client *c)
{
	char *end;

	end = strchr(path, '\0');

	/* NB: assume CGI is enabled and path matches cgi */

	while (end > path) {
		/*
		 * Go up one level.  UNIX paths are simple, and POSIX
		 * dirname, with its ambiguity about whether the given
		 * path is modified, is best avoided.  The scan is
		 * bounded so it never reads before the start of path
		 * when no separator is left.
		 */
		while (end > path && *end != '/')
			end--;
		if (*end != '/')
			break;

		*end = '\0';

		switch (check_path(path, &c->fd)) {
		case FILE_EXECUTABLE:
			return start_cgi(path, end+1, query, fds,c);
		case FILE_MISSING:
			break;
		default:
			goto err;
		}

		*end = '/';

		/* don't step to a pointer before the start of path */
		if (end == path)
			break;
		end--;
	}

err:
	if (!start_reply(fds, c, NOT_FOUND, "not found"))
		return 0;
	goodbye(fds, c);
	return 0;
}
int
open_file(char *path, char *query, struct pollfd *fds, struct client *c)
{
char fpath[PATHBUF];
bzero(fpath, sizeof(fpath));
@ -337,60 +403,58 @@ open_file(char *path, char *query, struct pollfd *fds, struct client *c)
fpath[0] = '.';
strlcat(fpath, path, PATHBUF);
if ((c->fd = openat(dirfd, fpath,
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
LOG(c, "open failed: %s", fpath);
if (!start_reply(fds, c, NOT_FOUND, "not found"))
return 0;
goodbye(fds, c);
return 0;
}
switch (check_path(fpath, &c->fd)) {
case FILE_EXECUTABLE:
/* +2 to skip the ./ */
if (cgi != NULL && starts_with(fpath+2, cgi))
return start_cgi(fpath, "", query, fds, c);
if (fstat(c->fd, &sb) == -1) {
LOG(c, "fstat failed for %s", fpath);
if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
return 0;
goodbye(fds, c);
return 0;
}
/* fallthrough */
if (S_ISDIR(sb.st_mode)) {
case FILE_EXISTS:
if ((c->len = filesize(c->fd)) == -1) {
LOG(c, "failed to get file size for %s", fpath);
goodbye(fds, c);
return 0;
}
if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
c->fd, 0)) == MAP_FAILED) {
warn("mmap: %s", fpath);
goodbye(fds, c);
return 0;
}
c->i = c->buf;
return start_reply(fds, c, SUCCESS, mime(fpath));
case FILE_DIRECTORY:
LOG(c, "%s is a directory, trying %s/index.gmi", fpath, fpath);
close(c->fd);
c->fd = -1;
send_dir(fpath, fds, c);
return 0;
}
/* +2 to skip the ./ */
if ((sb.st_mode & S_IXUSR) && cgi != NULL && starts_with(fpath+2, cgi)) {
start_cgi(fpath, query, fds, c);
return 0;
}
case FILE_MISSING:
if (cgi != NULL && starts_with(fpath+2, cgi))
return check_for_cgi(fpath, query, fds, c);
if ((c->len = filesize(c->fd)) == -1) {
LOG(c, "failed to get file size for %s", fpath);
if (!start_reply(fds, c, NOT_FOUND, "not found"))
return 0;
goodbye(fds, c);
return 0;
}
if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
c->fd, 0)) == MAP_FAILED) {
warn("mmap: %s", fpath);
goodbye(fds, c);
return 0;
default:
/* unreachable */
abort();
}
c->i = c->buf;
return start_reply(fds, c, SUCCESS, mime(fpath));
}
void
start_cgi(const char *path, const char *query,
int
start_cgi(const char *spath, const char *relpath, const char *query,
struct pollfd *fds, struct client *c)
{
pid_t pid;
int p[2];
int p[2]; /* read end, write end */
if (pipe(p) == -1)
goto err;
@ -399,65 +463,68 @@ start_cgi(const char *path, const char *query,
case -1:
goto err;
case 0: { /* child */
char *expath;
case 0: { /* child */
char *ex, *requri;
char addr[INET_ADDRSTRLEN];
char *argv[] = { NULL, NULL, NULL };
/* skip the initial ./ */
path += 2;
spath++;
close(p[0]); /* close the read end */
close(p[0]);
if (dup2(p[1], 1) == -1)
goto childerr;
if (inet_ntop(c->af, &c->addr, addr, sizeof(addr)) == NULL)
goto childerr;
/* skip the ./ at the start of path*/
if (asprintf(&expath, "%s%s", dir, path) == -1)
goto childerr;
argv[0] = argv[1] = expath;
if (asprintf(&ex, "%s%s", dir, spath+1) == -1)
goto childerr;
if (asprintf(&requri, "%s%s%s", spath,
*relpath == '\0' ? "" : "/",
relpath) == -1)
goto childerr;
argv[0] = argv[1] = ex;
/* fix the env */
setenv("SERVER_SOFTWARE", "gmid", 1);
/* setenv("SERVER_NAME", "", 1); */
/* setenv("GATEWAY_INTERFACE", "CGI/version", 1); */
setenv("SERVER_PROTOCOL", "gemini", 1);
setenv("SERVER_PORT", "1965", 1);
setenv("PATH_INFO", path, 1);
setenv("PATH_TRANSLATED", expath, 1);
/* setenv("SERVER_NAME", "", 1); */
setenv("SCRIPT_NAME", spath, 1);
setenv("SCRIPT_EXECUTABLE", ex, 1);
setenv("REQUEST_URI", requri, 1);
setenv("REQUEST_RELATIVE", relpath, 1);
if (query != NULL)
setenv("QUERY_STRING", query, 1);
setenv("REMOTE_ADDR", addr, 1);
setenv("REMOTE_HOST", addr, 1);
setenv("DOCUMENT_ROOT", dir, 1);
execvp(expath, argv);
execvp(ex, argv);
goto childerr;
}
default: /* parent */
close(p[1]); /* close the write end */
close(p[1]);
close(c->fd);
c->fd = p[0];
c->child = pid;
mark_nonblock(c->fd);
c->state = S_SENDING;
handle_cgi(fds, c);
return;
return 0;
}
err:
if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
return;
return 0;
goodbye(fds, c);
return;
return 0;
childerr:
dprintf(p[1], "%d internal server error\r\n", TEMP_FAILURE);
close(p[1]);
/* don't call atexit stuff */
_exit(1);
_exit(1);
}
void
@ -645,10 +712,7 @@ handle(struct pollfd *fds, struct client *client)
query ? "?" : "",
query ? query : "");
if (path_isdir(path))
send_dir(path, fds, client);
else
send_file(path, query, fds, client);
send_file(path, query, fds, client);
break;
case S_INITIALIZING: