mirror of https://github.com/omar-polo/gmid.git
[cgi] added support for path parameters
Enhance the CGI scripting support so that scripts can take path parameters. That is, a script at /cgi/foo is called when the request path is /cgi/foo/bar/... This commit also introduces some backward-incompatible changes, as the default environment variables set for CGI scripts have changed.
This commit is contained in:
parent
92e66347ed
commit
a5d310bc0d
|
@ -1,3 +1,8 @@
|
|||
2020-11-10 Omar Polo <op@omarpolo.com>
|
||||
|
||||
* gmid.c (open_file): added support for path parameters for CGI
|
||||
scripts
|
||||
|
||||
2020-11-06 Omar Polo <op@omarpolo.com>
|
||||
|
||||
* gmid.1: great improvements to the documentation
|
||||
|
|
67
README.md
67
README.md
|
@ -24,10 +24,7 @@ will strip any sequence of
|
|||
*../*
|
||||
or trailing
|
||||
*..*
|
||||
in the requests made by clients, so it's impossible to serve content
|
||||
outside the
|
||||
*docs*
|
||||
directory by mistake, and will also refuse to follow symlinks.
|
||||
in the requests made by clients and will refuse to follow symlinks.
|
||||
Furthermore, on
|
||||
OpenBSD,
|
||||
pledge(2)
|
||||
|
@ -50,7 +47,6 @@ If a user request path is a directory,
|
|||
will try to serve a
|
||||
*index.gmi*
|
||||
file inside that directory.
|
||||
If not found, it will return an error 51 (not found) to the user.
|
||||
|
||||
The options are as follows:
|
||||
|
||||
|
@ -63,7 +59,8 @@ The options are as follows:
|
|||
|
||||
> The root directory to serve.
|
||||
> **gmid**
|
||||
> won't serve any file that is outside that directory, by default
|
||||
> won't serve any file that is outside that directory.
|
||||
> By default is
|
||||
> *docs*.
|
||||
|
||||
**-h**
|
||||
|
@ -97,31 +94,61 @@ with these additional variables set:
|
|||
|
||||
> "gmid"
|
||||
|
||||
`SERVER_PROTOCOL`
|
||||
|
||||
> "gemini"
|
||||
|
||||
`SERVER_PORT`
|
||||
|
||||
> "1965"
|
||||
|
||||
`PATH_INFO`
|
||||
`SCRIPT_NAME`
|
||||
|
||||
> the request path
|
||||
> The (public) path to the script.
|
||||
|
||||
`PATH_TRANSLATED`
|
||||
`SCRIPT_EXECUTABLE`
|
||||
|
||||
> the full path: the concatenation of the document root and the request
|
||||
> path
|
||||
> The full path to the executable.
|
||||
|
||||
`REQUEST_URI`
|
||||
|
||||
> The user request (without the query parameters).
|
||||
|
||||
`REQUEST_RELATIVE`
|
||||
|
||||
> The request relative to the script.
|
||||
|
||||
`QUERY_STRING`
|
||||
|
||||
> the query string if present in the request URL, otherwise it
|
||||
> won't be set.
|
||||
> The query parameters.
|
||||
|
||||
`REMOTE_ADDR`
|
||||
`REMOTE_HOST`
|
||||
|
||||
> the IP address of the client in dot notation
|
||||
> The remote IP address.
|
||||
|
||||
`DOCUMENT_ROOT`
|
||||
|
||||
> The root directory being served, the one provided with the
|
||||
> *d*
|
||||
> parameter to
|
||||
> **gmid**
|
||||
|
||||
Let's say you have a script in
|
||||
*/cgi-bin/script*
|
||||
and the user request is
|
||||
*/cgi-bin/script/foo/bar?quux*.
|
||||
Then
|
||||
`SCRIPT_NAME`
|
||||
will be
|
||||
*/cgi-bin/script*,
|
||||
`SCRIPT_EXECUTABLE`
|
||||
will be
|
||||
*$DOCUMENT\_ROOT/cgi-bin/script*,
|
||||
`REQUEST_URI`
|
||||
will be
|
||||
*/cgi-bin/script/foo/bar*,
|
||||
`REQUEST_RELATIVE`
|
||||
will be
|
||||
*foo/bar* and
|
||||
`QUERY_STRING`
|
||||
will be
|
||||
*quux*.
|
||||
|
||||
# EXAMPLES
|
||||
|
||||
|
@ -157,7 +184,7 @@ option is
|
|||
*cgi-bin*
|
||||
and not
|
||||
*docs/cgi-bin*,
|
||||
since it’s relative to the document root.
|
||||
since it's relative to the document root.
|
||||
|
||||
# CAVEATS
|
||||
|
||||
|
|
61
gmid.1
61
gmid.1
|
@ -37,10 +37,7 @@ will strip any sequence of
|
|||
.Pa ../
|
||||
or trailing
|
||||
.Pa ..
|
||||
in the requests made by clients, so it's impossible to serve content
|
||||
outside the
|
||||
.Pa docs
|
||||
directory by mistake, and will also refuse to follow symlinks.
|
||||
in the requests made by clients and will refuse to follow symlinks.
|
||||
Furthermore, on
|
||||
.Ox ,
|
||||
.Xr pledge 2
|
||||
|
@ -63,7 +60,6 @@ If a user request path is a directory,
|
|||
will try to serve a
|
||||
.Pa index.gmi
|
||||
file inside that directory.
|
||||
If not found, it will return an error 51 (not found) to the user.
|
||||
.Pp
|
||||
The options are as follows:
|
||||
.Bl -tag -width 12m
|
||||
|
@ -73,7 +69,8 @@ The certificate to use, by default is
|
|||
.It Fl d Ar docs
|
||||
The root directory to serve.
|
||||
.Nm
|
||||
won't serve any file that is outside that directory, by default
|
||||
won't serve any file that is outside that directory.
|
||||
By default is
|
||||
.Pa docs .
|
||||
.It Fl h
|
||||
Print the usage and exit.
|
||||
|
@ -93,24 +90,50 @@ executable file will execute it and fed its output to the client.
|
|||
The CGI scripts will inherit the environment from
|
||||
.Nm
|
||||
with these additional variables set:
|
||||
.Bl -tag -width 15m
|
||||
.Bl -tag -width 18m
|
||||
.It Ev SERVER_SOFTWARE
|
||||
"gmid"
|
||||
.It Ev SERVER_PROTOCOL
|
||||
"gemini"
|
||||
.It Ev SERVER_PORT
|
||||
"1965"
|
||||
.It Ev PATH_INFO
|
||||
the request path
|
||||
.It Ev PATH_TRANSLATED
|
||||
the full path: the concatenation of the document root and the request
|
||||
path
|
||||
.It Ev SCRIPT_NAME
|
||||
The (public) path to the script.
|
||||
.It Ev SCRIPT_EXECUTABLE
|
||||
The full path to the executable.
|
||||
.It Ev REQUEST_URI
|
||||
The user request (without the query parameters).
|
||||
.It Ev REQUEST_RELATIVE
|
||||
The request relative to the script.
|
||||
.It Ev QUERY_STRING
|
||||
the query string if present in the request URL, otherwise it
|
||||
won't be set.
|
||||
.It Ev REMOTE_ADDR
|
||||
the IP address of the client in dot notation
|
||||
The query parameters.
|
||||
.It Ev REMOTE_HOST
|
||||
The remote IP address.
|
||||
.It Ev DOCUMENT_ROOT
|
||||
The root directory being served, the one provided with the
|
||||
.Ar d
|
||||
parameter to
|
||||
.Nm
|
||||
.El
|
||||
.Pp
|
||||
Let's say you have a script in
|
||||
.Pa /cgi-bin/script
|
||||
and the user request is
|
||||
.Pa /cgi-bin/script/foo/bar?quux .
|
||||
Then
|
||||
.Ev SCRIPT_NAME
|
||||
will be
|
||||
.Pa /cgi-bin/script ,
|
||||
.Ev SCRIPT_EXECUTABLE
|
||||
will be
|
||||
.Pa $DOCUMENT_ROOT/cgi-bin/script ,
|
||||
.Ev REQUEST_URI
|
||||
will be
|
||||
.Pa /cgi-bin/script/foo/bar ,
|
||||
.Ev REQUEST_RELATIVE
|
||||
will be
|
||||
.Pa foo/bar
and
|
||||
.Ev QUERY_STRING
|
||||
will be
|
||||
.Ar quux .
|
||||
.Sh EXAMPLES
|
||||
To quickly get started
|
||||
.Bd -literal -offset indent
|
||||
|
@ -146,7 +169,7 @@ option is
|
|||
.Pa cgi-bin
|
||||
and not
|
||||
.Pa docs/cgi-bin ,
|
||||
since it’s relative to the document root.
|
||||
since it's relative to the document root.
|
||||
.Sh CAVEATS
|
||||
.Bl -bullet
|
||||
.It
|
||||
|
|
216
gmid.c
216
gmid.c
|
@ -77,6 +77,13 @@ struct client {
|
|||
struct in_addr addr;
|
||||
};
|
||||
|
||||
enum {
|
||||
FILE_EXISTS,
|
||||
FILE_EXECUTABLE,
|
||||
FILE_DIRECTORY,
|
||||
FILE_MISSING,
|
||||
};
|
||||
|
||||
struct etm { /* file extension to mime */
|
||||
const char *mime;
|
||||
const char *ext;
|
||||
|
@ -118,14 +125,15 @@ char *url_after_proto(char*);
|
|||
char *url_start_of_request(char*);
|
||||
int url_trim(struct client*, char*);
|
||||
char *adjust_path(char*);
|
||||
int path_isdir(char*);
|
||||
ssize_t filesize(int);
|
||||
|
||||
int start_reply(struct pollfd*, struct client*, int, const char*);
|
||||
const char *path_ext(const char*);
|
||||
const char *mime(const char*);
|
||||
int check_path(const char*, int*);
|
||||
int check_for_cgi(char *, char*, struct pollfd*, struct client*);
|
||||
int open_file(char*, char*, struct pollfd*, struct client*);
|
||||
void start_cgi(const char*, const char*, struct pollfd*, struct client*);
|
||||
int start_cgi(const char*, const char*, const char*, struct pollfd*, struct client*);
|
||||
void cgi_setpoll_on_child(struct pollfd*, struct client*);
|
||||
void cgi_setpoll_on_client(struct pollfd*, struct client*);
|
||||
void handle_cgi(struct pollfd*, struct client*);
|
||||
|
@ -244,14 +252,6 @@ adjust_path(char *path)
|
|||
}
|
||||
}
|
||||
|
||||
int
|
||||
path_isdir(char *path)
|
||||
{
|
||||
if (*path == '\0')
|
||||
return 1;
|
||||
return path[strlen(path)-1] == '/';
|
||||
}
|
||||
|
||||
int
|
||||
start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason)
|
||||
{
|
||||
|
@ -324,12 +324,78 @@ mime(const char *path)
|
|||
}
|
||||
|
||||
int
|
||||
open_file(char *path, char *query, struct pollfd *fds, struct client *c)
|
||||
check_path(const char *path, int *fd)
|
||||
{
|
||||
char fpath[PATHBUF];
|
||||
struct stat sb;
|
||||
|
||||
assert(path != NULL);
|
||||
if ((*fd = openat(dirfd, path,
|
||||
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
|
||||
return FILE_MISSING;
|
||||
}
|
||||
|
||||
if (fstat(*fd, &sb) == -1) {
|
||||
dprintf(logfd, "failed stat for %s\n", path);
|
||||
return FILE_MISSING;
|
||||
}
|
||||
|
||||
if (S_ISDIR(sb.st_mode))
|
||||
return FILE_DIRECTORY;
|
||||
|
||||
if (sb.st_mode & S_IXUSR)
|
||||
return FILE_EXECUTABLE;
|
||||
|
||||
return FILE_EXISTS;
|
||||
}
|
||||
|
||||
/*
|
||||
* the inverse of this algorithm, i.e. starting from the start of the
|
||||
* path + strlen(cgi) and checking each component in turn, should be
|
||||
* faster. But it's tedious to write. This does the opposite: starts
|
||||
* from the end and strip one component at a time, until either an
|
||||
* executable is found or we emptied the path.
|
||||
*/
|
||||
int
|
||||
check_for_cgi(char *path, char *query, struct pollfd *fds, struct client *c)
|
||||
{
|
||||
char *end;
|
||||
end = strchr(path, '\0');
|
||||
|
||||
/* NB: assume CGI is enabled and path matches cgi */
|
||||
|
||||
while (end > path) {
|
||||
/* go up one level. UNIX paths are simple and POSIX
|
||||
* dirname, with its ambiguities on if the given path
|
||||
* is changed or not, gives me headaches. */
|
||||
while (*end != '/')
|
||||
end--;
|
||||
*end = '\0';
|
||||
|
||||
switch (check_path(path, &c->fd)) {
|
||||
case FILE_EXECUTABLE:
|
||||
return start_cgi(path, end+1, query, fds,c);
|
||||
case FILE_MISSING:
|
||||
break;
|
||||
default:
|
||||
goto err;
|
||||
}
|
||||
|
||||
*end = '/';
|
||||
end--;
|
||||
}
|
||||
|
||||
err:
|
||||
if (!start_reply(fds, c, NOT_FOUND, "not found"))
|
||||
return 0;
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
open_file(char *path, char *query, struct pollfd *fds, struct client *c)
|
||||
{
|
||||
char fpath[PATHBUF];
|
||||
|
||||
bzero(fpath, sizeof(fpath));
|
||||
|
||||
|
@ -337,60 +403,58 @@ open_file(char *path, char *query, struct pollfd *fds, struct client *c)
|
|||
fpath[0] = '.';
|
||||
strlcat(fpath, path, PATHBUF);
|
||||
|
||||
if ((c->fd = openat(dirfd, fpath,
|
||||
O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) {
|
||||
LOG(c, "open failed: %s", fpath);
|
||||
if (!start_reply(fds, c, NOT_FOUND, "not found"))
|
||||
return 0;
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
switch (check_path(fpath, &c->fd)) {
|
||||
case FILE_EXECUTABLE:
|
||||
/* +2 to skip the ./ */
|
||||
if (cgi != NULL && starts_with(fpath+2, cgi))
|
||||
return start_cgi(fpath, "", query, fds, c);
|
||||
|
||||
if (fstat(c->fd, &sb) == -1) {
|
||||
LOG(c, "fstat failed for %s", fpath);
|
||||
if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
|
||||
return 0;
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
/* fallthrough */
|
||||
|
||||
if (S_ISDIR(sb.st_mode)) {
|
||||
case FILE_EXISTS:
|
||||
if ((c->len = filesize(c->fd)) == -1) {
|
||||
LOG(c, "failed to get file size for %s", fpath);
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
|
||||
c->fd, 0)) == MAP_FAILED) {
|
||||
warn("mmap: %s", fpath);
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
c->i = c->buf;
|
||||
return start_reply(fds, c, SUCCESS, mime(fpath));
|
||||
|
||||
case FILE_DIRECTORY:
|
||||
LOG(c, "%s is a directory, trying %s/index.gmi", fpath, fpath);
|
||||
close(c->fd);
|
||||
c->fd = -1;
|
||||
send_dir(fpath, fds, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* +2 to skip the ./ */
|
||||
if ((sb.st_mode & S_IXUSR) && cgi != NULL && starts_with(fpath+2, cgi)) {
|
||||
start_cgi(fpath, query, fds, c);
|
||||
return 0;
|
||||
}
|
||||
case FILE_MISSING:
|
||||
if (cgi != NULL && starts_with(fpath+2, cgi))
|
||||
return check_for_cgi(fpath, query, fds, c);
|
||||
|
||||
if ((c->len = filesize(c->fd)) == -1) {
|
||||
LOG(c, "failed to get file size for %s", fpath);
|
||||
if (!start_reply(fds, c, NOT_FOUND, "not found"))
|
||||
return 0;
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((c->buf = mmap(NULL, c->len, PROT_READ, MAP_PRIVATE,
|
||||
c->fd, 0)) == MAP_FAILED) {
|
||||
warn("mmap: %s", fpath);
|
||||
goodbye(fds, c);
|
||||
return 0;
|
||||
default:
|
||||
/* unreachable */
|
||||
abort();
|
||||
}
|
||||
c->i = c->buf;
|
||||
|
||||
return start_reply(fds, c, SUCCESS, mime(fpath));
|
||||
}
|
||||
|
||||
void
|
||||
start_cgi(const char *path, const char *query,
|
||||
int
|
||||
start_cgi(const char *spath, const char *relpath, const char *query,
|
||||
struct pollfd *fds, struct client *c)
|
||||
{
|
||||
pid_t pid;
|
||||
int p[2];
|
||||
int p[2]; /* read end, write end */
|
||||
|
||||
if (pipe(p) == -1)
|
||||
goto err;
|
||||
|
@ -399,65 +463,68 @@ start_cgi(const char *path, const char *query,
|
|||
case -1:
|
||||
goto err;
|
||||
|
||||
case 0: { /* child */
|
||||
char *expath;
|
||||
case 0: { /* child */
|
||||
char *ex, *requri;
|
||||
char addr[INET_ADDRSTRLEN];
|
||||
char *argv[] = { NULL, NULL, NULL };
|
||||
|
||||
/* skip the initial ./ */
|
||||
path += 2;
|
||||
spath++;
|
||||
|
||||
close(p[0]); /* close the read end */
|
||||
close(p[0]);
|
||||
if (dup2(p[1], 1) == -1)
|
||||
goto childerr;
|
||||
|
||||
if (inet_ntop(c->af, &c->addr, addr, sizeof(addr)) == NULL)
|
||||
goto childerr;
|
||||
|
||||
/* skip the ./ at the start of path*/
|
||||
if (asprintf(&expath, "%s%s", dir, path) == -1)
|
||||
goto childerr;
|
||||
argv[0] = argv[1] = expath;
|
||||
|
||||
if (asprintf(&ex, "%s%s", dir, spath+1) == -1)
|
||||
goto childerr;
|
||||
|
||||
if (asprintf(&requri, "%s%s%s", spath,
|
||||
*relpath == '\0' ? "" : "/",
|
||||
relpath) == -1)
|
||||
goto childerr;
|
||||
|
||||
argv[0] = argv[1] = ex;
|
||||
|
||||
/* fix the env */
|
||||
setenv("SERVER_SOFTWARE", "gmid", 1);
|
||||
/* setenv("SERVER_NAME", "", 1); */
|
||||
/* setenv("GATEWAY_INTERFACE", "CGI/version", 1); */
|
||||
setenv("SERVER_PROTOCOL", "gemini", 1);
|
||||
setenv("SERVER_PORT", "1965", 1);
|
||||
setenv("PATH_INFO", path, 1);
|
||||
setenv("PATH_TRANSLATED", expath, 1);
|
||||
/* setenv("SERVER_NAME", "", 1); */
|
||||
setenv("SCRIPT_NAME", spath, 1);
|
||||
setenv("SCRIPT_EXECUTABLE", ex, 1);
|
||||
setenv("REQUEST_URI", requri, 1);
|
||||
setenv("REQUEST_RELATIVE", relpath, 1);
|
||||
if (query != NULL)
|
||||
setenv("QUERY_STRING", query, 1);
|
||||
setenv("REMOTE_ADDR", addr, 1);
|
||||
setenv("REMOTE_HOST", addr, 1);
|
||||
setenv("DOCUMENT_ROOT", dir, 1);
|
||||
|
||||
execvp(expath, argv);
|
||||
execvp(ex, argv);
|
||||
goto childerr;
|
||||
}
|
||||
|
||||
default: /* parent */
|
||||
close(p[1]); /* close the write end */
|
||||
close(p[1]);
|
||||
close(c->fd);
|
||||
c->fd = p[0];
|
||||
c->child = pid;
|
||||
mark_nonblock(c->fd);
|
||||
c->state = S_SENDING;
|
||||
handle_cgi(fds, c);
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err:
|
||||
if (!start_reply(fds, c, TEMP_FAILURE, "internal server error"))
|
||||
return;
|
||||
return 0;
|
||||
goodbye(fds, c);
|
||||
return;
|
||||
return 0;
|
||||
|
||||
childerr:
|
||||
dprintf(p[1], "%d internal server error\r\n", TEMP_FAILURE);
|
||||
close(p[1]);
|
||||
|
||||
/* don't call atexit stuff */
|
||||
_exit(1);
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -645,10 +712,7 @@ handle(struct pollfd *fds, struct client *client)
|
|||
query ? "?" : "",
|
||||
query ? query : "");
|
||||
|
||||
if (path_isdir(path))
|
||||
send_dir(path, fds, client);
|
||||
else
|
||||
send_file(path, query, fds, client);
|
||||
send_file(path, query, fds, client);
|
||||
break;
|
||||
|
||||
case S_INITIALIZING:
|
||||
|
|
Loading…
Reference in New Issue