drop the dependency on lex by implementing yylex ourselves

The implementation is based on doas' parse.y.  This gives us several
benefits: cleaner code, \ to break long lines, better handling of
quotes, etc.
This commit is contained in:
Omar Polo 2021-06-16 14:43:16 +00:00
parent 984c46a82e
commit 74f0778b9a
6 changed files with 203 additions and 33 deletions

View File

@ -1,3 +1,10 @@
2021-06-16 Omar Polo <op@omarpolo.com>
* parse.y (yylex): drop the dependency on lex by implementing
yylex ourselves (the implementation is based on doas' parse.y).
This gives us several benefits: cleaner code, \ to break long
lines, better handling of quotes, etc.
2021-06-11 Omar Polo <op@omarpolo.com>
* parse.y (servopt): add `param' keyword

View File

@ -7,15 +7,12 @@ Makefile.local: configure
include Makefile.local
lex.yy.c: lex.l y.tab.c
${LEX} lex.l
y.tab.c: parse.y
${YACC} -b y -d parse.y
${YACC} -b y parse.y
SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c \
utils.c log.c dirs.c fcgi.c
OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
OBJS = ${SRCS:.c=.o} y.tab.o ${COMPAT}
gmid: ${OBJS}
${CC} ${OBJS} -o gmid ${LDFLAGS}

View File

@ -92,8 +92,8 @@ server "example.com" {
## Building
gmid depends on a POSIX libc, libevent2, OpenSSL/LibreSSL and libtls
(provided either by LibreSSL or libretls). At build time, flex and
yacc (or GNU bison) are also needed.
(provided either by LibreSSL or libretls). At build time, yacc (or
GNU bison) is also needed.
The build is as simple as

14
configure vendored
View File

@ -40,7 +40,6 @@ CFLAGS="${CFLAGS} -g -W -Wall -Wextra -Wmissing-prototypes -Wstrict-prototypes"
CFLAGS="${CFLAGS} -Wwrite-strings -Wno-unused-parameter"
LDFLAGS="-ltls -levent"
LD_IMSG=
LEX=lex
STATIC=
YACC=yacc
@ -74,17 +73,6 @@ if which pkg-config 2>/dev/null 1>&2; then
esac
fi
# auto detect lex/flex
which ${LEX} 2>/dev/null 1>&2 || {
echo "${LEX} not found: trying flex" 1>&2
echo "${LEX} not found: trying flex" 1>&3
LEX=flex
which ${LEX} 2>/dev/null 1>&2 || {
echo "${LEX} not found: giving up" 1>&2
echo "${LEX} not found: giving up" 1>&3
}
}
# auto detect yacc/bison
which ${YACC} 2>/dev/null 1>&2 || {
echo "${YACC} not found: trying bison" 1>&2
@ -112,7 +100,6 @@ for keyvals in "$@"; do
CFLAGS) CFLAGS="$val" ;;
DESTDIR) DESTDIR="$val" ;;
LDFLAGS) LDFLAGS="$val" ;;
LEX) LEX="$lex" ;;
PREFIX) PREFIX="$val" ;;
YACC) YACC="$val" ;;
*)
@ -398,7 +385,6 @@ CC = ${CC}
CFLAGS = ${CFLAGS}
LDFLAGS = ${LDFLAGS} ${LD_IMSG}
YACC = ${YACC}
LEX = ${LEX}
STATIC = ${STATIC}
PREFIX = ${PREFIX}
BINDIR = ${BINDIR}

6
gmid.h
View File

@ -310,12 +310,6 @@ void init_config(void);
void free_config(void);
void drop_priv(void);
/* provided by lex/yacc */
extern FILE *yyin;
extern int yylineno;
extern int yyparse(void);
extern int yylex(void);
void yyerror(const char*, ...);
int parse_portno(const char*);
void parse_conf(const char*);

198
parse.y
View File

@ -1,4 +1,3 @@
/* -*- mode: fundamental; indent-tabs-mode: t; -*- */
%{
/*
@ -17,13 +16,17 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <ctype.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "gmid.h"
FILE *yyfp;
/*
* #define YYDEBUG 1
* int yydebug = 1;
@ -32,12 +35,14 @@
struct vhost *host;
struct location *loc;
int goterror = 0;
static int goterror;
static int lineno, colno;
static struct vhost *new_vhost(void);
static struct location *new_location(void);
void yyerror(const char*, ...);
static int yylex(void);
int parse_portno(const char*);
void parse_conf(const char*);
char *ensure_absolute_path(char*);
@ -109,7 +114,7 @@ vhost : TSERVER TSTRING {
if (strstr($2, "xn--") != NULL) {
warnx("%s:%d \"%s\" looks like punycode: "
"you should use the decoded hostname.",
config_path, yylineno, $2);
config_path, lineno, $2);
}
} '{' servopts locations '}' {
@ -278,12 +283,193 @@ yyerror(const char *msg, ...)
goterror = 1;
va_start(ap, msg);
fprintf(stderr, "%s:%d: ", config_path, yylineno);
fprintf(stderr, "%s:%d: ", config_path, lineno);
vfprintf(stderr, msg, ap);
fprintf(stderr, "\n");
va_end(ap);
}
/*
 * Reserved words recognised by yylex() and the parser token each one
 * maps to.  The table is only ever read, so it is const: accidental
 * writes become compile errors.  Entries are kept in alphabetical
 * order.
 */
static const struct keyword {
	const char	*word;		/* spelling as it appears in the config */
	int		 token;		/* token returned to the parser */
} keywords[] = {
	{"alias", TALIAS},
	{"auto", TAUTO},
	{"block", TBLOCK},
	{"ca", TCA},
	{"cert", TCERT},
	{"cgi", TCGI},
	{"chroot", TCHROOT},
	{"client", TCLIENT},
	{"default", TDEFAULT},
	{"entrypoint", TENTRYPOINT},
	{"env", TENV},
	{"fastcgi", TFASTCGI},
	{"index", TINDEX},
	{"ipv6", TIPV6},
	{"key", TKEY},
	{"lang", TLANG},
	{"location", TLOCATION},
	{"log", TLOG},
	{"mime", TMIME},
	{"param", TPARAM},
	{"port", TPORT},
	{"prefork", TPREFORK},
	{"protocols", TPROTOCOLS},
	{"require", TREQUIRE},
	{"return", TRETURN},
	{"root", TROOT},
	{"server", TSERVER},
	{"spawn", TSPAWN},
	{"strip", TSTRIP},
	{"tcp", TTCP},
	{"type", TTYPE},
	{"user", TUSER},
};
/*
 * Hand-written lexer, taken and adapted from doas' parse.y.
 *
 * Reads the next token from yyfp and returns:
 *   - '{' or '}' for a lone brace;
 *   - the matching token for a bare word found in keywords[];
 *   - TNUM, with yylval.num set by parse_portno(), for a bare word
 *     starting with '-' or a digit;
 *   - TBOOL, with yylval.num set to 1 or 0, for a bare "on" / "off";
 *   - TSTRING, with yylval.str set to a strdup()'d copy, otherwise;
 *   - 0 at end of input or on a read error (reported via yyerror).
 *
 * Whitespace (newlines included) separates words and `#' starts a
 * comment that runs to the end of the line.  A word that contained a
 * double quote or an escaped newline is never looked up as a keyword,
 * number or boolean (nonkw).  A backslash escapes the next character.
 * Words are collected in a 1024-byte buffer; on overflow "line too
 * long" is reported and the buffer restarts from the beginning.
 *
 * lineno and colno are updated as input is consumed.
 */
static int
yylex(void)
{
	char buf[1024], *ebuf, *p, *str;
	int c, quotes = 0, escape = 0, qpos = -1, nonkw = 0;
	size_t i;

	p = buf;
	ebuf = buf + sizeof(buf);

repeat:
	/* skip whitespace first */
	for (c = getc(yyfp); isspace(c); c = getc(yyfp)) {
		colno++;
		if (c == '\n') {
			lineno++;
			colno = 0;
		}
	}

	/* check for special one-character constructions */
	switch (c) {
	case '{':
	case '}':
		return c;
	case '#':
		/* skip comments; NUL is allowed; no continuation */
		while ((c = getc(yyfp)) != '\n')
			if (c == EOF)
				goto eof;
		colno = 0;
		lineno++;
		goto repeat;
	case EOF:
		goto eof;
	}

	/* parsing next word */
	for (;; c = getc(yyfp), colno++) {
		switch (c) {
		case '\0':
			yyerror("unallowed character NULL in column %d",
			    colno+1);
			escape = 0;
			continue;
		case '\\':
			/* a second backslash in a row is a literal one */
			escape = !escape;
			if (escape)
				continue;
			break;
		case '\n':
			if (quotes)
				yyerror("unterminated quotes in column %d",
				    colno+1);
			if (escape) {
				/*
				 * The newline is pushed back at eow and is
				 * counted by the whitespace skipper on the
				 * next pass; bumping lineno here as well
				 * would count this line twice.
				 */
				nonkw = 1;
				escape = 0;
			}
			goto eow;
		case EOF:
			if (escape)
				yyerror("unterminated escape in column %d",
				    colno);
			if (quotes)
				yyerror("unterminated quotes in column %d",
				    qpos+1);
			goto eow;
		case '{':
		case '}':
		case '#':
		case ' ':
		case '\t':
			/* these end the word unless escaped or quoted */
			if (!escape && !quotes)
				goto eow;
			break;
		case '"':
			if (!escape) {
				quotes = !quotes;
				if (quotes) {
					nonkw = 1;
					qpos = colno;
				}
				continue;
			}
		}
		*p++ = c;
		if (p == ebuf) {
			yyerror("line too long");
			p = buf;
		}
		escape = 0;
	}

eow:
	*p = 0;
	/* give the terminator back so the next call sees it */
	if (c != EOF)
		ungetc(c, yyfp);
	if (p == buf) {
		/*
		 * There could be a number of reasons for an empty
		 * buffer, and we handle all of them here, to avoid
		 * cluttering the main loop.
		 */
		if (c == EOF)
			goto eof;
		else if (qpos == -1) /* accept, e.g., empty args: cmd foo args "" */
			goto repeat;
	}
	if (!nonkw) {
		for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i) {
			if (!strcmp(buf, keywords[i].word))
				return keywords[i].token;
		}
	}
	/*
	 * Go through unsigned char: passing a negative plain char to
	 * isdigit() is undefined behaviour.
	 */
	c = (unsigned char)*buf;
	if (!nonkw && (c == '-' || isdigit(c))) {
		yylval.num = parse_portno(buf);
		return TNUM;
	}
	if (!nonkw && !strcmp(buf, "on")) {
		yylval.num = 1;
		return TBOOL;
	}
	if (!nonkw && !strcmp(buf, "off")) {
		yylval.num = 0;
		return TBOOL;
	}
	if ((str = strdup(buf)) == NULL)
		err(1, "%s", __func__);
	yylval.str = str;
	return TSTRING;

eof:
	if (ferror(yyfp))
		yyerror("input error reading config");
	return 0;
}
int
parse_portno(const char *p)
{
@ -300,10 +486,10 @@ void
parse_conf(const char *path)
{
config_path = path;
if ((yyin = fopen(path, "r")) == NULL)
if ((yyfp = fopen(path, "r")) == NULL)
err(1, "cannot open config: %s", path);
yyparse();
fclose(yyin);
fclose(yyfp);
if (goterror)
exit(1);