fix: Escape characters that could break URL
This commit is contained in:
parent
d6d4af0c8c
commit
a2e13b6dc3
92
src/link.c
92
src/link.c
|
@ -12,6 +12,7 @@
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <regex.h>
|
||||||
|
|
||||||
#define STATUS_LEN 64
|
#define STATUS_LEN 64
|
||||||
|
|
||||||
|
@ -28,6 +29,7 @@ int ROOT_LINK_OFFSET = 0;
|
||||||
*/
|
*/
|
||||||
static pthread_mutex_t link_lock;
|
static pthread_mutex_t link_lock;
|
||||||
static void make_link_relative(const char *page_url, char *link_url);
|
static void make_link_relative(const char *page_url, char *link_url);
|
||||||
|
static char *escape_full_url(const char *f_url);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief create a new Link
|
* \brief create a new Link
|
||||||
|
@ -273,11 +275,12 @@ static LinkTable *single_LinkTable_new(const char *url)
|
||||||
return linktbl;
|
return linktbl;
|
||||||
}
|
}
|
||||||
|
|
||||||
LinkTable *LinkSystem_init(const char *url)
|
LinkTable *LinkSystem_init(const char *f_url)
|
||||||
{
|
{
|
||||||
if (pthread_mutex_init(&link_lock, NULL)) {
|
if (pthread_mutex_init(&link_lock, NULL)) {
|
||||||
lprintf(error, "link_lock initialisation failed!\n");
|
lprintf(error, "link_lock initialisation failed!\n");
|
||||||
}
|
}
|
||||||
|
char *url = escape_full_url(f_url);
|
||||||
int url_len = strnlen(url, MAX_PATH_LEN) - 1;
|
int url_len = strnlen(url, MAX_PATH_LEN) - 1;
|
||||||
/*
|
/*
|
||||||
* --------- Set the length of the root link -----------
|
* --------- Set the length of the root link -----------
|
||||||
|
@ -317,6 +320,7 @@ LinkTable *LinkSystem_init(const char *url)
|
||||||
} else {
|
} else {
|
||||||
lprintf(fatal, "Invalid CONFIG.mode\n");
|
lprintf(fatal, "Invalid CONFIG.mode\n");
|
||||||
}
|
}
|
||||||
|
FREE(url);
|
||||||
return ROOT_LINK_TBL;
|
return ROOT_LINK_TBL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1177,3 +1181,89 @@ static void make_link_relative(const char *page_url, char *link_url)
|
||||||
before it. */
|
before it. */
|
||||||
memmove(link_url, link_url + skip_len, strlen(link_url) - skip_len + 1);
|
memmove(link_url, link_url + skip_len, strlen(link_url) - skip_len + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief Pattern matching strings for URLs entered by user.
|
||||||
|
* \details The order is important as we match from most specific to least.
|
||||||
|
* For example, 192.168.1.1:80 is more specific than 192.168.1.1, even though
|
||||||
|
* visiting the latter refers to the former by default.
|
||||||
|
*/
|
||||||
|
const char *const ip_patterns[] = {
    /* Dotted-quad IPv4 address followed by ':' and any run of port digits. */
    "([0-9]{1,3}\\.){3}[0-9]{1,3}:[0-9]*",
    /* Dotted-quad IPv4 address on its own. */
    "([0-9]{1,3}\\.){3}[0-9]{1,3}",
    /* Sentinel: escape_full_url() walks the table until it reads NULL. */
    NULL,
};
|
||||||
|
|
||||||
|
static char *escape_full_url(const char *f_url)
|
||||||
|
{
|
||||||
|
char *const proto = strstr(f_url, "://");
|
||||||
|
const char *url = proto + 3;
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
int ip_in_path = 0;
|
||||||
|
regex_t regex;
|
||||||
|
regmatch_t pmatch[1];
|
||||||
|
|
||||||
|
for (int i = 0; ip_patterns[i]; i++) {
|
||||||
|
ret = regcomp(®ex, ip_patterns[i], REG_EXTENDED | REG_ICASE);
|
||||||
|
if (ret) {
|
||||||
|
lprintf(fatal, "Could not compile regex\n");
|
||||||
|
}
|
||||||
|
ret = regexec(®ex, url, 1, pmatch, 0);
|
||||||
|
if (!ret) {
|
||||||
|
ip_in_path = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ip_in_path) {
|
||||||
|
int path_offset = pmatch[0].rm_eo - pmatch[0].rm_so;
|
||||||
|
if (*(url + path_offset) == '/' ) {
|
||||||
|
url += path_offset + 1;
|
||||||
|
} else {
|
||||||
|
url += path_offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CURL *c = curl_easy_init();
|
||||||
|
char *next;
|
||||||
|
char *unescaped_path = curl_easy_unescape(c, url, 0, NULL);
|
||||||
|
char *escaped_path = curl_easy_escape(c, unescaped_path, 0);
|
||||||
|
curl_free(unescaped_path);
|
||||||
|
|
||||||
|
char *const base_url = CALLOC(MAX_PATH_LEN, sizeof(char));
|
||||||
|
next = mempcpy(base_url, f_url, url - f_url);
|
||||||
|
int len = strnlen(escaped_path, MAX_PATH_LEN);
|
||||||
|
if (strnlen(next, MAX_PATH_LEN - (url - f_url)) + len >= MAX_PATH_LEN - 1) {
|
||||||
|
lprintf(fatal, "URL too long\n");
|
||||||
|
}
|
||||||
|
next = mempcpy(next, escaped_path, len);
|
||||||
|
next -= len;
|
||||||
|
|
||||||
|
/* At this point, next should point to the part just after the IP address
|
||||||
|
* or just after the protocol, depending on whether a user entered a URL
|
||||||
|
* with a domain name or IP address.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* curl_easy_escape does the correct thing and escapes whatever may break
|
||||||
|
* the URL, but we must always preserve the slash in the URL since we make
|
||||||
|
* decisions elsewhere based on the path (slash).
|
||||||
|
*/
|
||||||
|
const char *e_slash;
|
||||||
|
const char *e_p;
|
||||||
|
char *b_p;
|
||||||
|
e_slash = strstr(escaped_path, "%2F");
|
||||||
|
for(e_p=escaped_path, b_p=next; (b_p - next < len) || e_slash; ) {
|
||||||
|
*b_p++ = *e_p++;
|
||||||
|
if (e_p == e_slash) {
|
||||||
|
*b_p++ = '/';
|
||||||
|
e_p += 3;
|
||||||
|
e_slash = strstr(e_p, "%2F");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
curl_free(escaped_path);
|
||||||
|
curl_easy_cleanup(c);
|
||||||
|
regfree(®ex);
|
||||||
|
return base_url;
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue