Do the right thing with sites that require the final slash

Some web sites will return 404 if you fetch a directory without the
final slash. For example, https://archive.mozilla.org/pub/ works, but
https://archive.mozilla.org/pub does not. We need to do two things to
accommodate this:

* When processing the root URL of the filesystem, instead of stripping
  off the final slash, just set the offset to ignore it.
* In the link structure, store the actual URL tail of the link
  (including its final slash, if there is one) separately from its
  name, and append that tail instead of the name when constructing
  the URL for curl.
This commit is contained in:
Jonathan Kamens 2023-09-03 13:56:02 -04:00 committed by Fufu Fang
parent 1e80844831
commit 41cb4b80bc
2 changed files with 6 additions and 11 deletions

View File

@ -36,6 +36,7 @@ static Link *Link_new(const char *linkname, LinkType type)
Link *link = CALLOC(1, sizeof(Link));
strncpy(link->linkname, linkname, MAX_FILENAME_LEN);
strncpy(link->linkpath, linkname, MAX_FILENAME_LEN);
link->type = type;
/*
@ -269,26 +270,20 @@ static LinkTable *single_LinkTable_new(const char *url)
return linktbl;
}
LinkTable *LinkSystem_init(const char *raw_url)
LinkTable *LinkSystem_init(const char *url)
{
if (pthread_mutex_init(&link_lock, NULL)) {
lprintf(error, "link_lock initialisation failed!\n");
}
/*
* Remove excess '/' if it is there
*/
char *url = strdup(raw_url);
int url_len = strnlen(url, MAX_PATH_LEN) - 1;
if (url[url_len] == '/') {
url[url_len] = '\0';
}
/*
* --------- Set the length of the root link -----------
*/
/*
* This is where the '/' should be
*/
ROOT_LINK_OFFSET = strnlen(url, MAX_PATH_LEN);
ROOT_LINK_OFFSET = strnlen(url, MAX_PATH_LEN) -
((url[url_len] == '/') ? 1 : 0);
/*
* --------------------- Enable cache system --------------------
@ -319,7 +314,6 @@ LinkTable *LinkSystem_init(const char *raw_url)
} else {
lprintf(fatal, "Invalid CONFIG.mode\n");
}
FREE(url);
return ROOT_LINK_TBL;
}
@ -469,7 +463,7 @@ static void LinkTable_fill(LinkTable *linktbl)
for (int i = 1; i < linktbl->num; i++) {
Link *this_link = linktbl->links[i];
char *url;
url = path_append(head_link->f_url, this_link->linkname);
url = path_append(head_link->f_url, this_link->linkpath);
strncpy(this_link->f_url, url, MAX_PATH_LEN);
FREE(url);
char *unescaped_linkname;

View File

@ -43,6 +43,7 @@ struct LinkTable {
struct Link {
/** \brief The link name in the last level of the URL */
char linkname[MAX_FILENAME_LEN + 1];
char linkpath[MAX_FILENAME_LEN + 1];
/** \brief The full URL of the file */
char f_url[MAX_PATH_LEN + 1];
/** \brief The type of the link */