2018-07-26 11:29:44 +02:00
|
|
|
#include "link.h"
|
|
|
|
|
2019-04-22 13:06:34 +02:00
|
|
|
#include "cache.h"
|
2018-07-26 11:29:44 +02:00
|
|
|
#include "network.h"
|
|
|
|
|
|
|
|
#include <gumbo.h>
|
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <errno.h>
|
2019-04-24 00:48:08 +02:00
|
|
|
#include <stdlib.h>
|
2018-07-26 11:29:44 +02:00
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
|
|
/* ---------------- External variables -----------------------*/
|
2018-07-30 15:20:04 +02:00
|
|
|
/**
 * \brief Root LinkTable
 * \details path_to_Link() starts every path lookup from this table.
 */
LinkTable *ROOT_LINK_TBL = NULL;

/**
 * \brief Character offset applied to a URL
 * \details LinkTable_new() skips this many leading characters of the URL
 * before unescaping the remainder into the path it hands to the cache
 * functions.
 */
int ROOT_LINK_OFFSET = 0;
|
2018-07-26 11:29:44 +02:00
|
|
|
|
2019-09-01 01:43:50 +02:00
|
|
|
/* ----------------- Static variables ----------------------- */
|
|
|
|
|
2019-08-31 22:21:28 +02:00
|
|
|
/**
 * \brief LinkTable generation priority lock
 * \details LinkTable_new() holds this mutex while it builds a table, so
 * LinkTable generation runs exclusively. path_download() acquires and
 * immediately releases it before starting a transfer, which makes a new
 * file transfer wait for any ongoing LinkTable generation. This effectively
 * gives LinkTable generation priority over file transfer.
 * \note Initialised by link_system_init().
 */
static pthread_mutex_t link_lock;
|
|
|
|
|
|
|
|
void link_system_init()
|
|
|
|
{
|
|
|
|
if (pthread_mutex_init(&link_lock, NULL) != 0) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"link_system_init(): link_lock initialisation failed!\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-04-22 14:32:15 +02:00
|
|
|
/**
 * \brief Append a Link to a LinkTable
 * \details Grows the links array by one slot and stores the link in the new
 * last position. The process is terminated if the array cannot be grown.
 * \param[in,out] linktbl the table to extend
 * \param[in] link the link stored in the new slot
 */
static void LinkTable_add(LinkTable *linktbl, Link *link)
{
    int slot = linktbl->num;

    linktbl->num = slot + 1;
    linktbl->links = realloc(linktbl->links, linktbl->num * sizeof(Link *));
    if (linktbl->links == NULL) {
        fprintf(stderr, "LinkTable_add(): realloc failure!\n");
        exit(EXIT_FAILURE);
    }

    linktbl->links[slot] = link;
}
|
|
|
|
|
|
|
|
/**
 * \brief Allocate a zero-initialised Link with the given name and type
 * \details At most MAX_FILENAME_LEN characters of linkname are copied, and
 * one trailing '/' is stripped from the stored name. The process is
 * terminated if the allocation fails.
 * \param[in] linkname name of the link, with or without a trailing '/'
 * \param[in] type type recorded in the new link
 * \return the newly allocated link
 */
static Link *Link_new(const char *linkname, LinkType type)
{
    Link *link = calloc(1, sizeof(Link));
    if (!link) {
        fprintf(stderr, "Link_new(): calloc failure!\n");
        exit(EXIT_FAILURE);
    }

    /*
     * NOTE(review): strncpy() only leaves the name terminated if
     * Link.linkname holds more than MAX_FILENAME_LEN bytes (the struct is
     * zeroed by calloc) -- confirm against the declaration in link.h.
     */
    strncpy(link->linkname, linkname, MAX_FILENAME_LEN);
    link->type = type;

    /*
     * Remove the trailing '/' from linkname if it exists. The length check
     * keeps an empty name from indexing linkname[-1].
     */
    size_t len = strnlen(link->linkname, MAX_FILENAME_LEN);
    if (len > 0 && link->linkname[len - 1] == '/') {
        link->linkname[len - 1] = '\0';
    }

    return link;
}
|
|
|
|
|
|
|
|
static LinkType linkname_type(const char *linkname)
|
|
|
|
{
|
|
|
|
/* The link name has to start with alphanumerical character */
|
|
|
|
if (!isalnum(linkname[0])) {
|
|
|
|
return LINK_INVALID;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* check for http:// and https:// */
|
|
|
|
if ( !strncmp(linkname, "http://", 7) || !strncmp(linkname, "https://", 8) ) {
|
|
|
|
return LINK_INVALID;
|
|
|
|
}
|
|
|
|
|
2019-04-26 08:39:45 +02:00
|
|
|
if ( linkname[strnlen(linkname, MAX_FILENAME_LEN) - 1] == '/' ) {
|
2019-04-22 14:32:15 +02:00
|
|
|
return LINK_DIR;
|
|
|
|
}
|
|
|
|
|
|
|
|
return LINK_FILE;
|
|
|
|
}
|
2018-07-26 11:29:44 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Shamelessly copied and pasted from:
|
|
|
|
* https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
|
|
|
|
*/
|
|
|
|
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl)
|
|
|
|
{
|
|
|
|
if (node->type != GUMBO_NODE_ELEMENT) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
GumboAttribute* href;
|
|
|
|
if (node->v.element.tag == GUMBO_TAG_A &&
|
|
|
|
(href = gumbo_get_attribute(&node->v.element.attributes, "href"))) {
|
|
|
|
/* if it is valid, copy the link onto the heap */
|
2019-04-22 14:32:15 +02:00
|
|
|
LinkType type = linkname_type(href->value);
|
2019-04-26 19:45:38 +02:00
|
|
|
if ( (type == LINK_DIR) || (type == LINK_FILE) ) {
|
2019-04-22 16:26:25 +02:00
|
|
|
LinkTable_add(linktbl, Link_new(href->value, type));
|
2018-07-26 11:29:44 +02:00
|
|
|
}
|
2019-04-22 16:26:25 +02:00
|
|
|
}
|
|
|
|
/* Note the recursive call, lol. */
|
|
|
|
GumboVector *children = &node->v.element.children;
|
|
|
|
for (size_t i = 0; i < children->length; ++i) {
|
|
|
|
HTML_to_LinkTable((GumboNode *)children->data[i], linktbl);
|
|
|
|
}
|
|
|
|
return;
|
2018-07-26 11:29:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
 * \brief Create a curl easy handle configured for a Link
 * \details The handle targets link->f_url and carries the options shared by
 * every request in this file: user agent, redirect following, TCP
 * keep-alive, connect timeout, the CURL_SHARE handle, the
 * write_memory_callback write function and the optional HTTP / proxy
 * credentials taken from NETWORK_CONFIG.
 * \param[in] link the link whose f_url is requested
 * \return the configured handle; the process is terminated if curl cannot
 * create one
 * \note Numeric options are passed as long, which is what
 * curl_easy_setopt() expects for them.
 */
static CURL *Link_to_curl(Link *link)
{
    CURL *curl = curl_easy_init();
    if (!curl) {
        fprintf(stderr, "Link_to_curl(): curl_easy_init() failed!\n");
        /* every option below and every caller needs a valid handle */
        exit(EXIT_FAILURE);
    }

    /* set up some basic curl stuff */
    curl_easy_setopt(curl, CURLOPT_USERAGENT, NETWORK_CONFIG.user_agent);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    /* for following directories without the '/' */
    curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 2L);
    curl_easy_setopt(curl, CURLOPT_URL, link->f_url);
    curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);
    curl_easy_setopt(curl, CURLOPT_SHARE, CURL_SHARE);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback);

    /* Uncomment to trace the transfers: */
    // curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L);

    /* HTTP credentials, only when configured */
    if (NETWORK_CONFIG.username) {
        curl_easy_setopt(curl, CURLOPT_USERNAME, NETWORK_CONFIG.username);
    }

    if (NETWORK_CONFIG.password) {
        curl_easy_setopt(curl, CURLOPT_PASSWORD, NETWORK_CONFIG.password);
    }

    /* proxy settings, only when configured */
    if (NETWORK_CONFIG.proxy) {
        curl_easy_setopt(curl, CURLOPT_PROXY, NETWORK_CONFIG.proxy);
    }

    if (NETWORK_CONFIG.proxy_user) {
        curl_easy_setopt(curl, CURLOPT_PROXYUSERNAME,
                         NETWORK_CONFIG.proxy_user);
    }

    if (NETWORK_CONFIG.proxy_pass) {
        curl_easy_setopt(curl, CURLOPT_PROXYPASSWORD,
                         NETWORK_CONFIG.proxy_pass);
    }

    return curl;
}
|
|
|
|
|
|
|
|
/**
 * \brief Queue a header-only request for the stats of a link
 * \details Builds a curl handle for the link, asks for no body and for the
 * remote file time, attaches a heap-allocated TransferStruct of type
 * FILESTAT as the handle's private data and hands the handle to
 * transfer_nonblocking(). The process is terminated if the TransferStruct
 * cannot be allocated.
 * \param[in] this_link the link to query
 */
void Link_get_stat(Link *this_link)
{
    CURL *curl = Link_to_curl(this_link);
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
    curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);

    /*
     * We need to put the variable on the heap, because otherwise the
     * variable gets popped from the stack as the function returns.
     *
     * It gets freed in curl_multi_perform_once();
     */
    TransferStruct *transfer = malloc(sizeof(TransferStruct));
    if (!transfer) {
        fprintf(stderr, "Link_get_stat(): malloc failed!\n");
        exit(EXIT_FAILURE);
    }
    transfer->link = this_link;
    transfer->type = FILESTAT;
    curl_easy_setopt(curl, CURLOPT_PRIVATE, transfer);

    transfer_nonblocking(curl);
}
|
|
|
|
|
|
|
|
/**
 * \brief Store the result of a finished stat request in a link
 * \details On HTTP 200 the remote file time is recorded and, for file
 * links, the content length; a file link whose content length is unknown
 * (-1) is marked LINK_INVALID. On HTTP 429 the link is queued again with
 * Link_get_stat() and keeps its type, so that the retry can still fill it
 * in. Any other status marks the link LINK_INVALID.
 * \param[in,out] this_link the link the request was made for
 * \param[in] curl the finished easy handle of that request
 */
void Link_set_stat(Link* this_link, CURL *curl)
{
    long http_resp = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);

    if (http_resp == HTTP_OK) {
        double cl = 0;
        curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
        curl_easy_getinfo(curl, CURLINFO_FILETIME, &(this_link->time));
        if (this_link->type == LINK_FILE) {
            if (cl == -1) {
                /* curl reports -1 when the size is unknown */
                this_link->type = LINK_INVALID;
            } else {
                this_link->content_length = cl;
            }
        }
        return;
    }

    fprintf(stderr, "Link_set_stat(): HTTP %ld", http_resp);
    if (http_resp == HTTP_TOO_MANY_REQUESTS) {
        /*
         * Do not invalidate the link here: the success branch above only
         * fills in links that are still of type LINK_FILE.
         */
        fprintf(stderr, ", re-adding the link to the queue");
        Link_get_stat(this_link);
    } else {
        this_link->type = LINK_INVALID;
    }
    fprintf(stderr, ".\n");
}
|
|
|
|
|
2019-04-22 14:32:15 +02:00
|
|
|
/**
 * \brief Fill in the URL, unescaped name and stats of every link in a table
 * \details For each link after the head link, the full URL is built from
 * the head link's URL and the link name, the link name is URL-unescaped in
 * place and a stat request is queued with Link_get_stat(). The function
 * then calls curl_multi_perform_once() until it returns zero, printing a
 * progress counter to stderr.
 * \param[in,out] linktbl the table to fill; links[0] must be the head link
 */
static void LinkTable_fill(LinkTable *linktbl)
{
    Link *head_link = linktbl->links[0];

    /* one handle serves every curl_easy_unescape() call below */
    CURL *c = curl_easy_init();
    for (int i = 1; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];

        /*
         * NOTE(review): both strncpy() calls only leave their destination
         * terminated if the Link buffers are larger than the given bounds --
         * confirm against the declarations in link.h.
         */
        char *url = path_append(head_link->f_url, this_link->linkname);
        strncpy(this_link->f_url, url, MAX_PATH_LEN);
        free(url);

        char *unescaped_linkname = curl_easy_unescape(c, this_link->linkname,
                                                      0, NULL);
        if (unescaped_linkname) {
            strncpy(this_link->linkname, unescaped_linkname,
                    MAX_FILENAME_LEN);
            curl_free(unescaped_linkname);
        } else {
            /* keep the escaped name rather than dereferencing NULL */
            fprintf(stderr, "LinkTable_fill(): cannot unescape %s\n",
                    this_link->linkname);
        }

        Link_get_stat(this_link);
    }
    curl_easy_cleanup(c);

    /* Block until the LinkTable is filled up */
    fprintf(stderr, "LinkTable_fill(): ");
    /* the value returned by the first call is printed as the total */
    int n = curl_multi_perform_once();
    int i = 0;
    /* starts empty, so erasing before the first print is a no-op */
    char s[64] = "";
    while ((i = curl_multi_perform_once())) {
        /* erase the previous counter before printing the new one */
        size_t prev_len = strlen(s);
        for (size_t k = 0; k < prev_len; k++) {
            fprintf(stderr, "\b");
        }
        snprintf(s, sizeof(s), "... %d / %d", i, n);
        fprintf(stderr, "%s", s);
    }

    size_t last_len = strlen(s);
    for (size_t k = 0; k < last_len; k++) {
        fprintf(stderr, "\b");
    }
    fprintf(stderr, "... Done!\n");
}
|
|
|
|
|
2019-04-26 16:35:48 +02:00
|
|
|
/**
|
|
|
|
* \brief fill in the gaps in a link table
|
|
|
|
*/
|
|
|
|
static void LinkTable_gap_fill(LinkTable *linktbl)
|
|
|
|
{
|
|
|
|
for (int i = 0; i < linktbl->num; i++) {
|
2019-04-26 20:30:39 +02:00
|
|
|
Link *this_link = linktbl->links[i];
|
2019-04-26 20:47:03 +02:00
|
|
|
if ((this_link->type != LINK_FILE) &&
|
|
|
|
(this_link->type != LINK_DIR) &&
|
|
|
|
(this_link->type != LINK_HEAD)) {
|
2019-04-26 16:35:48 +02:00
|
|
|
Link_get_stat(linktbl->links[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Block until the gaps are filled */
|
2019-04-27 02:53:09 +02:00
|
|
|
while (curl_multi_perform_once())
|
|
|
|
;
|
2019-04-26 16:35:48 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2018-07-26 11:29:44 +02:00
|
|
|
/**
 * \brief Release a LinkTable together with the links it holds
 * \details Frees each of the num link entries, then the links array, then
 * the table itself.
 * \param[in] linktbl the table to release
 */
static void LinkTable_free(LinkTable *linktbl)
{
    int idx = 0;

    while (idx < linktbl->num) {
        free(linktbl->links[idx]);
        idx++;
    }

    free(linktbl->links);
    free(linktbl);
}
|
|
|
|
|
2019-04-22 16:26:25 +02:00
|
|
|
/**
 * \brief Dump a LinkTable to stderr
 * \details Prints one line per link (index, type, content length, name and
 * URL), followed by the number of links whose type is none of LINK_FILE,
 * LINK_DIR and LINK_HEAD.
 * \param[in] linktbl the table to print; links[0] must exist
 */
static void LinkTable_print(LinkTable *linktbl)
{
    int j = 0;
    fprintf(stderr, "--------------------------------------------\n");
    /* %p requires a void pointer */
    fprintf(stderr, " LinkTable %p for %s\n", (void *) linktbl,
            linktbl->links[0]->f_url);
    fprintf(stderr, "--------------------------------------------\n");
    for (int i = 0; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];
        /*
         * The content length is saved and loaded as a size_t elsewhere in
         * this file, so it is printed with %zu rather than %lu.
         */
        fprintf(stderr, "%d %c %zu %s %s\n",
                i,
                this_link->type,
                (size_t) this_link->content_length,
                this_link->linkname,
                this_link->f_url
               );
        if ((this_link->type != LINK_FILE) &&
            (this_link->type != LINK_DIR) &&
            (this_link->type != LINK_HEAD)) {
            j++;
        }
    }
    fprintf(stderr, "--------------------------------------------\n");
    fprintf(stderr, "LinkTable_print(): Invalid link count: %d, %s.\n", j,
            linktbl->links[0]->f_url);
    fprintf(stderr, "--------------------------------------------\n");
}
|
|
|
|
|
2018-07-26 11:29:44 +02:00
|
|
|
LinkTable *LinkTable_new(const char *url)
|
|
|
|
{
|
2019-09-01 12:39:47 +02:00
|
|
|
#ifdef LINK_LOCK_DEBUG
|
2019-08-31 22:21:28 +02:00
|
|
|
fprintf(stderr,
|
|
|
|
"LinkTable_new(): thread %lu: locking link_lock;\n",
|
|
|
|
pthread_self());
|
2019-09-01 12:39:47 +02:00
|
|
|
#endif
|
2019-09-01 02:21:40 +02:00
|
|
|
PTHREAD_MUTEX_LOCK(&link_lock);
|
2018-07-26 11:29:44 +02:00
|
|
|
LinkTable *linktbl = calloc(1, sizeof(LinkTable));
|
|
|
|
if (!linktbl) {
|
|
|
|
fprintf(stderr, "LinkTable_new(): calloc failure!\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* populate the base URL */
|
|
|
|
LinkTable_add(linktbl, Link_new("/", LINK_HEAD));
|
|
|
|
Link *head_link = linktbl->links[0];
|
|
|
|
head_link->type = LINK_HEAD;
|
2019-04-26 08:39:45 +02:00
|
|
|
strncpy(head_link->f_url, url, MAX_PATH_LEN);
|
2018-07-26 11:29:44 +02:00
|
|
|
|
|
|
|
/* start downloading the base URL */
|
|
|
|
CURL *curl = Link_to_curl(head_link);
|
|
|
|
MemoryStruct buf;
|
|
|
|
buf.size = 0;
|
|
|
|
buf.memory = NULL;
|
|
|
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
|
|
|
|
|
2019-04-26 14:20:55 +02:00
|
|
|
/* If we get HTTP 429, wait for 5 seconds before retry */
|
|
|
|
volatile long http_resp = 0;
|
|
|
|
do {
|
|
|
|
transfer_blocking(curl);
|
|
|
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
|
|
|
|
if (http_resp == HTTP_TOO_MANY_REQUESTS) {
|
2019-04-26 19:30:35 +02:00
|
|
|
fprintf(stderr, "LinkTable_new(): URL: %s, HTTP 429, \
|
2019-04-26 14:32:59 +02:00
|
|
|
Too Many Requests\n", url);
|
2019-04-26 15:14:42 +02:00
|
|
|
sleep(HTTP_429_WAIT);
|
2019-04-26 14:20:55 +02:00
|
|
|
} else if (http_resp != HTTP_OK) {
|
2019-04-26 19:30:35 +02:00
|
|
|
fprintf(stderr, "LinkTable_new(): cannot retrieve URL: %s, \
|
2019-04-26 17:24:55 +02:00
|
|
|
HTTP %ld\n", url, http_resp);
|
2019-04-26 14:20:55 +02:00
|
|
|
LinkTable_free(linktbl);
|
|
|
|
curl_easy_cleanup(curl);
|
|
|
|
return NULL;
|
|
|
|
};
|
|
|
|
} while (http_resp != HTTP_OK);
|
2018-07-26 11:29:44 +02:00
|
|
|
|
|
|
|
curl_easy_getinfo(curl, CURLINFO_FILETIME, &(head_link->time));
|
|
|
|
curl_easy_cleanup(curl);
|
|
|
|
|
|
|
|
/* Otherwise parsed the received data */
|
|
|
|
GumboOutput* output = gumbo_parse(buf.memory);
|
|
|
|
HTML_to_LinkTable(output->root, linktbl);
|
|
|
|
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
|
|
|
free(buf.memory);
|
|
|
|
|
2019-04-26 09:26:47 +02:00
|
|
|
int skip_fill = 0;
|
|
|
|
char *unescaped_path;
|
2019-04-30 09:05:46 +02:00
|
|
|
CURL* c = curl_easy_init();
|
|
|
|
unescaped_path = curl_easy_unescape(c, url + ROOT_LINK_OFFSET, 0, NULL);
|
2019-04-22 16:26:25 +02:00
|
|
|
if (CACHE_SYSTEM_INIT) {
|
2019-04-23 11:26:13 +02:00
|
|
|
CacheDir_create(unescaped_path);
|
2019-04-26 09:26:47 +02:00
|
|
|
LinkTable *disk_linktbl;
|
|
|
|
disk_linktbl = LinkTable_disk_open(unescaped_path);
|
|
|
|
if (disk_linktbl) {
|
|
|
|
/* Check if we need to update the link table */
|
|
|
|
if (disk_linktbl->num == linktbl->num) {
|
|
|
|
LinkTable_free(linktbl);
|
|
|
|
linktbl = disk_linktbl;
|
|
|
|
skip_fill = 1;
|
|
|
|
} else {
|
|
|
|
LinkTable_free(disk_linktbl);
|
|
|
|
}
|
|
|
|
}
|
2019-04-22 16:26:25 +02:00
|
|
|
}
|
|
|
|
|
2019-04-26 09:26:47 +02:00
|
|
|
if (!skip_fill) {
|
2019-04-26 16:35:48 +02:00
|
|
|
/* Fill in the link table */
|
2019-04-26 09:26:47 +02:00
|
|
|
LinkTable_fill(linktbl);
|
2019-04-26 16:35:48 +02:00
|
|
|
} else {
|
|
|
|
/* Fill in the holes in the link table */
|
|
|
|
LinkTable_gap_fill(linktbl);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Save the link table */
|
|
|
|
if (CACHE_SYSTEM_INIT) {
|
|
|
|
LinkTable_disk_save(linktbl, unescaped_path);
|
2019-04-26 09:26:47 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
curl_free(unescaped_path);
|
2019-04-30 09:05:46 +02:00
|
|
|
curl_easy_cleanup(c);
|
2019-04-26 09:26:47 +02:00
|
|
|
|
2019-04-22 16:26:25 +02:00
|
|
|
LinkTable_print(linktbl);
|
2019-08-31 22:21:28 +02:00
|
|
|
#ifdef LINK_LOCK_DEBUG
|
|
|
|
fprintf(stderr,
|
|
|
|
"LinkTable_new(): thread %lu: unlocking link_lock;\n",
|
|
|
|
pthread_self());
|
|
|
|
#endif
|
2019-09-01 02:21:40 +02:00
|
|
|
PTHREAD_MUTEX_UNLOCK(&link_lock);
|
2018-07-26 11:29:44 +02:00
|
|
|
return linktbl;
|
|
|
|
}
|
|
|
|
|
2019-04-26 08:39:45 +02:00
|
|
|
/**
 * \brief Remove the saved LinkTable of a directory
 * \details Unlinks the ".LinkTable" file under "cache/meta/" + dirn and
 * reports a failure to stderr.
 * \param[in] dirn directory name, appended to "cache/meta/"
 */
static void LinkTable_disk_delete(const char *dirn)
{
    char *meta_dir = path_append("cache/meta/", dirn);
    char *table_path = path_append(meta_dir, ".LinkTable");

    int rc = unlink(table_path);
    if (rc != 0) {
        fprintf(stderr, "LinkTable_disk_delete(): unlink(%s): %s\n",
                table_path, strerror(errno));
    }

    free(table_path);
    free(meta_dir);
}
|
|
|
|
|
|
|
|
/**
 * \brief Save a LinkTable to disk
 * \details Writes the number of links followed by the linkname, f_url,
 * type, content_length and time of every link to the ".LinkTable" file
 * under "cache/meta/" + dirn. LinkTable_disk_open() reads the same layout.
 * \param[in] linktbl the table to save
 * \param[in] dirn directory name, appended to "cache/meta/"
 * \return 0 on success, -1 if the file could not be opened, written or
 * closed
 */
int LinkTable_disk_save(LinkTable *linktbl, const char *dirn)
{
    char *metadirn = path_append("cache/meta/", dirn);
    char *path = path_append(metadirn, ".LinkTable");
    /*
     * Released before fopen(), so that nothing runs between a failing
     * fopen() and the strerror(errno) that reports it.
     */
    free(metadirn);

    FILE *fp = fopen(path, "w");
    if (!fp) {
        fprintf(stderr, "LinkTable_disk_save(): fopen(%s): %s\n", path,
                strerror(errno));
        free(path);
        return -1;
    }
    free(path);

    fwrite(&linktbl->num, sizeof(int), 1, fp);
    for (int i = 0; i < linktbl->num; i++) {
        fwrite(linktbl->links[i]->linkname, sizeof(char), MAX_FILENAME_LEN, fp);
        fwrite(linktbl->links[i]->f_url, sizeof(char), MAX_PATH_LEN, fp);
        fwrite(&linktbl->links[i]->type, sizeof(LinkType), 1, fp);
        fwrite(&linktbl->links[i]->content_length, sizeof(size_t), 1, fp);
        fwrite(&linktbl->links[i]->time, sizeof(long), 1, fp);
    }

    int res = 0;

    /* a failed fwrite() above sets the stream's error indicator */
    if (ferror(fp)) {
        fprintf(stderr, "LinkTable_disk_save(): encountered ferror!\n");
        res = -1;
    }

    if (fclose(fp)) {
        fprintf(stderr,
                "LinkTable_disk_save(): cannot close the file pointer, %s\n",
                strerror(errno));
        res = -1;
    }

    return res;
}
|
|
|
|
|
|
|
|
LinkTable *LinkTable_disk_open(const char *dirn)
|
|
|
|
{
|
2019-04-26 09:26:47 +02:00
|
|
|
char *metadirn = path_append("cache/meta/", dirn);
|
|
|
|
char *path = path_append(metadirn, ".LinkTable");
|
2019-04-26 08:39:45 +02:00
|
|
|
FILE *fp = fopen(path, "r");
|
2019-04-26 09:26:47 +02:00
|
|
|
free(metadirn);
|
2019-04-26 08:39:45 +02:00
|
|
|
|
|
|
|
if (!fp) {
|
2019-04-26 09:26:47 +02:00
|
|
|
free(path);
|
2019-04-26 08:39:45 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
LinkTable *linktbl = calloc(1, sizeof(LinkTable));
|
|
|
|
if (!linktbl) {
|
2019-04-26 09:26:47 +02:00
|
|
|
fprintf(stderr, "LinkTable_disk_open(): calloc linktbl failed!\n");
|
2019-04-26 08:39:45 +02:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
fread(&linktbl->num, sizeof(int), 1, fp);
|
|
|
|
linktbl->links = calloc(linktbl->num, sizeof(Link *));
|
|
|
|
for (int i = 0; i < linktbl->num; i++) {
|
|
|
|
linktbl->links[i] = calloc(1, sizeof(Link));
|
2019-04-26 12:42:11 +02:00
|
|
|
if (!linktbl->links[i]) {
|
2019-04-26 09:26:47 +02:00
|
|
|
fprintf(stderr, "LinkTable_disk_open(): calloc links[i] failed!\n");
|
2019-04-26 08:39:45 +02:00
|
|
|
}
|
2019-04-26 09:26:47 +02:00
|
|
|
fread(linktbl->links[i]->linkname, sizeof(char), MAX_FILENAME_LEN, fp);
|
2019-04-26 08:39:45 +02:00
|
|
|
fread(linktbl->links[i]->f_url, sizeof(char), MAX_PATH_LEN, fp);
|
|
|
|
fread(&linktbl->links[i]->type, sizeof(LinkType), 1, fp);
|
|
|
|
fread(&linktbl->links[i]->content_length, sizeof(size_t), 1, fp);
|
|
|
|
fread(&linktbl->links[i]->time, sizeof(long), 1, fp);
|
|
|
|
if (feof(fp)) {
|
|
|
|
/* reached EOF */
|
|
|
|
fprintf(stderr,
|
2019-04-26 09:26:47 +02:00
|
|
|
"LinkTable_disk_open(): reached EOF!\n");
|
2019-04-26 08:39:45 +02:00
|
|
|
LinkTable_free(linktbl);
|
|
|
|
LinkTable_disk_delete(dirn);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
if (ferror(fp)) {
|
2019-04-26 09:26:47 +02:00
|
|
|
fprintf(stderr, "LinkTable_disk_open(): encountered ferror!\n");
|
2019-04-26 08:39:45 +02:00
|
|
|
LinkTable_free(linktbl);
|
|
|
|
LinkTable_disk_delete(dirn);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (fclose(fp)) {
|
2019-04-26 09:26:47 +02:00
|
|
|
fprintf(stderr,
|
|
|
|
"LinkTable_disk_open(): cannot close the file pointer, %s\n",
|
2019-04-26 08:39:45 +02:00
|
|
|
strerror(errno));
|
|
|
|
}
|
|
|
|
return linktbl;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-04-22 13:06:34 +02:00
|
|
|
LinkTable *path_to_Link_LinkTable_new(const char *path)
|
|
|
|
{
|
|
|
|
Link *link = path_to_Link(path);
|
2019-04-22 16:26:25 +02:00
|
|
|
if (!link->next_table) {
|
|
|
|
link->next_table = LinkTable_new(link->f_url);
|
|
|
|
}
|
2019-04-22 13:06:34 +02:00
|
|
|
return link->next_table;
|
|
|
|
}
|
|
|
|
|
2018-07-26 11:29:44 +02:00
|
|
|
/**
 * \brief Resolve a path to a Link, one path component per call
 * \details The first component of path is compared against the link names
 * of linktbl (the head link at index 0 is skipped). When more components
 * follow, the matching link's next_table is created on demand with
 * LinkTable_new() and the search continues there with the rest of the path.
 * \param[in,out] path the path to resolve; it is modified in place ('/'
 * characters are overwritten with '\0')
 * \param[in] linktbl the table to search; may be NULL
 * \return the matching link, or NULL if there is none or a table on the
 * way is missing
 */
static Link *path_to_Link_recursive(char *path, LinkTable *linktbl)
{
    /* LinkTable_new() returns NULL when a directory cannot be retrieved */
    if (!linktbl) {
        return NULL;
    }

    /* skip the leading '/' if it exists */
    if (*path == '/') {
        path++;
    }

    /*
     * remove the last '/' if it exists; the length check keeps an empty
     * path from touching path[-1]
     */
    size_t len = strnlen(path, MAX_PATH_LEN);
    if (len > 0 && path[len - 1] == '/') {
        path[len - 1] = '\0';
    }

    char *slash = strchr(path, '/');
    if (slash == NULL) {
        /* We cannot find another '/', we have reached the last level */
        for (int i = 1; i < linktbl->num; i++) {
            if (!strncmp(path, linktbl->links[i]->linkname, MAX_FILENAME_LEN)) {
                /* We found our link */
                return linktbl->links[i];
            }
        }
        return NULL;
    }

    /*
     * We can still find '/', time to consume the path and traverse
     * the tree structure
     */

    /*
     * add termination mark to the current string,
     * effectively creating two substrings
     */
    *slash = '\0';
    /* move the pointer past the '/' */
    char *next_path = slash + 1;
    for (int i = 1; i < linktbl->num; i++) {
        if (!strncmp(path, linktbl->links[i]->linkname, MAX_FILENAME_LEN)) {
            /* The next sub-directory exists */
            if (!linktbl->links[i]->next_table) {
                linktbl->links[i]->next_table = LinkTable_new(
                    linktbl->links[i]->f_url);
            }
            return path_to_Link_recursive(
                next_path, linktbl->links[i]->next_table);
        }
    }

    return NULL;
}
|
|
|
|
|
2019-04-22 14:32:15 +02:00
|
|
|
Link *path_to_Link(const char *path)
|
|
|
|
{
|
2019-04-26 08:39:45 +02:00
|
|
|
char *new_path = strndup(path, MAX_PATH_LEN);
|
2019-04-22 14:32:15 +02:00
|
|
|
if (!new_path) {
|
|
|
|
fprintf(stderr, "path_to_Link(): cannot allocate memory\n");
|
|
|
|
exit(EXIT_FAILURE);
|
|
|
|
}
|
|
|
|
Link *link = path_to_Link_recursive(new_path, ROOT_LINK_TBL);
|
|
|
|
free(new_path);
|
|
|
|
return link;
|
|
|
|
}
|
|
|
|
|
2018-07-26 11:29:44 +02:00
|
|
|
long path_download(const char *path, char *output_buf, size_t size,
|
|
|
|
off_t offset)
|
|
|
|
{
|
|
|
|
Link *link;
|
|
|
|
link = path_to_Link(path);
|
|
|
|
if (!link) {
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t start = offset;
|
|
|
|
size_t end = start + size;
|
|
|
|
char range_str[64];
|
|
|
|
snprintf(range_str, sizeof(range_str), "%lu-%lu", start, end);
|
2019-04-25 09:50:04 +02:00
|
|
|
fprintf(stderr, "path_download(%s, %s);\n", path, range_str);
|
2018-07-26 11:29:44 +02:00
|
|
|
|
|
|
|
MemoryStruct buf;
|
|
|
|
buf.size = 0;
|
|
|
|
buf.memory = NULL;
|
|
|
|
|
|
|
|
CURL *curl = Link_to_curl(link);
|
|
|
|
curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
|
|
|
|
curl_easy_setopt(curl, CURLOPT_RANGE, range_str);
|
|
|
|
|
2019-09-01 12:39:47 +02:00
|
|
|
#ifdef LINK_LOCK_DEBUG
|
2019-08-31 22:21:28 +02:00
|
|
|
fprintf(stderr,
|
2019-09-01 01:43:50 +02:00
|
|
|
"path_download(): thread %lu: locking and unlocking link_lock;\n",
|
2019-08-31 22:21:28 +02:00
|
|
|
pthread_self());
|
2019-09-01 12:39:47 +02:00
|
|
|
#endif
|
2019-09-01 01:43:50 +02:00
|
|
|
|
2019-09-01 02:21:40 +02:00
|
|
|
PTHREAD_MUTEX_LOCK(&link_lock);
|
|
|
|
PTHREAD_MUTEX_UNLOCK(&link_lock);
|
2019-09-01 01:43:50 +02:00
|
|
|
|
2018-07-26 11:29:44 +02:00
|
|
|
transfer_blocking(curl);
|
|
|
|
|
|
|
|
long http_resp;
|
|
|
|
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
|
|
|
|
if ( !(
|
|
|
|
(http_resp != HTTP_OK) ||
|
|
|
|
(http_resp != HTTP_PARTIAL_CONTENT) ||
|
|
|
|
(http_resp != HTTP_RANGE_NOT_SATISFIABLE)
|
|
|
|
)) {
|
|
|
|
fprintf(stderr, "path_download(): Could not download %s, HTTP %ld\n",
|
|
|
|
link->f_url, http_resp);
|
|
|
|
return -ENOENT;
|
|
|
|
}
|
|
|
|
|
|
|
|
double dl;
|
|
|
|
curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl);
|
|
|
|
|
|
|
|
size_t recv = dl;
|
|
|
|
if (recv > size) {
|
|
|
|
recv = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
memmove(output_buf, buf.memory, recv);
|
|
|
|
curl_easy_cleanup(curl);
|
|
|
|
free(buf.memory);
|
|
|
|
return recv;
|
|
|
|
}
|