now clean up after downloading
This commit is contained in:
parent
f674831616
commit
25648ad947
116
network.c
116
network.c
|
@ -13,17 +13,22 @@ LinkTable *ROOT_LINK_TBL;
|
||||||
|
|
||||||
/* ------------------------ Static variable ------------------------------ */
|
/* ------------------------ Static variable ------------------------------ */
|
||||||
static CURLM *curl_multi;
|
static CURLM *curl_multi;
|
||||||
static char *url_append(const char *url, const char *sublink);
|
static CURLSH *curl_share;
|
||||||
static size_t
|
|
||||||
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp);
|
/* Forward declarations */
|
||||||
static Link *Link_new(const char *p_url);
|
|
||||||
|
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
|
||||||
|
static int is_valid_link_p_url(const char *n);
|
||||||
static void Link_free(Link *link);
|
static void Link_free(Link *link);
|
||||||
|
static Link *Link_new(const char *p_url);
|
||||||
|
static void Link_curl_init(Link *link);
|
||||||
static void LinkTable_free(LinkTable *linktbl);
|
static void LinkTable_free(LinkTable *linktbl);
|
||||||
static void LinkTable_add(LinkTable *linktbl, Link *link);
|
static void LinkTable_add(LinkTable *linktbl, Link *link);
|
||||||
static void LinkTable_fill(LinkTable *linktbl);
|
static void LinkTable_fill(LinkTable *linktbl);
|
||||||
static int is_valid_link_p_url(const char *n);
|
|
||||||
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
|
|
||||||
static Link *path_to_Link_recursive(char *path, LinkTable *linktbl);
|
static Link *path_to_Link_recursive(char *path, LinkTable *linktbl);
|
||||||
|
static char *url_append(const char *url, const char *sublink);
|
||||||
|
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
|
||||||
|
void *userp);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief blocking transfer function
|
* \brief blocking transfer function
|
||||||
|
@ -45,6 +50,40 @@ static void do_transfer(CURL *curl);
|
||||||
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
|
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
|
||||||
|
|
||||||
/* -------------------------- Functions ---------------------------------- */
|
/* -------------------------- Functions ---------------------------------- */
|
||||||
|
void network_init(const char *url)
|
||||||
|
{
|
||||||
|
curl_global_init(CURL_GLOBAL_ALL);
|
||||||
|
curl_multi = curl_multi_init();
|
||||||
|
curl_multi_setopt(curl_multi, CURLMOPT_MAXCONNECTS,
|
||||||
|
(long)NETWORK_MAXIMUM_CONNECTION);
|
||||||
|
|
||||||
|
ROOT_LINK_TBL = LinkTable_new(url);
|
||||||
|
|
||||||
|
curl_share = curl_share_init();
|
||||||
|
curl_share_setopt(curl_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE);
|
||||||
|
curl_share_setopt(curl_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Link_curl_init(Link *link)
|
||||||
|
{
|
||||||
|
link->curl = curl_easy_init();
|
||||||
|
|
||||||
|
/* set up some basic curl stuff */
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_VERBOSE, 0);
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_FOLLOWLOCATION, 1);
|
||||||
|
/*
|
||||||
|
* only 1 redirection is really needed
|
||||||
|
* - for following directories without the '/'
|
||||||
|
*/
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_MAXREDIRS, 3);
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_URL, link->f_url);
|
||||||
|
curl_easy_setopt(link->curl, CURLOPT_SHARE, curl_share);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static char *url_append(const char *url, const char *sublink)
|
static char *url_append(const char *url, const char *sublink)
|
||||||
{
|
{
|
||||||
int needs_separator = 0;
|
int needs_separator = 0;
|
||||||
|
@ -171,15 +210,6 @@ WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
|
||||||
return realsize - 1;
|
return realsize - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void network_init(const char *url)
|
|
||||||
{
|
|
||||||
curl_global_init(CURL_GLOBAL_ALL);
|
|
||||||
curl_multi = curl_multi_init();
|
|
||||||
curl_multi_setopt(curl_multi, CURLMOPT_MAXCONNECTS,
|
|
||||||
(long)NETWORK_MAXIMUM_CONNECTION);
|
|
||||||
ROOT_LINK_TBL = LinkTable_new(url);
|
|
||||||
}
|
|
||||||
|
|
||||||
static Link *Link_new(const char *p_url)
|
static Link *Link_new(const char *p_url)
|
||||||
{
|
{
|
||||||
Link *link = calloc(1, sizeof(Link));
|
Link *link = calloc(1, sizeof(Link));
|
||||||
|
@ -194,19 +224,6 @@ static Link *Link_new(const char *p_url)
|
||||||
|
|
||||||
link->type = LINK_UNKNOWN;
|
link->type = LINK_UNKNOWN;
|
||||||
|
|
||||||
link->curl = curl_easy_init();
|
|
||||||
|
|
||||||
/* set up some basic curl stuff */
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_VERBOSE, 0);
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_FOLLOWLOCATION, 1);
|
|
||||||
/*
|
|
||||||
* only 1 redirection is really needed
|
|
||||||
* - for following directories without the '/'
|
|
||||||
*/
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_MAXREDIRS, 3);
|
|
||||||
|
|
||||||
return link;
|
return link;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,30 +239,28 @@ size_t Link_download(Link *link, MemoryStruct *ms, off_t offset,
|
||||||
{
|
{
|
||||||
size_t start = offset;
|
size_t start = offset;
|
||||||
size_t end = start + size;
|
size_t end = start + size;
|
||||||
CURL *curl = link->curl;
|
Link_curl_init(link);
|
||||||
char range_str[64];
|
char range_str[64];
|
||||||
snprintf(range_str, sizeof(range_str), "%lu-%lu", start, end);
|
snprintf(range_str, sizeof(range_str), "%lu-%lu", start, end);
|
||||||
|
|
||||||
curl_easy_setopt(curl, CURLOPT_NOBODY, 0);
|
curl_easy_setopt(link->curl, CURLOPT_RANGE, range_str);
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, link->f_url);
|
|
||||||
curl_easy_setopt(curl, CURLOPT_RANGE, range_str);
|
|
||||||
curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)ms);
|
curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)ms);
|
||||||
|
|
||||||
do_transfer(curl);
|
do_transfer(link->curl);
|
||||||
|
|
||||||
long http_resp;
|
long http_resp;
|
||||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
|
curl_easy_getinfo(link->curl, CURLINFO_RESPONSE_CODE, &http_resp);
|
||||||
if ( (http_resp != HTTP_OK) && ( http_resp != HTTP_PARTIAL_CONTENT) ) {
|
if ( (http_resp != HTTP_OK) && ( http_resp != HTTP_PARTIAL_CONTENT) ) {
|
||||||
fprintf(stderr, "Link_download(): Could not download %s, HTTP %ld\n",
|
fprintf(stderr, "Link_download(): Could not download %s, HTTP %ld\n",
|
||||||
link->f_url, http_resp);
|
link->f_url, http_resp);
|
||||||
|
fflush(stdout);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
double dl;
|
double dl;
|
||||||
curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl);
|
curl_easy_getinfo(link->curl, CURLINFO_SIZE_DOWNLOAD, &dl);
|
||||||
size_t s = dl;
|
size_t s = dl;
|
||||||
|
curl_easy_cleanup(link->curl);
|
||||||
fflush(stdout);
|
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -257,9 +272,10 @@ LinkTable *LinkTable_new(const char *url)
|
||||||
LinkTable_add(linktbl, Link_new("/"));
|
LinkTable_add(linktbl, Link_new("/"));
|
||||||
Link *head_link = linktbl->links[0];
|
Link *head_link = linktbl->links[0];
|
||||||
head_link->type = LINK_HEAD;
|
head_link->type = LINK_HEAD;
|
||||||
curl_easy_setopt(head_link->curl, CURLOPT_URL, url);
|
strncpy(head_link->f_url, url, URL_LEN_MAX);
|
||||||
|
|
||||||
/* start downloading the base URL */
|
/* start downloading the base URL */
|
||||||
|
Link_curl_init(head_link);
|
||||||
MemoryStruct ms;
|
MemoryStruct ms;
|
||||||
curl_easy_setopt(head_link->curl, CURLOPT_WRITEDATA, (void *)&ms);
|
curl_easy_setopt(head_link->curl, CURLOPT_WRITEDATA, (void *)&ms);
|
||||||
|
|
||||||
|
@ -275,6 +291,7 @@ URL: %s, HTTP %ld\n", url, http_resp);
|
||||||
linktbl = NULL;
|
linktbl = NULL;
|
||||||
return linktbl;
|
return linktbl;
|
||||||
};
|
};
|
||||||
|
curl_easy_cleanup(head_link->curl);
|
||||||
|
|
||||||
/* Otherwise parse the received data */
|
/* Otherwise parse the received data */
|
||||||
GumboOutput* output = gumbo_parse(ms.memory);
|
GumboOutput* output = gumbo_parse(ms.memory);
|
||||||
|
@ -284,6 +301,7 @@ URL: %s, HTTP %ld\n", url, http_resp);
|
||||||
|
|
||||||
/* Fill in the link table */
|
/* Fill in the link table */
|
||||||
LinkTable_fill(linktbl);
|
LinkTable_fill(linktbl);
|
||||||
|
|
||||||
return linktbl;
|
return linktbl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -312,21 +330,23 @@ void LinkTable_fill(LinkTable *linktbl)
|
||||||
for (int i = 0; i < linktbl->num; i++) {
|
for (int i = 0; i < linktbl->num; i++) {
|
||||||
Link *this_link = linktbl->links[i];
|
Link *this_link = linktbl->links[i];
|
||||||
if (this_link->type == LINK_UNKNOWN) {
|
if (this_link->type == LINK_UNKNOWN) {
|
||||||
CURL *curl = this_link->curl;
|
|
||||||
char *url;
|
char *url;
|
||||||
curl_easy_getinfo(head_link->curl, CURLINFO_EFFECTIVE_URL, &url);
|
url = url_append(head_link->f_url, this_link->p_url);
|
||||||
url = url_append(url, this_link->p_url);
|
strncpy(this_link->f_url, url, URL_LEN_MAX);
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
free(url);
|
||||||
curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
|
|
||||||
|
|
||||||
do_transfer(curl);
|
Link_curl_init(this_link);
|
||||||
|
curl_easy_setopt(this_link->curl, CURLOPT_NOBODY, 1);
|
||||||
|
|
||||||
|
do_transfer(this_link->curl);
|
||||||
|
|
||||||
long http_resp;
|
long http_resp;
|
||||||
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
|
curl_easy_getinfo(this_link->curl, CURLINFO_RESPONSE_CODE,
|
||||||
|
&http_resp);
|
||||||
if (http_resp == HTTP_OK) {
|
if (http_resp == HTTP_OK) {
|
||||||
strncpy(this_link->f_url, url, URL_LEN_MAX);
|
|
||||||
double cl;
|
double cl;
|
||||||
curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
|
curl_easy_getinfo(this_link->curl,
|
||||||
|
CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
|
||||||
if (cl == -1) {
|
if (cl == -1) {
|
||||||
this_link->content_length = 0;
|
this_link->content_length = 0;
|
||||||
this_link->type = LINK_DIR;
|
this_link->type = LINK_DIR;
|
||||||
|
@ -337,8 +357,8 @@ void LinkTable_fill(LinkTable *linktbl)
|
||||||
} else {
|
} else {
|
||||||
this_link->type = LINK_INVALID;
|
this_link->type = LINK_INVALID;
|
||||||
}
|
}
|
||||||
free(url);
|
|
||||||
}
|
}
|
||||||
|
curl_easy_cleanup(this_link->curl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue