finished the link table filler

This commit is contained in:
Fufu Fang 2018-07-20 13:59:25 +01:00
parent 27166c429f
commit 98ac7d19e8
4 changed files with 78 additions and 34 deletions

10
data.h
View File

@ -21,11 +21,11 @@ typedef enum {
/* One entry of a link table: a URL plus the curl handle and results for it. */
typedef struct {
    char p_url[255];        /* URL text of this link, copied in by Link_new() */
    LinkType type;          /* kind of link; Link_new() starts it as LINK_UNKNOWN */
    CURL *curl;             /* curl easy handle created for this link */
    CURLcode res;           /* initialise to -1, as all valid CURLcode values
                               are non-negative (CURLE_OK is 0) */
    char *body;             /* downloaded data, kept NUL-terminated by the
                               write callback; NULL until data arrives */
    size_t body_sz;         /* bytes stored in body, excluding the terminator */
    size_t content_length;  /* length reported for the resource; 0 when the
                               length is unknown */
} Link;
/**

90
link.c
View File

@ -1,7 +1,7 @@
#include <ctype.h>
#include "link.h"
#include "string.h"
#include "link.h"
static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
@ -9,16 +9,16 @@ WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
size_t realsize = size * nmemb;
Link *mem = (Link *)userp;
mem->data = realloc(mem->data, mem->data_sz + realsize + 1);
if(mem->data == NULL) {
mem->body = realloc(mem->body, mem->body_sz + realsize + 1);
if(mem->body == NULL) {
/* out of memory! */
printf("not enough memory (realloc returned NULL)\n");
return 0;
}
memcpy(&(mem->data[mem->data_sz]), contents, realsize);
mem->data_sz += realsize;
mem->data[mem->data_sz] = 0;
memcpy(&(mem->body[mem->body_sz]), contents, realsize);
mem->body_sz += realsize;
mem->body[mem->body_sz] = 0;
return realsize;
}
@ -27,26 +27,24 @@ Link *Link_new(const char *p_url)
{
Link *link = calloc(1, sizeof(Link));
size_t p_url_len = strnlen(p_url, LINK_LEN_MAX) + 1;
strncpy(link->p_url, p_url, p_url_len);
strncpy(link->p_url, p_url, LINK_LEN_MAX);
link->type = LINK_UNKNOWN;
link->curl_h = curl_easy_init();
link->curl = curl_easy_init();
link->res = -1;
link->data = malloc(1);
/* set up some basic curl stuff */
curl_easy_setopt(link->curl_h, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(link->curl_h, CURLOPT_WRITEDATA, (void *)link->data);
curl_easy_setopt(link->curl_h, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)link);
curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
return link;
}
/**
 * \brief release a Link together with the resources it owns
 * \details cleans up the curl handle, frees the downloaded body and then the
 * Link itself. Passing NULL is a no-op, mirroring free().
 * \param[in] link the link to destroy; must not be used afterwards
 */
void Link_free(Link *link)
{
    if (link == NULL) {
        return;
    }
    curl_easy_cleanup(link->curl);
    free(link->body);
    free(link);
    /*
     * No "link = NULL" here: it would only change this function's local copy
     * of the pointer. Callers must reset their own pointer if they keep it.
     */
}
@ -54,16 +52,30 @@ void Link_free(Link *link)
/**
 * \brief create a link table for the page at a base URL
 * \details downloads the base URL, parses the received HTML into links and
 * then fills in the type information of every link.
 * \param[in] url the base URL to download
 * \return the new link table, or NULL if the table could not be allocated or
 * the base URL could not be downloaded. Release it with LinkTable_free().
 */
LinkTable *LinkTable_new(const char *url)
{
    LinkTable *linktbl = calloc(1, sizeof(LinkTable));
    if (linktbl == NULL) {
        fprintf(stderr,
                "link.c: LinkTable_new() cannot allocate the link table\n");
        return NULL;
    }

    /* populate the base URL; it always occupies slot 0 of the table */
    LinkTable_add(linktbl, Link_new(url));
    Link *head_link = linktbl->links[0];
    curl_easy_setopt(head_link->curl, CURLOPT_URL, url);

    /* start downloading the base URL */
    head_link->res = curl_easy_perform(head_link->curl);

    /* if downloading base URL failed */
    if (head_link->res != CURLE_OK) {
        fprintf(stderr,
                "link.c: LinkTable_new() cannot retrieve the base URL\n");
        LinkTable_free(linktbl);
        return NULL;
    }

    /*
     * Otherwise parse the received data. The body pointer is still NULL when
     * the server sent no data at all, and gumbo_parse() needs a real string.
     */
    if (head_link->body != NULL) {
        GumboOutput *output = gumbo_parse(head_link->body);
        HTML_to_LinkTable(output->root, linktbl);
        gumbo_destroy_output(&kGumboDefaultOptions, output);
    }

    /* Fill in the link table */
    LinkTable_fill(linktbl);
    return linktbl;
}
@ -86,13 +98,42 @@ void LinkTable_add(LinkTable *linktbl, Link *link)
linktbl->links[linktbl->num - 1] = link;
}
/**
 * \brief fill the LinkTable with link type information
 * \details every link whose type is still LINK_UNKNOWN is queried with a
 * body-less request against the base URL (slot 0) joined with the link's own
 * URL. A link with a reported content length becomes LINK_FILE; a link
 * without one becomes LINK_DIR with a content length of 0.
 * \param[in,out] linktbl the link table to update in place
 */
void LinkTable_fill(LinkTable *linktbl)
{
    for (int i = 0; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];
        if (this_link->type != LINK_UNKNOWN) {
            continue;
        }

        CURL *curl = this_link->curl;
        char *url = url_append(linktbl->links[0]->p_url, this_link->p_url);
        curl_easy_setopt(curl, CURLOPT_URL, url);
        /* libcurl keeps its own copy of string options, so ours can go */
        free(url);

        /* curl_easy_setopt() is variadic: this option must receive a long */
        curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
        this_link->res = curl_easy_perform(curl);

        /* stays at -1 ("unknown") if libcurl does not fill it in */
        double cl = -1;
        curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
        if (cl < 0) {
            /* no content length reported: treat the link as a directory */
            this_link->content_length = 0;
            this_link->type = LINK_DIR;
        } else {
            this_link->content_length = (size_t)cl;
            this_link->type = LINK_FILE;
        }
    }
}
/**
 * \brief print a LinkTable
 * \details writes one line per link to stdout: index, type, content length
 * and URL, separated by single spaces.
 * \param[in] linktbl the link table to print
 */
void LinkTable_print(LinkTable *linktbl)
{
    for (int i = 0; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];
        /*
         * content_length is a size_t, so it needs %zu rather than %lu.
         * NOTE(review): %c presumes the LinkType enumerators are character
         * codes -- confirm against data.h.
         */
        printf("%d %c %zu %s\n",
               i,
               this_link->type,
               this_link->content_length,
               this_link->p_url);
    }
}
@ -149,8 +190,7 @@ char *url_upper(const char *url)
const char *pt = strrchr(url, '/');
/* +1 for the '/' */
size_t len = pt - url + 1;
char *str = malloc(len* sizeof(char));
strncpy(str, url, len);
char *str = strndup(url, len);
str[len] = '\0';
return str;
}

6
link.h
View File

@ -22,6 +22,12 @@ void LinkTable_free(LinkTable *linktbl);
/** \brief add a link to the link table */
void LinkTable_add(LinkTable *linktbl, Link *link);
/**
* \brief fill the LinkTable
* \details fill the LinkTable with link type information
*/
void LinkTable_fill(LinkTable *linktbl);
/** \brief print a LinkTable */
void LinkTable_print(LinkTable *linktbl);

6
test.c
View File

@ -24,10 +24,8 @@ void gumbo_test()
printf("--- start of gumbo_test ---\n");
LinkTable *linktbl = LinkTable_new(
"https://cdimage.debian.org/debian-cd/current/amd64/iso-dvd/");
GumboOutput* output = gumbo_parse(linktbl->links[0]->data);
HTML_to_LinkTable(output->root, linktbl);
gumbo_destroy_output(&kGumboDefaultOptions, output);
"https://www.fangfufu.co.uk/~fangfufu/test/");
LinkTable_print(linktbl);
LinkTable_free(linktbl);
printf("--- end of gumbo_test ---\n\n");