finished the link table filler

This commit is contained in:
Fufu Fang 2018-07-20 13:59:25 +01:00
parent 27166c429f
commit 98ac7d19e8
4 changed files with 78 additions and 34 deletions

10
data.h
View File

@ -21,11 +21,11 @@ typedef enum {
typedef struct { typedef struct {
char p_url[255]; char p_url[255];
LinkType type; LinkType type;
CURL *curl_h; CURL *curl;
CURLcode res; /* initialise to -1, because all CURLcode are positive */ CURLcode res; /* initialise to -1, as all valid CURLcode are positive */
char *data; char *body;
size_t data_sz; size_t body_sz;
curl_off_t content_length; size_t content_length;
} Link; } Link;
/** /**

90
link.c
View File

@ -1,7 +1,7 @@
#include <ctype.h> #include <ctype.h>
#include "link.h"
#include "string.h" #include "string.h"
#include "link.h"
static size_t static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
@ -9,16 +9,16 @@ WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
size_t realsize = size * nmemb; size_t realsize = size * nmemb;
Link *mem = (Link *)userp; Link *mem = (Link *)userp;
mem->data = realloc(mem->data, mem->data_sz + realsize + 1); mem->body = realloc(mem->body, mem->body_sz + realsize + 1);
if(mem->data == NULL) { if(mem->body == NULL) {
/* out of memory! */ /* out of memory! */
printf("not enough memory (realloc returned NULL)\n"); printf("not enough memory (realloc returned NULL)\n");
return 0; return 0;
} }
memcpy(&(mem->data[mem->data_sz]), contents, realsize); memcpy(&(mem->body[mem->body_sz]), contents, realsize);
mem->data_sz += realsize; mem->body_sz += realsize;
mem->data[mem->data_sz] = 0; mem->body[mem->body_sz] = 0;
return realsize; return realsize;
} }
@ -27,26 +27,24 @@ Link *Link_new(const char *p_url)
{ {
Link *link = calloc(1, sizeof(Link)); Link *link = calloc(1, sizeof(Link));
size_t p_url_len = strnlen(p_url, LINK_LEN_MAX) + 1; strncpy(link->p_url, p_url, LINK_LEN_MAX);
strncpy(link->p_url, p_url, p_url_len);
link->type = LINK_UNKNOWN; link->type = LINK_UNKNOWN;
link->curl_h = curl_easy_init(); link->curl = curl_easy_init();
link->res = -1; link->res = -1;
link->data = malloc(1);
/* set up some basic curl stuff */ /* set up some basic curl stuff */
curl_easy_setopt(link->curl_h, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(link->curl_h, CURLOPT_WRITEDATA, (void *)link->data); curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)link);
curl_easy_setopt(link->curl_h, CURLOPT_USERAGENT, "mount-http-dir/libcurl"); curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
return link; return link;
} }
void Link_free(Link *link) void Link_free(Link *link)
{ {
curl_easy_cleanup(link->curl_h); curl_easy_cleanup(link->curl);
free(link->data); free(link->body);
free(link); free(link);
link = NULL; link = NULL;
} }
@ -54,16 +52,30 @@ void Link_free(Link *link)
LinkTable *LinkTable_new(const char *url) LinkTable *LinkTable_new(const char *url)
{ {
LinkTable *linktbl = calloc(1, sizeof(LinkTable)); LinkTable *linktbl = calloc(1, sizeof(LinkTable));
/* populate the base URL */ /* populate the base URL */
LinkTable_add(linktbl, Link_new("/")); LinkTable_add(linktbl, Link_new(url));
Link *this_link = linktbl->links[0]; Link *head_link = linktbl->links[0];
curl_easy_setopt(this_link->curl_h, CURLOPT_URL, url); curl_easy_setopt(head_link->curl, CURLOPT_URL, url);
this_link->res = curl_easy_perform(this_link->curl_h);
if (this_link->res != CURLE_OK) { /* start downloading the base URL */
head_link->res = curl_easy_perform(head_link->curl);
/* if downloading base URL failed */
if (head_link->res != CURLE_OK) {
fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL"); fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL");
LinkTable_free(linktbl); LinkTable_free(linktbl);
linktbl = NULL; linktbl = NULL;
return linktbl;
}; };
/* Otherwise parsed the received data */
GumboOutput* output = gumbo_parse(head_link->body);
HTML_to_LinkTable(output->root, linktbl);
gumbo_destroy_output(&kGumboDefaultOptions, output);
/* Fill in the link table */
LinkTable_fill(linktbl);
return linktbl; return linktbl;
} }
@ -86,13 +98,42 @@ void LinkTable_add(LinkTable *linktbl, Link *link)
linktbl->links[linktbl->num - 1] = link; linktbl->links[linktbl->num - 1] = link;
} }
void LinkTable_fill(LinkTable *linktbl)
{
for (int i = 0; i < linktbl->num; i++) {
Link *this_link = linktbl->links[i];
if (this_link->type == LINK_UNKNOWN) {
CURL *curl = this_link->curl;
char *url;
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url);
url = url_append(linktbl->links[0]->p_url, this_link->p_url);
curl_easy_setopt(curl, CURLOPT_URL, url);
free(url);
curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
curl_easy_perform(curl);
double cl;
curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
if (cl == -1) {
this_link->content_length = 0;
this_link->type = LINK_DIR;
} else {
this_link->content_length = cl;
this_link->type = LINK_FILE;
}
}
}
}
void LinkTable_print(LinkTable *linktbl) void LinkTable_print(LinkTable *linktbl)
{ {
for (int i = 0; i < linktbl->num; i++) { for (int i = 0; i < linktbl->num; i++) {
printf("%d %c %s\n", Link *this_link = linktbl->links[i];
printf("%d %c %lu %s\n",
i, i,
linktbl->links[i]->type, this_link->type,
linktbl->links[i]->p_url); this_link->content_length,
this_link->p_url);
} }
} }
@ -149,8 +190,7 @@ char *url_upper(const char *url)
const char *pt = strrchr(url, '/'); const char *pt = strrchr(url, '/');
/* +1 for the '/' */ /* +1 for the '/' */
size_t len = pt - url + 1; size_t len = pt - url + 1;
char *str = malloc(len* sizeof(char)); char *str = strndup(url, len);
strncpy(str, url, len);
str[len] = '\0'; str[len] = '\0';
return str; return str;
} }

6
link.h
View File

@ -22,6 +22,12 @@ void LinkTable_free(LinkTable *linktbl);
/** \brief add a link to the link table */ /** \brief add a link to the link table */
void LinkTable_add(LinkTable *linktbl, Link *link); void LinkTable_add(LinkTable *linktbl, Link *link);
/**
* \brief fill the LinkTable
* \details fill the LinkTable with link type information
*/
void LinkTable_fill(LinkTable *linktbl);
/** \brief print a LinkTable */ /** \brief print a LinkTable */
void LinkTable_print(LinkTable *linktbl); void LinkTable_print(LinkTable *linktbl);

6
test.c
View File

@ -24,10 +24,8 @@ void gumbo_test()
printf("--- start of gumbo_test ---\n"); printf("--- start of gumbo_test ---\n");
LinkTable *linktbl = LinkTable_new( LinkTable *linktbl = LinkTable_new(
"https://cdimage.debian.org/debian-cd/current/amd64/iso-dvd/"); "https://www.fangfufu.co.uk/~fangfufu/test/");
GumboOutput* output = gumbo_parse(linktbl->links[0]->data);
HTML_to_LinkTable(output->root, linktbl);
gumbo_destroy_output(&kGumboDefaultOptions, output);
LinkTable_print(linktbl); LinkTable_print(linktbl);
LinkTable_free(linktbl); LinkTable_free(linktbl);
printf("--- end of gumbo_test ---\n\n"); printf("--- end of gumbo_test ---\n\n");