finished the link table filler
This commit is contained in:
parent
27166c429f
commit
98ac7d19e8
10
data.h
10
data.h
|
@ -21,11 +21,11 @@ typedef enum {
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char p_url[255];
|
char p_url[255];
|
||||||
LinkType type;
|
LinkType type;
|
||||||
CURL *curl_h;
|
CURL *curl;
|
||||||
CURLcode res; /* initialise to -1, because all CURLcode are positive */
|
CURLcode res; /* initialise to -1, as all valid CURLcode are positive */
|
||||||
char *data;
|
char *body;
|
||||||
size_t data_sz;
|
size_t body_sz;
|
||||||
curl_off_t content_length;
|
size_t content_length;
|
||||||
} Link;
|
} Link;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
90
link.c
90
link.c
|
@ -1,7 +1,7 @@
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
#include "link.h"
|
|
||||||
#include "string.h"
|
#include "string.h"
|
||||||
|
#include "link.h"
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
|
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
|
||||||
|
@ -9,16 +9,16 @@ WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
|
||||||
size_t realsize = size * nmemb;
|
size_t realsize = size * nmemb;
|
||||||
Link *mem = (Link *)userp;
|
Link *mem = (Link *)userp;
|
||||||
|
|
||||||
mem->data = realloc(mem->data, mem->data_sz + realsize + 1);
|
mem->body = realloc(mem->body, mem->body_sz + realsize + 1);
|
||||||
if(mem->data == NULL) {
|
if(mem->body == NULL) {
|
||||||
/* out of memory! */
|
/* out of memory! */
|
||||||
printf("not enough memory (realloc returned NULL)\n");
|
printf("not enough memory (realloc returned NULL)\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
memcpy(&(mem->data[mem->data_sz]), contents, realsize);
|
memcpy(&(mem->body[mem->body_sz]), contents, realsize);
|
||||||
mem->data_sz += realsize;
|
mem->body_sz += realsize;
|
||||||
mem->data[mem->data_sz] = 0;
|
mem->body[mem->body_sz] = 0;
|
||||||
|
|
||||||
return realsize;
|
return realsize;
|
||||||
}
|
}
|
||||||
|
@ -27,26 +27,24 @@ Link *Link_new(const char *p_url)
|
||||||
{
|
{
|
||||||
Link *link = calloc(1, sizeof(Link));
|
Link *link = calloc(1, sizeof(Link));
|
||||||
|
|
||||||
size_t p_url_len = strnlen(p_url, LINK_LEN_MAX) + 1;
|
strncpy(link->p_url, p_url, LINK_LEN_MAX);
|
||||||
strncpy(link->p_url, p_url, p_url_len);
|
|
||||||
|
|
||||||
link->type = LINK_UNKNOWN;
|
link->type = LINK_UNKNOWN;
|
||||||
link->curl_h = curl_easy_init();
|
link->curl = curl_easy_init();
|
||||||
link->res = -1;
|
link->res = -1;
|
||||||
link->data = malloc(1);
|
|
||||||
|
|
||||||
/* set up some basic curl stuff */
|
/* set up some basic curl stuff */
|
||||||
curl_easy_setopt(link->curl_h, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
||||||
curl_easy_setopt(link->curl_h, CURLOPT_WRITEDATA, (void *)link->data);
|
curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)link);
|
||||||
curl_easy_setopt(link->curl_h, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
|
curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
|
||||||
|
|
||||||
return link;
|
return link;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Link_free(Link *link)
|
void Link_free(Link *link)
|
||||||
{
|
{
|
||||||
curl_easy_cleanup(link->curl_h);
|
curl_easy_cleanup(link->curl);
|
||||||
free(link->data);
|
free(link->body);
|
||||||
free(link);
|
free(link);
|
||||||
link = NULL;
|
link = NULL;
|
||||||
}
|
}
|
||||||
|
@ -54,16 +52,30 @@ void Link_free(Link *link)
|
||||||
LinkTable *LinkTable_new(const char *url)
|
LinkTable *LinkTable_new(const char *url)
|
||||||
{
|
{
|
||||||
LinkTable *linktbl = calloc(1, sizeof(LinkTable));
|
LinkTable *linktbl = calloc(1, sizeof(LinkTable));
|
||||||
|
|
||||||
/* populate the base URL */
|
/* populate the base URL */
|
||||||
LinkTable_add(linktbl, Link_new("/"));
|
LinkTable_add(linktbl, Link_new(url));
|
||||||
Link *this_link = linktbl->links[0];
|
Link *head_link = linktbl->links[0];
|
||||||
curl_easy_setopt(this_link->curl_h, CURLOPT_URL, url);
|
curl_easy_setopt(head_link->curl, CURLOPT_URL, url);
|
||||||
this_link->res = curl_easy_perform(this_link->curl_h);
|
|
||||||
if (this_link->res != CURLE_OK) {
|
/* start downloading the base URL */
|
||||||
|
head_link->res = curl_easy_perform(head_link->curl);
|
||||||
|
|
||||||
|
/* if downloading base URL failed */
|
||||||
|
if (head_link->res != CURLE_OK) {
|
||||||
fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL");
|
fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL");
|
||||||
LinkTable_free(linktbl);
|
LinkTable_free(linktbl);
|
||||||
linktbl = NULL;
|
linktbl = NULL;
|
||||||
|
return linktbl;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* Otherwise parse the received data */
|
||||||
|
GumboOutput* output = gumbo_parse(head_link->body);
|
||||||
|
HTML_to_LinkTable(output->root, linktbl);
|
||||||
|
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||||
|
|
||||||
|
/* Fill in the link table */
|
||||||
|
LinkTable_fill(linktbl);
|
||||||
return linktbl;
|
return linktbl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,13 +98,42 @@ void LinkTable_add(LinkTable *linktbl, Link *link)
|
||||||
linktbl->links[linktbl->num - 1] = link;
|
linktbl->links[linktbl->num - 1] = link;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void LinkTable_fill(LinkTable *linktbl)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < linktbl->num; i++) {
|
||||||
|
Link *this_link = linktbl->links[i];
|
||||||
|
if (this_link->type == LINK_UNKNOWN) {
|
||||||
|
CURL *curl = this_link->curl;
|
||||||
|
char *url;
|
||||||
|
curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url);
|
||||||
|
url = url_append(linktbl->links[0]->p_url, this_link->p_url);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_URL, url);
|
||||||
|
free(url);
|
||||||
|
curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
|
||||||
|
curl_easy_perform(curl);
|
||||||
|
double cl;
|
||||||
|
curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
|
||||||
|
if (cl == -1) {
|
||||||
|
this_link->content_length = 0;
|
||||||
|
this_link->type = LINK_DIR;
|
||||||
|
} else {
|
||||||
|
this_link->content_length = cl;
|
||||||
|
this_link->type = LINK_FILE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void LinkTable_print(LinkTable *linktbl)
|
void LinkTable_print(LinkTable *linktbl)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < linktbl->num; i++) {
|
for (int i = 0; i < linktbl->num; i++) {
|
||||||
printf("%d %c %s\n",
|
Link *this_link = linktbl->links[i];
|
||||||
|
printf("%d %c %lu %s\n",
|
||||||
i,
|
i,
|
||||||
linktbl->links[i]->type,
|
this_link->type,
|
||||||
linktbl->links[i]->p_url);
|
this_link->content_length,
|
||||||
|
this_link->p_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,8 +190,7 @@ char *url_upper(const char *url)
|
||||||
const char *pt = strrchr(url, '/');
|
const char *pt = strrchr(url, '/');
|
||||||
/* +1 for the '/' */
|
/* +1 for the '/' */
|
||||||
size_t len = pt - url + 1;
|
size_t len = pt - url + 1;
|
||||||
char *str = malloc(len* sizeof(char));
|
char *str = strndup(url, len);
|
||||||
strncpy(str, url, len);
|
|
||||||
str[len] = '\0';
|
str[len] = '\0';
|
||||||
return str;
|
return str;
|
||||||
}
|
}
|
||||||
|
|
6
link.h
6
link.h
|
@ -22,6 +22,12 @@ void LinkTable_free(LinkTable *linktbl);
|
||||||
/** \brief add a link to the link table */
|
/** \brief add a link to the link table */
|
||||||
void LinkTable_add(LinkTable *linktbl, Link *link);
|
void LinkTable_add(LinkTable *linktbl, Link *link);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \brief fill the LinkTable
|
||||||
|
* \details fill the LinkTable with link type information
|
||||||
|
*/
|
||||||
|
void LinkTable_fill(LinkTable *linktbl);
|
||||||
|
|
||||||
/** \brief print a LinkTable */
|
/** \brief print a LinkTable */
|
||||||
void LinkTable_print(LinkTable *linktbl);
|
void LinkTable_print(LinkTable *linktbl);
|
||||||
|
|
||||||
|
|
6
test.c
6
test.c
|
@ -24,10 +24,8 @@ void gumbo_test()
|
||||||
printf("--- start of gumbo_test ---\n");
|
printf("--- start of gumbo_test ---\n");
|
||||||
|
|
||||||
LinkTable *linktbl = LinkTable_new(
|
LinkTable *linktbl = LinkTable_new(
|
||||||
"https://cdimage.debian.org/debian-cd/current/amd64/iso-dvd/");
|
"https://www.fangfufu.co.uk/~fangfufu/test/");
|
||||||
GumboOutput* output = gumbo_parse(linktbl->links[0]->data);
|
|
||||||
HTML_to_LinkTable(output->root, linktbl);
|
|
||||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
|
||||||
LinkTable_print(linktbl);
|
LinkTable_print(linktbl);
|
||||||
LinkTable_free(linktbl);
|
LinkTable_free(linktbl);
|
||||||
printf("--- end of gumbo_test ---\n\n");
|
printf("--- end of gumbo_test ---\n\n");
|
||||||
|
|
Loading…
Reference in New Issue