From 98ac7d19e84aa0d5d0d86df3e7b5cf89ffa463ed Mon Sep 17 00:00:00 2001 From: Fufu Fang Date: Fri, 20 Jul 2018 13:59:25 +0100 Subject: [PATCH] finished the link table filler --- data.h | 10 +++---- link.c | 90 ++++++++++++++++++++++++++++++++++++++++++---------------- link.h | 6 ++++ test.c | 6 ++-- 4 files changed, 78 insertions(+), 34 deletions(-) diff --git a/data.h b/data.h index 34dbf74..8fc3129 100644 --- a/data.h +++ b/data.h @@ -21,11 +21,11 @@ typedef enum { typedef struct { char p_url[255]; LinkType type; - CURL *curl_h; - CURLcode res; /* initialise to -1, because all CURLcode are positive */ - char *data; - size_t data_sz; - curl_off_t content_length; + CURL *curl; + CURLcode res; /* initialise to -1, as all valid CURLcode are positive */ + char *body; + size_t body_sz; + size_t content_length; } Link; /** diff --git a/link.c b/link.c index 903ee1a..45129a2 100644 --- a/link.c +++ b/link.c @@ -1,7 +1,7 @@ #include -#include "link.h" #include "string.h" +#include "link.h" static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) @@ -9,16 +9,16 @@ WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) size_t realsize = size * nmemb; Link *mem = (Link *)userp; - mem->data = realloc(mem->data, mem->data_sz + realsize + 1); - if(mem->data == NULL) { + mem->body = realloc(mem->body, mem->body_sz + realsize + 1); + if(mem->body == NULL) { /* out of memory! */ printf("not enough memory (realloc returned NULL)\n"); return 0; } - memcpy(&(mem->data[mem->data_sz]), contents, realsize); - mem->data_sz += realsize; - mem->data[mem->data_sz] = 0; + memcpy(&(mem->body[mem->body_sz]), contents, realsize); + mem->body_sz += realsize; + mem->body[mem->body_sz] = 0; return realsize; } @@ -27,26 +27,24 @@ Link *Link_new(const char *p_url) { Link *link = calloc(1, sizeof(Link)); - size_t p_url_len = strnlen(p_url, LINK_LEN_MAX) + 1; - strncpy(link->p_url, p_url, p_url_len); + strncpy(link->p_url, p_url, LINK_LEN_MAX); link->type = LINK_UNKNOWN; - link->curl_h = curl_easy_init(); + link->curl = curl_easy_init(); link->res = -1; - link->data = malloc(1); /* set up some basic curl stuff */ - curl_easy_setopt(link->curl_h, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); - curl_easy_setopt(link->curl_h, CURLOPT_WRITEDATA, (void *)link->data); - curl_easy_setopt(link->curl_h, CURLOPT_USERAGENT, "mount-http-dir/libcurl"); + curl_easy_setopt(link->curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); + curl_easy_setopt(link->curl, CURLOPT_WRITEDATA, (void *)link); + curl_easy_setopt(link->curl, CURLOPT_USERAGENT, "mount-http-dir/libcurl"); return link; } void Link_free(Link *link) { - curl_easy_cleanup(link->curl_h); - free(link->data); + curl_easy_cleanup(link->curl); + free(link->body); free(link); link = NULL; } @@ -54,16 +52,30 @@ void Link_free(Link *link) LinkTable *LinkTable_new(const char *url) { LinkTable *linktbl = calloc(1, sizeof(LinkTable)); + /* populate the base URL */ - LinkTable_add(linktbl, Link_new("/")); - Link *this_link = linktbl->links[0]; - curl_easy_setopt(this_link->curl_h, CURLOPT_URL, url); - this_link->res = curl_easy_perform(this_link->curl_h); - if (this_link->res != CURLE_OK) { + LinkTable_add(linktbl, Link_new(url)); + Link *head_link = linktbl->links[0]; + curl_easy_setopt(head_link->curl, CURLOPT_URL, url); + + /* start downloading the base URL */ + head_link->res = curl_easy_perform(head_link->curl); + + /* if downloading base URL failed */ + if (head_link->res != CURLE_OK) { fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL"); LinkTable_free(linktbl); linktbl = NULL; + return linktbl; }; + + /* Otherwise parsed the received data */ + GumboOutput* output = gumbo_parse(head_link->body); + HTML_to_LinkTable(output->root, linktbl); + gumbo_destroy_output(&kGumboDefaultOptions, output); + + /* Fill in the link table */ + LinkTable_fill(linktbl); return linktbl; } @@ -86,13 +98,42 @@ void LinkTable_add(LinkTable *linktbl, Link *link) linktbl->links[linktbl->num - 1] = link; } +void LinkTable_fill(LinkTable *linktbl) +{ + for (int i = 0; i < linktbl->num; i++) { + Link *this_link = linktbl->links[i]; + if (this_link->type == LINK_UNKNOWN) { + CURL *curl = this_link->curl; + char *url; + curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url); + url = url_append(linktbl->links[0]->p_url, this_link->p_url); + curl_easy_setopt(curl, CURLOPT_URL, url); + free(url); + curl_easy_setopt(curl, CURLOPT_NOBODY, 1); + curl_easy_perform(curl); + double cl; + curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl); + if (cl == -1) { + this_link->content_length = 0; + this_link->type = LINK_DIR; + } else { + this_link->content_length = cl; + this_link->type = LINK_FILE; + } + } + } +} + + void LinkTable_print(LinkTable *linktbl) { for (int i = 0; i < linktbl->num; i++) { - printf("%d %c %s\n", + Link *this_link = linktbl->links[i]; + printf("%d %c %lu %s\n", i, - linktbl->links[i]->type, - linktbl->links[i]->p_url); + this_link->type, + this_link->content_length, + this_link->p_url); } } @@ -149,8 +190,7 @@ char *url_upper(const char *url) const char *pt = strrchr(url, '/'); /* +1 for the '/' */ size_t len = pt - url + 1; - char *str = malloc(len* sizeof(char)); - strncpy(str, url, len); + char *str = strndup(url, len); str[len] = '\0'; return str; } diff --git a/link.h b/link.h index 64e7c93..2645d27 100644 --- a/link.h +++ b/link.h @@ -22,6 +22,12 @@ void LinkTable_free(LinkTable *linktbl); /** \brief add a link to the link table */ void LinkTable_add(LinkTable *linktbl, Link *link); +/** + * \brief fill the LinkTable + * \details fill the LinkTable with link type information + */ +void LinkTable_fill(LinkTable *linktbl); + /** \brief print a LinkTable */ void LinkTable_print(LinkTable *linktbl); diff --git a/test.c b/test.c index 820dd4e..b1a9f45 100644 --- a/test.c +++ b/test.c @@ -24,10 +24,8 @@ void gumbo_test() printf("--- start of gumbo_test ---\n"); LinkTable *linktbl = LinkTable_new( - "https://cdimage.debian.org/debian-cd/current/amd64/iso-dvd/"); - GumboOutput* output = gumbo_parse(linktbl->links[0]->data); - HTML_to_LinkTable(output->root, linktbl); - gumbo_destroy_output(&kGumboDefaultOptions, output); + "https://www.fangfufu.co.uk/~fangfufu/test/"); + LinkTable_print(linktbl); LinkTable_free(linktbl); printf("--- end of gumbo_test ---\n\n");