finished writing the new linktable creation function
This commit is contained in:
parent
442d00141e
commit
27166c429f
12
data.h
12
data.h
|
@ -19,22 +19,22 @@ typedef enum {
|
|||
|
||||
/** \brief link data type */
|
||||
typedef struct {
|
||||
char *p_url;
|
||||
char p_url[255];
|
||||
LinkType type;
|
||||
CURL *curl_h;
|
||||
CURLcode res; /* initialise to -1, because all CURLcode are positive */
|
||||
char *data;
|
||||
size_t data_sz;
|
||||
curl_off_t content_length;
|
||||
} Link;
|
||||
|
||||
/** \brief link table type */
|
||||
/**
|
||||
* \brief link table type
|
||||
* \details index 0 contains the Link for the base URL
|
||||
*/
|
||||
typedef struct {
|
||||
int num;
|
||||
Link **links;
|
||||
} LinkTable;
|
||||
|
||||
|
||||
|
||||
extern char *BASE_URL;
|
||||
|
||||
#endif
|
||||
|
|
11
http.c
11
http.c
|
@ -1,11 +0,0 @@
|
|||
/**
|
||||
* \file http.c
|
||||
* \todo WARNING please fix url_feof
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "http.h"
|
||||
|
46
link.c
46
link.c
|
@ -3,37 +3,67 @@
|
|||
#include "link.h"
|
||||
#include "string.h"
|
||||
|
||||
static size_t
|
||||
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
|
||||
{
|
||||
size_t realsize = size * nmemb;
|
||||
Link *mem = (Link *)userp;
|
||||
|
||||
mem->data = realloc(mem->data, mem->data_sz + realsize + 1);
|
||||
if(mem->data == NULL) {
|
||||
/* out of memory! */
|
||||
printf("not enough memory (realloc returned NULL)\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
memcpy(&(mem->data[mem->data_sz]), contents, realsize);
|
||||
mem->data_sz += realsize;
|
||||
mem->data[mem->data_sz] = 0;
|
||||
|
||||
return realsize;
|
||||
}
|
||||
|
||||
Link *Link_new(const char *p_url)
|
||||
{
|
||||
Link *link = calloc(1, sizeof(Link));
|
||||
|
||||
size_t p_url_len = strnlen(p_url, LINK_LEN_MAX) + 1;
|
||||
link->p_url = malloc(p_url_len);
|
||||
link->p_url = strncpy(link->p_url, p_url, p_url_len);
|
||||
strncpy(link->p_url, p_url, p_url_len);
|
||||
|
||||
link->type = LINK_UNKNOWN;
|
||||
link->curl_h = curl_easy_init();
|
||||
link->res = -1;
|
||||
link->data = NULL;
|
||||
link->data_sz = 0;
|
||||
link->data = malloc(1);
|
||||
|
||||
/* set up some basic curl stuff */
|
||||
curl_easy_setopt(link->curl_h, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
|
||||
curl_easy_setopt(link->curl_h, CURLOPT_WRITEDATA, (void *)link->data);
|
||||
curl_easy_setopt(link->curl_h, CURLOPT_USERAGENT, "mount-http-dir/libcurl");
|
||||
|
||||
return link;
|
||||
}
|
||||
|
||||
/**
 * \brief release a Link and everything it owns
 * \details Cleans up the curl easy handle and frees the downloaded data and
 * the Link itself. The caller's pointer is passed by value, so it is the
 * caller's job to stop using it (or set it to NULL) afterwards.
 * \param[in] link the Link to release; NULL is accepted and ignored
 */
void Link_free(Link *link)
{
    if (link == NULL) {
        return;
    }

    /*
     * p_url is an array stored inside the Link (char p_url[255]), so it is
     * released together with the struct and must not be passed to free().
     */
    curl_easy_cleanup(link->curl_h);
    free(link->data);
    free(link);
}
|
||||
|
||||
LinkTable *LinkTable_new()
|
||||
LinkTable *LinkTable_new(const char *url)
|
||||
{
|
||||
LinkTable *linktbl = calloc(1, sizeof(LinkTable));
|
||||
linktbl->num = 0;
|
||||
linktbl->links = NULL;
|
||||
/* populate the base URL */
|
||||
LinkTable_add(linktbl, Link_new("/"));
|
||||
Link *this_link = linktbl->links[0];
|
||||
curl_easy_setopt(this_link->curl_h, CURLOPT_URL, url);
|
||||
this_link->res = curl_easy_perform(this_link->curl_h);
|
||||
if (this_link->res != CURLE_OK) {
|
||||
fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL");
|
||||
LinkTable_free(linktbl);
|
||||
linktbl = NULL;
|
||||
};
|
||||
return linktbl;
|
||||
}
|
||||
|
||||
|
|
2
link.h
2
link.h
|
@ -14,7 +14,7 @@ Link *Link_new();
|
|||
void Link_free(Link *link);
|
||||
|
||||
/** \brief make a new LinkTable */
|
||||
LinkTable *LinkTable_new();
|
||||
LinkTable *LinkTable_new(const char *url);
|
||||
|
||||
/** \brief free a LinkTable */
|
||||
void LinkTable_free(LinkTable *linktbl);
|
||||
|
|
18
main.c
18
main.c
|
@ -2,15 +2,25 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "link.h"
|
||||
|
||||
#include "test.h"
|
||||
|
||||
void init()
|
||||
{
|
||||
curl_global_init(CURL_GLOBAL_ALL);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
(void) argc;
|
||||
(void) argv;
|
||||
gumbo_test(argc, argv);
|
||||
// url_test();
|
||||
http_test();
|
||||
|
||||
init();
|
||||
|
||||
gumbo_test();
|
||||
url_test();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
34
test.c
34
test.c
|
@ -6,13 +6,7 @@
|
|||
|
||||
#include "link.h"
|
||||
#include "test.h"
|
||||
#include "http.h"
|
||||
|
||||
|
||||
int http_test()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
void url_test()
|
||||
{
|
||||
printf("--- start of url_test ---\n");
|
||||
|
@ -25,33 +19,13 @@ void url_test()
|
|||
printf("--- end of url_test ---\n\n");
|
||||
}
|
||||
|
||||
void gumbo_test(int argc, char **argv)
|
||||
void gumbo_test()
|
||||
{
|
||||
printf("--- start of gumbo_test ---\n");
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: find_links <html filename>.\n");
|
||||
}
|
||||
const char* filename = argv[1];
|
||||
|
||||
FILE *fp;
|
||||
fp = fopen(filename, "r");
|
||||
|
||||
if (!fp) {
|
||||
fprintf(stderr, "File %s not found!\n", filename);
|
||||
}
|
||||
|
||||
fseek(fp, 0L, SEEK_END);
|
||||
unsigned long filesize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* contents = (char*) malloc(sizeof(char) * filesize);
|
||||
if (fread(contents, 1, filesize, fp) != filesize) {
|
||||
fprintf(stderr, "Read error, %s\n", strerror(errno));
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
GumboOutput* output = gumbo_parse(contents);
|
||||
LinkTable *linktbl = LinkTable_new();
|
||||
LinkTable *linktbl = LinkTable_new(
|
||||
"https://cdimage.debian.org/debian-cd/current/amd64/iso-dvd/");
|
||||
GumboOutput* output = gumbo_parse(linktbl->links[0]->data);
|
||||
HTML_to_LinkTable(output->root, linktbl);
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
LinkTable_print(linktbl);
|
||||
|
|
Loading…
Reference in New Issue