Split link-related functions into link.c and link.h

2018-07-26 10:29:44 +01:00 · 2018-07-26 10:29:44 +01:00 · 684831a961
parent 46c98be116
commit 684831a961
7 changed files with 531 additions and 498 deletions
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 CC=gcc
 CFLAGS= -g -O2 -Wall -Wextra -lgumbo -lcurl -lfuse -lcrypto \
 	-D_FILE_OFFSET_BITS=64
-OBJ = main.o network.o fuse_local.o
+OBJ = main.o network.o fuse_local.o link.o

 %.o: %.c
 	$(CC) -c -o $@ $< $(CFLAGS)
--- a/fuse_local.c
+++ b/fuse_local.c
@ -1,5 +1,6 @@
 #include "fuse_local.h"

+#include "link.h"
 #include "network.h"

 #include <errno.h>
--- a/link.c
+++ b/link.c
@ -0,0 +1,434 @@
+#include "link.h"
+
+#include "network.h"
+
+#include <gumbo.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+
+#define HTTP_OK 200
+#define HTTP_PARTIAL_CONTENT 206
+#define HTTP_RANGE_NOT_SATISFIABLE 416
+
+/* ---------------- External variables -----------------------*/
+LinkTable *ROOT_LINK_TBL;
+
+static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
+static Link *Link_new(const char *p_url, LinkType type);
+static CURL *Link_to_curl(Link *link);
+static void Link_get_stat(Link *this_link);
+static void LinkTable_add(LinkTable *linktbl, Link *link);
+void LinkTable_fill(LinkTable *linktbl);
+static void LinkTable_free(LinkTable *linktbl);
+static void LinkTable_print(LinkTable *linktbl);
+static Link *path_to_Link_recursive(char *path, LinkTable *linktbl);
+static LinkType p_url_type(const char *p_url);
+static char *url_append(const char *url, const char *sublink);
+
+/**
+ * \brief collect the usable <a href="..."> targets found below a HTML node
+ * \details Walks the Gumbo tree depth-first. Every anchor whose href is
+ * accepted by p_url_type() is copied into a new Link and appended to linktbl.
+ * Non-element nodes are ignored.
+ *
+ * Adapted from:
+ * https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
+ */
+static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl)
+{
+    if (node->type != GUMBO_NODE_ELEMENT) {
+        return;
+    }
+
+    if (node->v.element.tag == GUMBO_TAG_A) {
+        GumboAttribute *anchor_href =
+            gumbo_get_attribute(&node->v.element.attributes, "href");
+        if (anchor_href) {
+            /* only hrefs classified as a file or a directory are kept */
+            LinkType kind = p_url_type(anchor_href->value);
+            if (kind != LINK_INVALID) {
+                LinkTable_add(linktbl, Link_new(anchor_href->value, kind));
+            }
+        }
+    }
+
+    /* descend into the children, anchors included */
+    GumboVector *kids = &node->v.element.children;
+    size_t idx = 0;
+    while (idx < kids->length) {
+        HTML_to_LinkTable((GumboNode *)kids->data[idx], linktbl);
+        idx++;
+    }
+}
+
+/**
+ * \brief allocate a zero-initialised Link holding a copy of p_url
+ * \details The name is truncated to LINK_LEN_MAX - 1 characters and a single
+ * trailing '/' is stripped. The process exits if the allocation fails.
+ * \param p_url the link name as found in the href attribute
+ * \param type the type to record in the new Link
+ * \return the newly allocated Link, never NULL
+ */
+static Link *Link_new(const char *p_url, LinkType type)
+{
+    Link *link = calloc(1, sizeof(Link));
+    if (!link) {
+        fprintf(stderr, "Link_new(): calloc failure!\n");
+        exit(EXIT_FAILURE);
+    }
+    /*
+     * Copy at most LINK_LEN_MAX - 1 bytes: calloc() zeroed the buffer, so the
+     * last byte stays '\0' and p_url is always terminated.
+     */
+    strncpy(link->p_url, p_url, LINK_LEN_MAX - 1);
+    link->type = type;
+
+    /* remove the trailing '/' from p_url if it exists; "" has none */
+    size_t len = strlen(link->p_url);
+    if (len > 0 && link->p_url[len - 1] == '/') {
+        link->p_url[len - 1] = '\0';
+    }
+
+    return link;
+}
+
+/**
+ * \brief create a curl easy handle configured to fetch link->f_url
+ * \details The handle follows at most two redirects, uses the shared
+ * curl_share object and delivers received data to WriteMemoryCallback.
+ * This function does not clean the handle up. The process exits if curl
+ * cannot create a handle, in line with the allocation failures elsewhere in
+ * this file.
+ * \return the configured handle, never NULL
+ */
+static CURL *Link_to_curl(Link *link)
+{
+    CURL *curl = curl_easy_init();
+    if (!curl) {
+        fprintf(stderr, "Link_to_curl(): curl_easy_init() failed!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /*
+     * set up some basic curl stuff
+     *
+     * The user agent is a single literal: a backslash-continued string would
+     * embed the indentation of the following source line.
+     * Numeric options are passed as long, which is what libcurl reads from
+     * the variadic argument list.
+     */
+    curl_easy_setopt(curl, CURLOPT_USERAGENT,
+                     "httpdirfs - https://github.com/fangfufu/httpdirfs");
+    curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
+    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+    /* for following directories without the '/' */
+    curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 2L);
+    curl_easy_setopt(curl, CURLOPT_URL, link->f_url);
+    curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
+    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15L);
+    curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
+    /*
+     * The write back function pointer has to be set at curl handle creation,
+     * for thread safety
+     */
+    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
+
+    return curl;
+}
+
+/**
+ * \brief queue a header-only request that fetches the size and time of a file
+ * \details Does nothing beyond logging unless this_link is a LINK_FILE. The
+ * request is handed to transfer_nonblocking(); the Link is carried along as
+ * the handle's private data, tagged FILESTAT.
+ * \param this_link the link to query; its f_url must already be filled in
+ */
+void Link_get_stat(Link *this_link)
+{
+    fprintf(stderr, "Link_get_stat(%s);\n", this_link->f_url);
+
+    if (this_link->type == LINK_FILE) {
+        CURL *curl = Link_to_curl(this_link);
+        curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
+        curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
+
+        /*
+         * We need to put the variable on the heap, because otherwise the
+         * variable gets popped from the stack as the function returns.
+         *
+         * Per the original note it gets freed in curl_multi_perform_once()
+         * (not visible from this file).
+         *
+         * calloc() is used so that the 'transferring' field does not start
+         * out with an indeterminate value.
+         */
+        TransferStruct *transfer = calloc(1, sizeof(TransferStruct));
+        if (!transfer) {
+            fprintf(stderr, "Link_get_stat(): calloc failed!\n");
+            exit(EXIT_FAILURE);
+        }
+        transfer->link = this_link;
+        transfer->type = FILESTAT;
+        curl_easy_setopt(curl, CURLOPT_PRIVATE, transfer);
+
+        transfer_nonblocking(curl);
+    }
+}
+
+/**
+ * \brief record the outcome of a finished stat request in a Link
+ * \details Any response other than HTTP 200 marks the link LINK_INVALID.
+ * Otherwise the file time is stored, and the link becomes a LINK_DIR with
+ * zero length when curl reports a content length of -1, or a LINK_FILE with
+ * the reported length.
+ * \param this_link the link to update
+ * \param curl the easy handle whose transfer has completed
+ */
+void Link_set_stat(Link* this_link, CURL *curl)
+{
+    long status;
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &status);
+    if (status != HTTP_OK) {
+        this_link->type = LINK_INVALID;
+        return;
+    }
+
+    double length = 0;
+    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &length);
+    curl_easy_getinfo(curl, CURLINFO_FILETIME, &(this_link->time));
+
+    if (length != -1) {
+        this_link->content_length = length;
+        this_link->type = LINK_FILE;
+    } else {
+        /* Turns out not to be a file after all */
+        this_link->content_length = 0;
+        this_link->type = LINK_DIR;
+    }
+}
+
+/**
+ * \brief append a Link to a LinkTable, growing the pointer array by one
+ * \details The table stores the pointer itself; the Link is not copied.
+ * The process exits if the array cannot be grown.
+ */
+static void LinkTable_add(LinkTable *linktbl, Link *link)
+{
+    int slot = linktbl->num;
+
+    linktbl->num = slot + 1;
+    linktbl->links = realloc(linktbl->links, linktbl->num * sizeof(Link *));
+    if (linktbl->links == NULL) {
+        fprintf(stderr, "LinkTable_add(): realloc failure!\n");
+        exit(EXIT_FAILURE);
+    }
+    linktbl->links[slot] = link;
+}
+
+/**
+ * \brief complete every valid Link of a table
+ * \details For each link with a non-zero type: builds the full URL from the
+ * head link's URL and the link name, URL-unescapes the name, then either
+ * queues a stat request (files without a known length) or copies the head
+ * link's time (directories). Finally blocks until curl_multi_perform_once()
+ * returns zero.
+ * \param linktbl the table to fill; index 0 must hold the head link
+ */
+void LinkTable_fill(LinkTable *linktbl)
+{
+    Link *head_link = linktbl->links[0];
+    for (int i = 0; i < linktbl->num; i++) {
+        Link *this_link = linktbl->links[i];
+        if (!this_link->type) {
+            continue;
+        }
+
+        char *url = url_append(head_link->f_url, this_link->p_url);
+        /* strncpy() does not terminate on truncation, so do it by hand */
+        strncpy(this_link->f_url, url, URL_LEN_MAX - 1);
+        this_link->f_url[URL_LEN_MAX - 1] = '\0';
+        free(url);
+
+        char *unescaped_p_url = curl_easy_unescape(NULL, this_link->p_url, 0,
+                                                   NULL);
+        if (unescaped_p_url) {
+            strncpy(this_link->p_url, unescaped_p_url, LINK_LEN_MAX - 1);
+            this_link->p_url[LINK_LEN_MAX - 1] = '\0';
+            curl_free(unescaped_p_url);
+        } else {
+            /* keep the escaped name rather than copying from NULL */
+            fprintf(stderr, "LinkTable_fill(): cannot unescape %s\n",
+                    this_link->p_url);
+        }
+
+        if (this_link->type == LINK_FILE && !(this_link->content_length)) {
+            Link_get_stat(this_link);
+        } else if (this_link->type == LINK_DIR) {
+            this_link->time = head_link->time;
+        }
+    }
+    /* Block until the LinkTable is filled up */
+    while (curl_multi_perform_once()) {
+        usleep(1000);
+    }
+}
+
+/**
+ * \brief release a LinkTable, its pointer array and every Link it holds
+ * \details Tables hanging off a Link's next_table are not visited here.
+ */
+static void LinkTable_free(LinkTable *linktbl)
+{
+    int idx = 0;
+    while (idx < linktbl->num) {
+        free(linktbl->links[idx]);
+        idx++;
+    }
+    free(linktbl->links);
+    free(linktbl);
+}
+
+/**
+ * \brief create a new LinkTable by downloading and parsing a directory page
+ * \details Index 0 of the table holds the head link for url itself. The page
+ * is fetched with a blocking transfer, its anchors are collected with
+ * HTML_to_LinkTable() and the entries are completed by LinkTable_fill().
+ * \param url the URL of the directory listing
+ * \return the new table, or NULL if the server did not answer HTTP 200
+ */
+LinkTable *LinkTable_new(const char *url)
+{
+    fprintf(stderr, "LinkTable_new(%s);\n", url);
+
+    LinkTable *linktbl = calloc(1, sizeof(LinkTable));
+    if (!linktbl) {
+        fprintf(stderr, "LinkTable_new(): calloc failure!\n");
+        exit(EXIT_FAILURE);
+    }
+
+    /* populate the base URL */
+    LinkTable_add(linktbl, Link_new("/", LINK_HEAD));
+    Link *head_link = linktbl->links[0];
+    head_link->type = LINK_HEAD;
+    /* strncpy() does not terminate on truncation, so do it by hand */
+    strncpy(head_link->f_url, url, URL_LEN_MAX - 1);
+    head_link->f_url[URL_LEN_MAX - 1] = '\0';
+
+    /* start downloading the base URL */
+    CURL *curl = Link_to_curl(head_link);
+    MemoryStruct buf;
+    buf.size = 0;
+    buf.memory = NULL;
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
+
+    transfer_blocking(curl);
+
+    /* if downloading base URL failed */
+    long http_resp = 0;
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
+    if (http_resp != HTTP_OK) {
+        /*
+         * Adjacent literals are used instead of a backslash continuation,
+         * which would embed the source indentation in the message.
+         */
+        fprintf(stderr, "link.c: LinkTable_new() cannot retrieve the base URL, "
+                "URL: %s, HTTP %ld\n", url, http_resp);
+
+        /* release everything acquired so far before giving up */
+        curl_easy_cleanup(curl);
+        free(buf.memory);
+        LinkTable_free(linktbl);
+        return NULL;
+    }
+    curl_easy_getinfo(curl, CURLINFO_FILETIME, &(head_link->time));
+    curl_easy_cleanup(curl);
+
+    /*
+     * Otherwise parse the received data. buf.memory is still NULL if nothing
+     * was written into buf, and gumbo_parse() must not be given NULL.
+     */
+    if (buf.memory) {
+        GumboOutput *output = gumbo_parse(buf.memory);
+        HTML_to_LinkTable(output->root, linktbl);
+        gumbo_destroy_output(&kGumboDefaultOptions, output);
+    }
+    free(buf.memory);
+
+    /* Fill in the link table */
+    LinkTable_fill(linktbl);
+    return linktbl;
+}
+
+/**
+ * \brief print a LinkTable
+ * \details Writes the table address, the head link's full URL and one line
+ * per entry (index, type character, content length, name, full URL) to
+ * stderr.
+ */
+static void LinkTable_print(LinkTable *linktbl)
+{
+    fprintf(stderr, "--------------------------------------------\n");
+    /* %p requires a void pointer argument */
+    fprintf(stderr, " LinkTable %p for %s\n", (void *)linktbl,
+            linktbl->links[0]->f_url);
+    fprintf(stderr, "--------------------------------------------\n");
+    for (int i = 0; i < linktbl->num; i++) {
+        Link *this_link = linktbl->links[i];
+        /* content_length is a size_t, which is printed with %zu */
+        fprintf(stderr, "%d %c %zu %s %s\n",
+                i,
+                this_link->type,
+                this_link->content_length,
+                this_link->p_url,
+                this_link->f_url
+        );
+
+    }
+    fprintf(stderr, "--------------------------------------------\n");
+}
+
+/**
+ * \brief find the Link associated with a path, starting at ROOT_LINK_TBL
+ * \details The lookup works on a private copy of the path (at most
+ * URL_LEN_MAX characters), because the recursive helper writes into the
+ * string it is given. The process exits if the copy cannot be allocated.
+ * \return the matching Link, or NULL if there is none
+ */
+Link *path_to_Link(const char *path)
+{
+    char *scratch = strndup(path, URL_LEN_MAX);
+    if (scratch == NULL) {
+        fprintf(stderr, "path_to_Link(): cannot allocate memory\n");
+        exit(EXIT_FAILURE);
+    }
+
+    Link *found = path_to_Link_recursive(scratch, ROOT_LINK_TBL);
+
+    free(scratch);
+    return found;
+}
+
+/**
+ * \brief resolve a path against a LinkTable, one component per call
+ * \details The first path component is matched against the names in linktbl
+ * (index 0, the head link, is skipped). If more components follow, the
+ * matching link's sub-table is created on demand with LinkTable_new() and the
+ * search continues there.
+ * \param path writable path string; separators are overwritten with '\0'
+ * \param linktbl the table to search, may be NULL
+ * \return the matching Link, or NULL if the path cannot be resolved
+ */
+static Link *path_to_Link_recursive(char *path, LinkTable *linktbl)
+{
+    /* nothing to search, e.g. the table could not be downloaded */
+    if (!linktbl) {
+        return NULL;
+    }
+
+    /* skip the leading '/' if it exists */
+    if (*path == '/') {
+        path++;
+    }
+
+    /* an empty path names no link; also avoids indexing path[-1] below */
+    size_t len = strnlen(path, URL_LEN_MAX);
+    if (len == 0) {
+        return NULL;
+    }
+
+    /* remove the last '/' if it exists */
+    if (path[len - 1] == '/') {
+        path[len - 1] = '\0';
+    }
+
+    /*
+     * If another '/' is found, terminate the current component there and
+     * remember where the remainder of the path starts. next_path stays NULL
+     * when the last level has been reached.
+     */
+    char *next_path = NULL;
+    char *slash = strchr(path, '/');
+    if (slash) {
+        *slash = '\0';
+        next_path = slash + 1;
+    }
+
+    for (int i = 1; i < linktbl->num; i++) {
+        Link *candidate = linktbl->links[i];
+        if (strncmp(path, candidate->p_url, LINK_LEN_MAX)) {
+            continue;
+        }
+
+        if (!next_path) {
+            /* We found our link */
+            return candidate;
+        }
+
+        /* The next sub-directory exists, load its table on first use */
+        if (!(candidate->next_table)) {
+            candidate->next_table = LinkTable_new(candidate->f_url);
+            if (!(candidate->next_table)) {
+                /* LinkTable_new() already reported the failure */
+                return NULL;
+            }
+            fprintf(stderr, "Created new link table for %s\n",
+                    candidate->f_url);
+            LinkTable_print(candidate->next_table);
+        }
+
+        return path_to_Link_recursive(next_path, candidate->next_table);
+    }
+    return NULL;
+}
+
+/**
+ * \brief download a byte range of the file behind a path
+ * \details Requests the bytes [offset, offset + size - 1] with a blocking
+ * transfer and copies at most size of the received bytes to output_buf.
+ * \param path the path of the file
+ * \param output_buf destination buffer of at least size bytes
+ * \param size the number of bytes wanted
+ * \param offset the position of the first byte wanted
+ * \return the number of bytes copied, 0 if the server reports the range as
+ * not satisfiable, or -ENOENT if the path is unknown or the server answers
+ * with an unexpected status
+ */
+long path_download(const char *path, char *output_buf, size_t size,
+                   off_t offset)
+{
+    Link *link;
+    link = path_to_Link(path);
+    if (!link) {
+        return -ENOENT;
+    }
+
+    /* nothing requested; also keeps "size - 1" below from wrapping */
+    if (size == 0) {
+        return 0;
+    }
+
+    /* HTTP byte ranges are inclusive at both ends */
+    size_t start = offset;
+    size_t end = start + size - 1;
+    char range_str[64];
+    snprintf(range_str, sizeof(range_str), "%zu-%zu", start, end);
+
+    MemoryStruct buf;
+    buf.size = 0;
+    buf.memory = NULL;
+
+    fprintf(stderr, "path_download(%s, %s);\n",
+            path, range_str);
+
+    CURL *curl = Link_to_curl(link);
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
+    curl_easy_setopt(curl, CURLOPT_RANGE, range_str);
+
+    transfer_blocking(curl);
+
+    long http_resp = 0;
+    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
+
+    long ret;
+    if (http_resp == HTTP_RANGE_NOT_SATISFIABLE) {
+        /*
+         * The range starts beyond the end of the file: report end-of-file
+         * instead of copying the body of the error response.
+         */
+        ret = 0;
+    } else if (http_resp != HTTP_OK && http_resp != HTTP_PARTIAL_CONTENT) {
+        fprintf(stderr, "path_download(): Could not download %s, HTTP %ld\n",
+                link->f_url, http_resp);
+        ret = -ENOENT;
+    } else {
+        double dl = 0;
+        curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl);
+
+        size_t recv = dl;
+        if (recv > size) {
+            recv = size;
+        }
+        /* buf.memory is still NULL if nothing was written into buf */
+        if (!buf.memory) {
+            recv = 0;
+        }
+        if (recv > 0) {
+            memmove(output_buf, buf.memory, recv);
+        }
+        ret = recv;
+    }
+
+    /* single exit: the handle and the buffer are released on every path */
+    curl_easy_cleanup(curl);
+    free(buf.memory);
+    return ret;
+}
+
+/**
+ * \brief classify a href value found in a directory listing
+ * \param p_url the href value
+ * \return LINK_INVALID if the value does not start with an alphanumerical
+ * character or is an absolute http(s) URL, LINK_DIR if it ends with '/',
+ * LINK_FILE otherwise
+ */
+static LinkType p_url_type(const char *p_url)
+{
+    /*
+     * The link name has to start with alphanumerical character.
+     * The cast is required: passing a negative char value (e.g. a byte of a
+     * non-ASCII name) to a <ctype.h> function is undefined. This test also
+     * rejects the empty string, so strlen() - 1 below cannot wrap.
+     */
+    if (!isalnum((unsigned char)p_url[0])) {
+        return LINK_INVALID;
+    }
+
+    /* check for http:// and https:// */
+    if ( !strncmp(p_url, "http://", 7) || !strncmp(p_url, "https://", 8) ) {
+        return LINK_INVALID;
+    }
+
+    if ( p_url[strlen(p_url) - 1] == '/' ) {
+        return LINK_DIR;
+    }
+
+    return LINK_FILE;
+}
+
+/**
+ * \brief join a base URL and a sub-link into a newly allocated string
+ * \details Exactly one '/' separates the two parts: a separator is inserted
+ * only when url is non-empty and does not already end with '/'. An empty url
+ * yields a copy of sublink. The process exits if the allocation fails.
+ * \param url the base URL
+ * \param sublink the part to append
+ * \return the joined string; the caller must free() it
+ */
+static char *url_append(const char *url, const char *sublink)
+{
+    size_t ul = strlen(url);
+    size_t sl = strlen(sublink);
+
+    /* checking ul first avoids reading url[-1] for an empty base */
+    int needs_separator = (ul > 0 && url[ul - 1] != '/');
+
+    /* calloc() zeroes the buffer, which also provides the terminator */
+    char *str = calloc(ul + sl + needs_separator + 1, sizeof(char));
+    if (!str) {
+        fprintf(stderr, "url_append(): calloc failure!\n");
+        exit(EXIT_FAILURE);
+    }
+    memcpy(str, url, ul);
+    if (needs_separator) {
+        str[ul] = '/';
+    }
+    memcpy(str + ul + needs_separator, sublink, sl);
+    return str;
+}
--- a/link.h
+++ b/link.h
@ -0,0 +1,62 @@
+#ifndef LINK_H
+#define LINK_H
+
+#include <curl/curl.h>
+
+#include <stdlib.h>
+
+#define URL_LEN_MAX 2048
+#define LINK_LEN_MAX 255
+
+/**
+ * \brief the link type
+ * \details The values are printable characters (LinkTable_print() prints
+ * them with %c), except LINK_INVALID, which is zero so that a plain
+ * truth test on the type tells valid links from invalid ones.
+ */
+typedef enum {
+    /* the entry at index 0 of a LinkTable, describing the table's own URL */
+    LINK_HEAD = 'H',
+    /* a sub-directory: the href ended with '/', or a stat reported no length */
+    LINK_DIR = 'D',
+    /* a regular file */
+    LINK_FILE = 'F',
+    /* a rejected href, or a link whose stat request did not return HTTP 200 */
+    LINK_INVALID = '\0'
+} LinkType;
+
+/**
+ * \brief link table type
+ * \details index 0 contains the Link for the base URL
+ */
+typedef struct LinkTable LinkTable;
+
+/** \brief link data type */
+typedef struct Link Link;
+
+
+/*
+ * One entry of a LinkTable: a file or directory found in a listing, or the
+ * head entry describing the listing itself.
+ */
+struct Link {
+    /* link name taken from the href; URL-unescaped by LinkTable_fill() */
+    char p_url[LINK_LEN_MAX];
+    /* full URL; for non-head links built from the head link's URL and p_url */
+    char f_url[URL_LEN_MAX];
+    /* what kind of entry this is, see LinkType */
+    LinkType type;
+    /* length reported by the server for a file, 0 otherwise */
+    size_t content_length;
+    /* table of the sub-directory, created on first lookup through it */
+    LinkTable *next_table;
+    /* value of CURLINFO_FILETIME; directories copy it from the head link */
+    long time;
+};
+
+/* A growable array of Link pointers; index 0 holds the head link. */
+struct LinkTable {
+    /* number of entries in links */
+    int num;
+    /* array of num pointers, grown one element at a time by LinkTable_add() */
+    Link **links;
+};
+
+/** \brief root link table */
+extern LinkTable *ROOT_LINK_TBL;
+
+void Link_set_stat(Link* this_link, CURL *curl);
+
+/** \brief create a new LinkTable */
+LinkTable *LinkTable_new(const char *url);
+
+/**
+ * \brief download a link */
+/* \return the number of bytes downloaded
+ */
+long path_download(const char *path, char *output_buf, size_t size,
+                   off_t offset);
+
+/** \brief find the link associated with a path */
+Link *path_to_Link(const char *path);
+
+
+#endif
--- a/main.c
+++ b/main.c
@ -1,7 +1,8 @@
 #include "network.h"
 #include "fuse_local.h"

-#include <stdio.h>
+// #include <stdio.h>
+#include <stdlib.h>

 static void help();

--- a/network.c
+++ b/network.c
@ -1,43 +1,19 @@
 #include "network.h"

-#include <curl/curl.h>
-#include <gumbo.h>
+#include "link.h"
+
 #include <openssl/crypto.h>

-#include <ctype.h>
 #include <errno.h>
 #include <pthread.h>
 #include <string.h>
 #include <stdio.h>
 #include <unistd.h>

-#define HTTP_OK 200
-#define HTTP_PARTIAL_CONTENT 206
-#define HTTP_RANGE_NOT_SATISFIABLE 416
-
-/* ---------------- External variables -----------------------*/
-LinkTable *ROOT_LINK_TBL;
-
-/* ----------------- Local structs ---------------------------*/
-typedef struct {
-    char *memory;
-    size_t size;
-} MemoryStruct;
-
-typedef enum {
-    FILESTAT = 's',
-    DATA = 'd'
-} TransferType;
-
-typedef struct {
-    TransferType type;
-    int transferring;
-    Link *link;
-} TransferStruct;
+/* ----------------- External variables ----------------------*/
+CURLSH *curl_share;

 /* ----------------- Static variable ----------------------- */
-/** \brief curl shared interface */
-static CURLSH *curl_share;
 /** \brief curl multi interface handle */
 static CURLM *curl_multi;
 /** \brief  mutex for transfer functions */
@ -54,25 +30,8 @@ static void curl_callback_lock(CURL *handle, curl_lock_data data,
                               curl_lock_access access, void *userptr);
 static void curl_callback_unlock(CURL *handle, curl_lock_data data,
                                 void *userptr);
-static int curl_multi_perform_once();
 void curl_process_msgs(CURLMsg *curl_msg, int n_running_curl, int n_mesgs);
-static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
-static Link *Link_new(const char *p_url, LinkType type);
-static CURL *Link_to_curl(Link *link);
-void Link_get_stat(Link *this_link);
-static void Link_set_stat(Link* this_link, CURL *curl);
-static void LinkTable_add(LinkTable *linktbl, Link *link);
-void LinkTable_fill(LinkTable *linktbl);
-static void LinkTable_free(LinkTable *linktbl);
-static void LinkTable_print(LinkTable *linktbl);
-static void transfer_blocking(CURL *curl);
-static void transfer_nonblocking(CURL *curl);
-static Link *path_to_Link_recursive(char *path, LinkTable *linktbl);
-static LinkType p_url_type(const char *p_url);
 static unsigned long thread_id(void);
-static char *url_append(const char *url, const char *sublink);
-static size_t
-WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp);

 /* -------------------- Functions -------------------------- */
 static void crypto_lock_callback(int mode, int type, char *file, int line)
@ -123,7 +82,7 @@ static void curl_callback_unlock(CURL *handle, curl_lock_data data,
    pthread_mutex_unlock(&curl_lock);
 }

-static int curl_multi_perform_once()
+int curl_multi_perform_once()
 {
    pthread_mutex_lock(&transfer_lock);
    /* Get curl multi interface to perform pending tasks */
@ -231,249 +190,6 @@ void curl_process_msgs(CURLMsg *curl_msg, int n_running_curl, int n_mesgs)
    }
 }

-/**
- * Shamelessly copied and pasted from:
- * https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
- */
-static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl)
-{
-    if (node->type != GUMBO_NODE_ELEMENT) {
-        return;
-    }
-    GumboAttribute* href;
-
-    if (node->v.element.tag == GUMBO_TAG_A &&
-            (href = gumbo_get_attribute(&node->v.element.attributes, "href"))) {
-        /* if it is valid, copy the link onto the heap */
-        LinkType type = p_url_type(href->value);
-        if (type) {
-            LinkTable_add(linktbl, Link_new(href->value, type));
-        }
-    }
-    /* Note the recursive call, lol. */
-    GumboVector *children = &node->v.element.children;
-    for (size_t i = 0; i < children->length; ++i) {
-        HTML_to_LinkTable((GumboNode*)children->data[i], linktbl);
-    }
-    return;
-}
-
-static Link *Link_new(const char *p_url, LinkType type)
-{
-    Link *link = calloc(1, sizeof(Link));
-    if (!link) {
-        fprintf(stderr, "Link_new(): calloc failure!\n");
-        exit(EXIT_FAILURE);
-    }
-    strncpy(link->p_url, p_url, LINK_LEN_MAX);
-    link->type = type;
-
-    /* remove the '/' from p_url if it exists */
-    char *c = &(link->p_url[strnlen(link->p_url, LINK_LEN_MAX) - 1]);
-    if ( *c == '/') {
-        *c = '\0';
-    }
-
-    return link;
-}
-
-static CURL *Link_to_curl(Link *link)
-{
-    CURL *curl = curl_easy_init();
-    if (!curl) {
-        fprintf(stderr, "Link_to_curl(): curl_easy_init() failed!\n");
-    }
-
-    /* set up some basic curl stuff */
-    curl_easy_setopt(curl, CURLOPT_USERAGENT, "httpdirfs - \
-    https://github.com/fangfufu/httpdirfs");
-    curl_easy_setopt(curl, CURLOPT_VERBOSE, 0);
-    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
-    /* for following directories without the '/' */
-    curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 2);
-    curl_easy_setopt(curl, CURLOPT_URL, link->f_url);
-    curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1);
-    curl_easy_setopt(curl, CURLOPT_CONNECTTIMEOUT, 15);
-    curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
-    /*
-     * The write back function pointer has to be set at curl handle creation,
-     * for thread safety
-     */
-    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
-
-    return curl;
-}
-
-void Link_get_stat(Link *this_link)
-{
-    fprintf(stderr, "Link_get_size(%s);\n", this_link->f_url);
-
-    if (this_link->type == LINK_FILE) {
-        CURL *curl = Link_to_curl(this_link);
-        curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
-        curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
-
-        /*
-         * We need to put the variable on the heap, because otherwise the
-         * variable gets popped from the stack as the function returns.
-         *
-         * It gets freed in curl_multi_perform_once();
-         */
-        TransferStruct *transfer = malloc(sizeof(TransferStruct));
-        if (!transfer) {
-            fprintf(stderr, "Link_get_size(): malloc failed!\n");
-        }
-        transfer->link = this_link;
-        transfer->type = FILESTAT;
-        curl_easy_setopt(curl, CURLOPT_PRIVATE, transfer);
-
-        transfer_nonblocking(curl);
-    }
-}
-
-static void Link_set_stat(Link* this_link, CURL *curl)
-{
-    long http_resp;
-    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
-    if (http_resp == HTTP_OK) {
-        double cl = 0;
-        curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
-        curl_easy_getinfo(curl, CURLINFO_FILETIME, &(this_link->time));
-
-        if (cl == -1) {
-            /* Turns out not to be a file after all */
-            this_link->content_length = 0;
-            this_link->type = LINK_DIR;
-        } else {
-            this_link->content_length = cl;
-            this_link->type = LINK_FILE;
-        }
-    } else {
-        this_link->type = LINK_INVALID;
-    }
-}
-
-static void LinkTable_add(LinkTable *linktbl, Link *link)
-{
-    linktbl->num++;
-    linktbl->links = realloc(linktbl->links, linktbl->num * sizeof(Link *));
-    if (!linktbl->links) {
-        fprintf(stderr, "LinkTable_add(): realloc failure!\n");
-        exit(EXIT_FAILURE);
-    }
-    linktbl->links[linktbl->num - 1] = link;
-}
-
-void LinkTable_fill(LinkTable *linktbl)
-{
-    Link *head_link = linktbl->links[0];
-    for (int i = 0; i < linktbl->num; i++) {
-        Link *this_link = linktbl->links[i];
-        if (this_link->type) {
-            char *url;
-            url = url_append(head_link->f_url, this_link->p_url);
-            strncpy(this_link->f_url, url, URL_LEN_MAX);
-            free(url);
-
-            char *unescaped_p_url;
-            unescaped_p_url = curl_easy_unescape(NULL, this_link->p_url, 0,
-                                                 NULL);
-            strncpy(this_link->p_url, unescaped_p_url, LINK_LEN_MAX);
-            curl_free(unescaped_p_url);
-
-            if (this_link->type == LINK_FILE && !(this_link->content_length)) {
-                Link_get_stat(this_link);
-            } else if (this_link->type == LINK_DIR) {
-                this_link->time = head_link->time;
-            }
-        }
-    }
-    /* Block until the LinkTable is filled up */
-    while (curl_multi_perform_once()) {
-        usleep(1000);
-    }
-}
-
-static void LinkTable_free(LinkTable *linktbl)
-{
-    for (int i = 0; i < linktbl->num; i++) {
-        free(linktbl->links[i]);
-    }
-    free(linktbl->links);
-    free(linktbl);
-}
-
-LinkTable *LinkTable_new(const char *url)
-{
-    fprintf(stderr, "LinkTable_new(%s);\n", url);
-
-    LinkTable *linktbl = calloc(1, sizeof(LinkTable));
-    if (!linktbl) {
-        fprintf(stderr, "LinkTable_new(): calloc failure!\n");
-        exit(EXIT_FAILURE);
-    }
-
-    /* populate the base URL */
-    LinkTable_add(linktbl, Link_new("/", LINK_HEAD));
-    Link *head_link = linktbl->links[0];
-    head_link->type = LINK_HEAD;
-    strncpy(head_link->f_url, url, URL_LEN_MAX);
-
-    /* start downloading the base URL */
-    CURL *curl = Link_to_curl(head_link);
-    MemoryStruct buf;
-    buf.size = 0;
-    buf.memory = NULL;
-    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
-
-    transfer_blocking(curl);
-
-    /* if downloading base URL failed */
-    long http_resp;
-    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
-    if (http_resp != HTTP_OK) {
-        fprintf(stderr, "link.c: LinkTable_new() cannot retrive the base URL, \
-URL: %s, HTTP %ld\n", url, http_resp);
-
-        LinkTable_free(linktbl);
-        linktbl = NULL;
-        return linktbl;
-    };
-    curl_easy_getinfo(curl, CURLINFO_FILETIME, &(head_link->time));
-    curl_easy_cleanup(curl);
-
-    /* Otherwise parsed the received data */
-    GumboOutput* output = gumbo_parse(buf.memory);
-    HTML_to_LinkTable(output->root, linktbl);
-    gumbo_destroy_output(&kGumboDefaultOptions, output);
-    free(buf.memory);
-
-    /* Fill in the link table */
-    LinkTable_fill(linktbl);
-    return linktbl;
-}
-
-/** \brief print a LinkTable */
-static void LinkTable_print(LinkTable *linktbl)
-{
-    fprintf(stderr, "--------------------------------------------\n");
-    fprintf(stderr, " LinkTable %p for %s\n", linktbl,
-            linktbl->links[0]->f_url);
-    fprintf(stderr, "--------------------------------------------\n");
-    for (int i = 0; i < linktbl->num; i++) {
-        Link *this_link = linktbl->links[i];
-        fprintf(stderr, "%d %c %lu %s %s\n",
-                i,
-                this_link->type,
-                this_link->content_length,
-                this_link->p_url,
-                this_link->f_url
-               );
-
-    }
-    fprintf(stderr, "--------------------------------------------\n");
-}
-
 void network_init(const char *url)
 {

@ -532,7 +248,7 @@ void network_init(const char *url)
    ROOT_LINK_TBL = LinkTable_new(url);
 }

-static void transfer_blocking(CURL *curl)
+void transfer_blocking(CURL *curl)
 {
    /*
     * We don't need to malloc here, as the transfer is finished before
@ -559,7 +275,7 @@ static void transfer_blocking(CURL *curl)
    }
 }

-static void transfer_nonblocking(CURL *curl)
+void transfer_nonblocking(CURL *curl)
 {
    pthread_mutex_lock(&transfer_lock);
    CURLMcode res = curl_multi_add_handle(curl_multi, curl);
@ -572,144 +288,6 @@ static void transfer_nonblocking(CURL *curl)
    }
 }

-Link *path_to_Link(const char *path)
-{
-    char *new_path = strndup(path, URL_LEN_MAX);
-    if (!new_path) {
-        fprintf(stderr, "path_to_Link(): cannot allocate memory\n");
-        exit(EXIT_FAILURE);
-    }
-    Link *link = path_to_Link_recursive(new_path, ROOT_LINK_TBL);
-    free(new_path);
-    return link;
-}
-
-static Link *path_to_Link_recursive(char *path, LinkTable *linktbl)
-{
-    /* skip the leading '/' if it exists */
-    if (*path == '/') {
-        path++;
-    }
-
-    /* remove the last '/' if it exists */
-    char *slash = &(path[strnlen(path, URL_LEN_MAX) - 1]);
-    if (*slash == '/') {
-        *slash = '\0';
-    }
-
-    slash = strchr(path, '/');
-    if ( slash == NULL ) {
-        /* We cannot find another '/', we have reached the last level */
-        for (int i = 1; i < linktbl->num; i++) {
-            if (!strncmp(path, linktbl->links[i]->p_url, LINK_LEN_MAX)) {
-                /* We found our link */
-                return linktbl->links[i];
-            }
-        }
-    } else {
-        /*
-         * We can still find '/', time to consume the path and traverse
-         * the tree structure
-         */
-
-        /*
-         * add termination mark to  the current string,
-         * effective create two substrings
-         */
-        *slash = '\0';
-        /* move the pointer past the '/' */
-        char *next_path = slash + 1;
-        for (int i = 1; i < linktbl->num; i++) {
-            if (!strncmp(path, linktbl->links[i]->p_url, LINK_LEN_MAX)) {
-                /* The next sub-directory exists */
-                if (!(linktbl->links[i]->next_table)) {
-                    linktbl->links[i]->next_table = LinkTable_new(
-                                                        linktbl->links[i]->f_url);
-                    fprintf(stderr, "Created new link table for %s\n",
-                            linktbl->links[i]->f_url);
-                    LinkTable_print(linktbl->links[i]->next_table);
-                }
-
-                return path_to_Link_recursive(next_path,
-                                              linktbl->links[i]->next_table);
-            }
-        }
-    }
-    return NULL;
-}
-
-long path_download(const char *path, char *output_buf, size_t size,
-                   off_t offset)
-{
-    Link *link;
-    link = path_to_Link(path);
-    if (!link) {
-        return -ENOENT;
-    }
-
-    size_t start = offset;
-    size_t end = start + size;
-    char range_str[64];
-    snprintf(range_str, sizeof(range_str), "%lu-%lu", start, end);
-
-    MemoryStruct buf;
-    buf.size = 0;
-    buf.memory = NULL;
-
-    fprintf(stderr, "path_download(%s, %s);\n",
-            path, range_str);
-
-    CURL *curl = Link_to_curl(link);
-    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
-    curl_easy_setopt(curl, CURLOPT_RANGE, range_str);
-
-    transfer_blocking(curl);
-
-    long http_resp;
-    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
-    if ( !(
-                (http_resp != HTTP_OK) ||
-                (http_resp != HTTP_PARTIAL_CONTENT) ||
-                (http_resp != HTTP_RANGE_NOT_SATISFIABLE)
-            )) {
-        fprintf(stderr, "path_download(): Could not download %s, HTTP %ld\n",
-                link->f_url, http_resp);
-        return -ENOENT;
-    }
-
-    double dl;
-    curl_easy_getinfo(curl, CURLINFO_SIZE_DOWNLOAD, &dl);
-
-    size_t recv = dl;
-    if (recv > size) {
-        recv = size;
-    }
-
-    memmove(output_buf, buf.memory, recv);
-    curl_easy_cleanup(curl);
-    free(buf.memory);
-    return recv;
-}
-
-static LinkType p_url_type(const char *p_url)
-{
-    /* The link name has to start with alphanumerical character */
-    if (!isalnum(p_url[0])) {
-        return LINK_INVALID;
-    }
-
-    /* check for http:// and https:// */
-    if ( !strncmp(p_url, "http://", 7) || !strncmp(p_url, "https://", 8) ) {
-        return LINK_INVALID;
-    }
-
-    if ( p_url[strlen(p_url) - 1] == '/' ) {
-        return LINK_DIR;
-    }
-
-    return LINK_FILE;
-}
-
 static unsigned long thread_id(void)
 {
    unsigned long ret;
@ -718,30 +296,7 @@ static unsigned long thread_id(void)
    return ret;
 }

-static char *url_append(const char *url, const char *sublink)
-{
-    int needs_separator = 0;
-    if (url[strlen(url)-1] != '/') {
-        needs_separator = 1;
-    }
-
-    char *str;
-    size_t ul = strlen(url);
-    size_t sl = strlen(sublink);
-    str = calloc(ul + sl + needs_separator + 1, sizeof(char));
-    if (!str) {
-        fprintf(stderr, "url_append(): calloc failure!\n");
-        exit(EXIT_FAILURE);
-    }
-    strncpy(str, url, ul);
-    if (needs_separator) {
-        str[ul] = '/';
-    }
-    strncat(str, sublink, sl);
-    return str;
-}
-
-static size_t
+size_t
 WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
 {
    size_t realsize = size * nmemb;
--- a/network.h
+++ b/network.h
@ -1,61 +1,41 @@
 #ifndef NETWORK_H
 #define NETWORK_H

-#include <stdlib.h>
+#include "link.h"
+
+#include <curl/curl.h>

-#define URL_LEN_MAX 2048
-#define LINK_LEN_MAX 255
 #define CURL_MULTI_MAX_CONNECTION 20

-/** \brief the link type */
+typedef struct {
+    char *memory;
+    size_t size;
+} MemoryStruct;
+
 typedef enum {
-    LINK_HEAD = 'H',
-    LINK_DIR = 'D',
-    LINK_FILE = 'F',
-    LINK_INVALID = '\0'
-} LinkType;
+    FILESTAT = 's',
+    DATA = 'd'
+} TransferType;

-/**
- * \brief link table type
- * \details index 0 contains the Link for the base URL
- */
-typedef struct LinkTable LinkTable;
+typedef struct {
+    TransferType type;
+    int transferring;
+    Link *link;
+} TransferStruct;

-/** \brief link data type */
-typedef struct Link Link;
+/** \brief curl shared interface */
+extern CURLSH *curl_share;

-
-struct Link {
-    char p_url[LINK_LEN_MAX];
-    char f_url[URL_LEN_MAX];
-    LinkType type;
-    size_t content_length;
-    LinkTable *next_table;
-    long time;
-};
-
-struct LinkTable {
-    int num;
-    Link **links;
-};
-
-/** \brief root link table */
-extern LinkTable *ROOT_LINK_TBL;
+int curl_multi_perform_once();

 /** \brief Initialise the network module */
 void network_init(const char *url);

-/**
- * \brief download a link */
-/* \return the number of bytes downloaded
- */
-long path_download(const char *path, char *output_buf, size_t size,
-                   off_t offset);
+void transfer_blocking(CURL *curl);

-/** \brief create a new LinkTable */
-LinkTable *LinkTable_new(const char *url);
+void transfer_nonblocking(CURL *curl);

-/** \brief find the link associated with a path */
-Link *path_to_Link(const char *path);
+size_t
+WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp);

 #endif