#include "network.h"

/*
 * NOTE(review): the header names in this include list were lost when the
 * file was extracted (only bare "#include" tokens survived).  The list below
 * was rebuilt from the symbols this file uses -- confirm it against the
 * build before merging.
 */
#include <curl/curl.h>
#include <gumbo.h>
#include <openssl/crypto.h>

#include <ctype.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <unistd.h>

#define HTTP_OK                         200
#define HTTP_PARTIAL_CONTENT            206
#define HTTP_RANGE_NOT_SATISFIABLE      416

/* ---------------- External variables -----------------------*/
LinkTable *ROOT_LINK_TBL;

/* ----------------- Local structs ---------------------------*/

/** \brief growable buffer filled by WriteMemoryCallback() */
typedef struct {
    char *memory;
    size_t size;
} MemoryStruct;

/** \brief what a transfer is for: querying file metadata, or fetching data */
typedef enum {
    FILESTAT = 's',
    DATA = 'd'
} TransferType;

/** \brief per-transfer bookkeeping, attached to a handle via CURLOPT_PRIVATE */
typedef struct {
    TransferType type;
    int transferring;
    Link *link;
} TransferStruct;

/* ----------------- Static variable ----------------------- */
/** \brief curl shared interface */
static CURLSH *curl_share;
/** \brief curl multi interface handle */
static CURLM *curl_multi;
/** \brief pthread mutex for transfer functions */
static pthread_mutex_t transfer_lock;
/** \brief the lock array for cryptographic functions */
static pthread_mutex_t *crypto_lockarray;
/** \brief pthread mutex for curl itself */
static pthread_mutex_t curl_lock;

/* ---------------- Static function prototype ---------------*/
static void crypto_lock_callback(int mode, int type, char *file, int line);
static void crypto_lock_init(void);
static void curl_lock_callback(CURL *handle, curl_lock_data data,
                               curl_lock_access access, void *userptr);
static void curl_unlock_callback(CURL *handle, curl_lock_data data,
                                 void *userptr);
static int curl_multi_perform_once(void);
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl);
static Link *Link_new(const char *p_url, LinkType type);
static CURL *Link_to_curl(Link *link);
void Link_get_stat(Link *this_link);
static void Link_set_stat(Link* this_link, CURL *curl);
static void LinkTable_add(LinkTable *linktbl, Link *link);
void LinkTable_fill(LinkTable *linktbl);
static void LinkTable_free(LinkTable *linktbl);
static void LinkTable_print(LinkTable *linktbl);
static void transfer_blocking(CURL *curl);
static void transfer_nonblocking(CURL *curl);
static Link *path_to_Link_recursive(char *path, LinkTable *linktbl);
static LinkType p_url_type(const char *p_url);
static unsigned long thread_id(void);
static char *url_append(const char *url, const char *sublink);
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
                                  void *userp);

/* -------------------- Functions -------------------------- */

/**
 * \brief locking callback handed to OpenSSL
 * \details Locks crypto_lockarray[type] when mode has CRYPTO_LOCK set,
 * unlocks it otherwise.  file and line are unused.
 */
static void crypto_lock_callback(int mode, int type, char *file, int line)
{
    (void)file;
    (void)line;
    if (mode & CRYPTO_LOCK) {
        pthread_mutex_lock(&(crypto_lockarray[type]));
    } else {
        pthread_mutex_unlock(&(crypto_lockarray[type]));
    }
}

/**
 * \brief allocate and initialise the OpenSSL lock array
 * \note NOTE(review): everything in this function after the loop condition
 * was lost in extraction.  It was rebuilt from the libcurl threaded-ssl
 * example that network_init() cites -- confirm against the upstream file.
 */
static void crypto_lock_init(void)
{
    int n_locks = CRYPTO_num_locks();

    crypto_lockarray = OPENSSL_malloc((size_t)n_locks *
                                      sizeof(pthread_mutex_t));
    if (!crypto_lockarray) {
        fprintf(stderr, "crypto_lock_init(): OPENSSL_malloc failure!\n");
        exit(EXIT_FAILURE);
    }
    for (int i = 0; i < n_locks; i++) {
        pthread_mutex_init(&(crypto_lockarray[i]), NULL);
    }

    CRYPTO_set_id_callback((unsigned long (*)())thread_id);
    CRYPTO_set_locking_callback((void (*)())crypto_lock_callback);
}

/**
 * \brief lock callback for the curl share interface
 * \note NOTE(review): body lost in extraction; rebuilt on the assumption
 * that it takes curl_lock ("pthread mutex for curl itself") -- confirm.
 */
static void curl_lock_callback(CURL *handle, curl_lock_data data,
                               curl_lock_access access, void *userptr)
{
    (void)handle;
    (void)data;
    (void)access;
    (void)userptr;
    pthread_mutex_lock(&curl_lock);
}

/**
 * \brief unlock callback for the curl share interface
 * \note NOTE(review): body lost in extraction; rebuilt as the counterpart
 * of curl_lock_callback() -- confirm.
 */
static void curl_unlock_callback(CURL *handle, curl_lock_data data,
                                 void *userptr)
{
    (void)handle;
    (void)data;
    (void)userptr;
    pthread_mutex_unlock(&curl_lock);
}

/**
 * \brief drive the curl multi handle once and reap finished transfers
 * \details Runs under transfer_lock.  For each finished FILESTAT transfer
 * the link is updated via Link_set_stat() (on success), and the easy handle
 * and its TransferStruct are released here.  For each finished DATA
 * transfer only the transferring flag is cleared; the handle stays with the
 * caller of transfer_blocking().
 * \return the number of transfers still running
 * \note NOTE(review): the part of this function before the
 * curl_multi_fdset() error check was lost in extraction and has been
 * rebuilt -- confirm against the upstream file.
 */
static int curl_multi_perform_once(void)
{
    pthread_mutex_lock(&transfer_lock);

    /* Get curl multi interface to perform pending tasks */
    int n_running_curl = 0;
    curl_multi_perform(curl_multi, &n_running_curl);

    int max_fd = -1;
    fd_set read_fd_set;
    fd_set write_fd_set;
    fd_set exc_fd_set;
    FD_ZERO(&read_fd_set);
    FD_ZERO(&write_fd_set);
    FD_ZERO(&exc_fd_set);

    long timeout = -1;
    if (curl_multi_timeout(curl_multi, &timeout)) {
        fprintf(stderr, "curl_multi_perform_once(): curl_multi_timeout\n");
        exit(EXIT_FAILURE);
    }
    /* -1 means libcurl has no timeout to suggest; never hand select() a
     * negative timeval. */
    if (timeout < 0) {
        timeout = 100;
    }

    /* get file descriptors from the transfers */
    CURLMcode res = curl_multi_fdset(curl_multi, &read_fd_set, &write_fd_set,
                                     &exc_fd_set, &max_fd);
    if (res > 0) {
        fprintf(stderr, "curl_multi_perform_once(): curl_multi_fdset: %d, %s\n",
                res, curl_multi_strerror(res));
        exit(EXIT_FAILURE);
    }

    if (max_fd == -1) {
        /*
         * https://curl.haxx.se/libcurl/c/curl_multi_fdset.html
         * The above web page suggests sleeping for 100ms, unless
         * curl_multi_timeout() suggests something shorter.
         */
        if (timeout > 100) {
            timeout = 100;
        }
    }

    /* timeout is in milliseconds */
    struct timeval t;
    t.tv_sec = timeout / 1000;              /* seconds */
    t.tv_usec = (timeout % 1000) * 1000;    /* microseconds */

    if (select(max_fd + 1, &read_fd_set, &write_fd_set, &exc_fd_set, &t) < 0) {
        /* A signal interrupting the wait is not an error; retry next call. */
        if (errno != EINTR) {
            fprintf(stderr,
                    "curl_multi_perform_once(): select(%i,,,,%li): %i: %s\n",
                    max_fd + 1, timeout, errno, strerror(errno));
            exit(EXIT_FAILURE);
        }
    }

    /* Process the message queue */
    int n_mesgs;
    CURLMsg *curl_msg;
    while ((curl_msg = curl_multi_info_read(curl_multi, &n_mesgs))) {
        if (curl_msg->msg != CURLMSG_DONE) {
            fprintf(stderr, "curl_multi_perform_once(): curl_msg->msg: %d\n",
                    curl_msg->msg);
            continue;
        }

        /* Copy what we need first: curl_msg does not survive
         * curl_multi_remove_handle(). */
        CURL *curl = curl_msg->easy_handle;
        CURLcode result = curl_msg->data.result;

        char *private_data = NULL;
        curl_easy_getinfo(curl, CURLINFO_PRIVATE, &private_data);
        TransferStruct *transfer = (TransferStruct *)(void *)private_data;

        if (result != CURLE_OK) {
            char *url = NULL;
            /* curl_easy_getinfo() needs the address of the pointer */
            curl_easy_getinfo(curl, CURLINFO_EFFECTIVE_URL, &url);
            fprintf(stderr, "curl_multi_perform_once(): %d - %s <%s>\n",
                    (int)result, curl_easy_strerror(result),
                    url ? url : "(unknown)");
            sleep(1);
        } else if (transfer && transfer->type == FILESTAT) {
            /* Transfer successful, query the file size */
            Link_set_stat(transfer->link, curl);
        }

        curl_multi_remove_handle(curl_multi, curl);

        if (!transfer) {
            continue;
        }
        if (transfer->type == FILESTAT) {
            /* stat transfers own both the handle and the heap struct */
            curl_easy_cleanup(curl);
            free(transfer);
        } else {
            /*
             * The struct lives on the stack of transfer_blocking(), which
             * returns (and cleans up the handle) once this flag drops.
             * Clear it last, after every other access to *transfer.
             */
            transfer->transferring = 0;
        }
    }

    pthread_mutex_unlock(&transfer_lock);
    return n_running_curl;
}

/**
 * \brief walk a parsed HTML tree and add every usable <a href> to linktbl
 * \details Adapted from
 * https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
 * Only hrefs for which p_url_type() returns a non-zero type are added.
 */
static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl)
{
    if (!node || node->type != GUMBO_NODE_ELEMENT) {
        return;
    }

    GumboAttribute *href;
    if (node->v.element.tag == GUMBO_TAG_A &&
        (href = gumbo_get_attribute(&node->v.element.attributes, "href"))) {
        /* if it is valid, copy the link onto the heap */
        LinkType type = p_url_type(href->value);
        if (type) {
            LinkTable_add(linktbl, Link_new(href->value, type));
        }
    }

    /* Recurse into the children of this element */
    GumboVector *children = &node->v.element.children;
    for (size_t i = 0; i < children->length; ++i) {
        HTML_to_LinkTable((GumboNode *)children->data[i], linktbl);
    }
}

/**
 * \brief allocate a zeroed Link holding a copy of p_url
 * \details p_url is truncated to fit LINK_LEN_MAX bytes including the
 * terminator, and one trailing '/' is removed.  Exits on allocation failure.
 */
static Link *Link_new(const char *p_url, LinkType type)
{
    Link *link = calloc(1, sizeof *link);
    if (!link) {
        fprintf(stderr, "Link_new(): calloc failure!\n");
        exit(EXIT_FAILURE);
    }

    /* snprintf always terminates, unlike strncpy */
    snprintf(link->p_url, LINK_LEN_MAX, "%s", p_url);
    link->type = type;

    /* remove the '/' from p_url if it exists; an empty name has none */
    size_t len = strlen(link->p_url);
    if (len > 0 && link->p_url[len - 1] == '/') {
        link->p_url[len - 1] = '\0';
    }

    return link;
}

/**
 * \brief create an easy handle configured to fetch link->f_url
 * \details The caller owns the returned handle.  Exits if the handle cannot
 * be created.
 */
static CURL *Link_to_curl(Link *link)
{
    CURL *curl = curl_easy_init();
    if (!curl) {
        fprintf(stderr, "Link_to_curl(): curl_easy_init() failed!\n");
        exit(EXIT_FAILURE);
    }

    /* set up some basic curl stuff; numeric options must be passed as long */
    curl_easy_setopt(curl, CURLOPT_USERAGENT,
                     "httpdirfs - https://github.com/fangfufu/httpdirfs");
    curl_easy_setopt(curl, CURLOPT_VERBOSE, 0L);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    /* for following directories without the '/' */
    curl_easy_setopt(curl, CURLOPT_MAXREDIRS, 2L);
    curl_easy_setopt(curl, CURLOPT_URL, link->f_url);
    curl_easy_setopt(curl, CURLOPT_TCP_KEEPALIVE, 1L);
    curl_easy_setopt(curl, CURLOPT_SHARE, curl_share);
    /*
     * The write back function pointer has to be set at curl handle creation,
     * for thread safety
     */
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

    return curl;
}

/**
 * \brief queue a body-less request that will fill in a file link's metadata
 * \details Does nothing unless this_link->type is LINK_FILE.  The request is
 * added to the multi handle and completes inside curl_multi_perform_once(),
 * which also releases the handle and the TransferStruct allocated here.
 */
void Link_get_stat(Link *this_link)
{
    fprintf(stderr, "Link_get_stat(%s);\n", this_link->f_url);

    if (this_link->type != LINK_FILE) {
        return;
    }

    CURL *curl = Link_to_curl(this_link);
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1L);
    curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);

    /*
     * This has to live on the heap because it must outlive this function.
     * It gets freed in curl_multi_perform_once().
     */
    TransferStruct *transfer = malloc(sizeof *transfer);
    if (!transfer) {
        fprintf(stderr, "Link_get_stat(): malloc failed!\n");
        exit(EXIT_FAILURE);
    }
    transfer->link = this_link;
    transfer->type = FILESTAT;
    transfer->transferring = 1;
    curl_easy_setopt(curl, CURLOPT_PRIVATE, transfer);

    transfer_nonblocking(curl);
}

/**
 * \brief update a link from a finished transfer
 * \details On HTTP 200 the content length and file time are stored; a
 * reported length of -1 reclassifies the link as LINK_DIR.  Any other
 * response code marks the link LINK_INVALID.
 */
static void Link_set_stat(Link* this_link, CURL *curl)
{
    long http_resp = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);

    if (http_resp != HTTP_OK) {
        this_link->type = LINK_INVALID;
        return;
    }

    double cl = 0;
    curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
    curl_easy_getinfo(curl, CURLINFO_FILETIME, &(this_link->time));

    if (cl == -1) {
        /* Turns out not to be a file after all */
        this_link->content_length = 0;
        this_link->type = LINK_DIR;
    } else {
        this_link->content_length = cl;
        this_link->type = LINK_FILE;
    }
}

/**
 * \brief append link to linktbl, growing the pointer array by one
 * \details Exits on allocation failure.
 */
static void LinkTable_add(LinkTable *linktbl, Link *link)
{
    Link **grown = realloc(linktbl->links,
                           ((size_t)linktbl->num + 1) * sizeof(Link *));
    if (!grown) {
        fprintf(stderr, "LinkTable_add(): realloc failure!\n");
        exit(EXIT_FAILURE);
    }
    linktbl->links = grown;
    linktbl->links[linktbl->num] = link;
    linktbl->num++;
}

/**
 * \brief complete every link in the table
 * \details For each link with a non-zero type: builds f_url from the head
 * link's f_url and the link's p_url, then replaces p_url with its
 * URL-unescaped form.  File links with no content length get a stat request
 * queued; directory links inherit the head link's time.  Blocks until
 * curl_multi_perform_once() reports no transfers running.
 */
void LinkTable_fill(LinkTable *linktbl)
{
    if (!linktbl || linktbl->num < 1) {
        return;
    }

    Link *head_link = linktbl->links[0];
    for (int i = 0; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];
        if (!this_link->type) {
            continue;
        }

        /* f_url must be built from the still-escaped p_url */
        char *url = url_append(head_link->f_url, this_link->p_url);
        snprintf(this_link->f_url, URL_LEN_MAX, "%s", url);
        free(url);

        char *unescaped_p_url = curl_easy_unescape(NULL, this_link->p_url, 0,
                                                   NULL);
        if (unescaped_p_url) {
            snprintf(this_link->p_url, LINK_LEN_MAX, "%s", unescaped_p_url);
            curl_free(unescaped_p_url);
        }

        if (this_link->type == LINK_FILE && !(this_link->content_length)) {
            Link_get_stat(this_link);
        } else if (this_link->type == LINK_DIR) {
            this_link->time = head_link->time;
        }
    }

    /* Block until the LinkTable is filled up */
    while (curl_multi_perform_once()) {
        usleep(100 * 1000);
    }
}

/**
 * \brief free a link table, its links, and any child tables
 * \details Accepts NULL.
 */
static void LinkTable_free(LinkTable *linktbl)
{
    if (!linktbl) {
        return;
    }
    for (int i = 0; i < linktbl->num; i++) {
        Link *link = linktbl->links[i];
        if (link) {
            LinkTable_free(link->next_table);
            free(link);
        }
    }
    free(linktbl->links);
    free(linktbl);
}

/**
 * \brief download the page at url and build a link table from it
 * \details Entry 0 of the table is the head link, whose f_url is url.
 * \return the new table, or NULL if the page did not come back as HTTP 200
 */
LinkTable *LinkTable_new(const char *url)
{
    fprintf(stderr, "LinkTable_new(%s);\n", url);

    LinkTable *linktbl = calloc(1, sizeof *linktbl);
    if (!linktbl) {
        fprintf(stderr, "LinkTable_new(): calloc failure!\n");
        exit(EXIT_FAILURE);
    }

    /* populate the base URL */
    LinkTable_add(linktbl, Link_new("/", LINK_HEAD));
    Link *head_link = linktbl->links[0];
    head_link->type = LINK_HEAD;
    snprintf(head_link->f_url, URL_LEN_MAX, "%s", url);

    /* start downloading the base URL */
    CURL *curl = Link_to_curl(head_link);
    MemoryStruct buf = { .memory = NULL, .size = 0 };
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
    transfer_blocking(curl);

    /* if downloading base URL failed */
    long http_resp = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
    if (http_resp != HTTP_OK) {
        fprintf(stderr,
                "LinkTable_new(): cannot retrieve the base URL, URL: %s, HTTP %ld\n",
                url, http_resp);
        curl_easy_cleanup(curl);
        free(buf.memory);
        LinkTable_free(linktbl);
        return NULL;
    }

    curl_easy_getinfo(curl, CURLINFO_FILETIME, &(head_link->time));
    curl_easy_cleanup(curl);

    /* Otherwise parse the received data; an empty body leaves memory NULL */
    GumboOutput *output = gumbo_parse(buf.memory ? buf.memory : "");
    if (output) {
        HTML_to_LinkTable(output->root, linktbl);
        gumbo_destroy_output(&kGumboDefaultOptions, output);
    }
    free(buf.memory);

    /* Fill in the link table */
    LinkTable_fill(linktbl);
    return linktbl;
}

/** \brief print a LinkTable */
static void LinkTable_print(LinkTable *linktbl)
{
    if (!linktbl) {
        return;
    }
    fprintf(stderr, "--------------------------------------------\n");
    fprintf(stderr, " LinkTable %p for %s\n", (void *)linktbl,
            linktbl->links[0]->f_url);
    fprintf(stderr, "--------------------------------------------\n");
    for (int i = 0; i < linktbl->num; i++) {
        Link *this_link = linktbl->links[i];
        fprintf(stderr, "%d %c %lu %s %s\n",
                i,
                this_link->type,
                (unsigned long)this_link->content_length,
                this_link->p_url,
                this_link->f_url
               );
    }
    fprintf(stderr, "--------------------------------------------\n");
}

/**
 * \brief initialise locking and libcurl, then build the root link table
 * \details Exits the process if any initialisation step fails.
 * ROOT_LINK_TBL is left NULL if the root URL cannot be retrieved.
 */
void network_init(const char *url)
{
    /*
     * Initialise the cryptographic locks, these are adapted from
     * https://curl.haxx.se/libcurl/c/threaded-ssl.html
     */
    crypto_lock_init();

    /* Global related */
    if (curl_global_init(CURL_GLOBAL_ALL)) {
        fprintf(stderr, "network_init(): curl_global_init() failed!\n");
        exit(EXIT_FAILURE);
    }

    /* Share related */
    curl_share = curl_share_init();
    if (!(curl_share)) {
        fprintf(stderr, "network_init(): curl_share_init() failed!\n");
        exit(EXIT_FAILURE);
    }
    curl_share_setopt(curl_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_COOKIE);
    curl_share_setopt(curl_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
    curl_share_setopt(curl_share, CURLSHOPT_SHARE, CURL_LOCK_DATA_CONNECT);

    if (pthread_mutex_init(&curl_lock, NULL) != 0) {
        fprintf(stderr, "network_init(): curl_lock initialisation failed!\n");
        exit(EXIT_FAILURE);
    }
    curl_share_setopt(curl_share, CURLSHOPT_LOCKFUNC, curl_lock_callback);
    curl_share_setopt(curl_share, CURLSHOPT_UNLOCKFUNC, curl_unlock_callback);

    /* Multi related */
    curl_multi = curl_multi_init();
    if (!curl_multi) {
        fprintf(stderr, "network_init(): curl_multi_init() failed!\n");
        exit(EXIT_FAILURE);
    }
    curl_multi_setopt(curl_multi, CURLMOPT_MAXCONNECTS,
                      (long)CURL_MULTI_MAX_CONNECTION);

    /* Initialise transfer lock */
    if (pthread_mutex_init(&transfer_lock, NULL) != 0) {
        fprintf(stderr,
                "network_init(): transfer_lock initialisation failed!\n");
        exit(EXIT_FAILURE);
    }

    /* ssl_version is NULL when libcurl was built without TLS support */
    curl_version_info_data *data = curl_version_info(CURLVERSION_NOW);
    printf("libcurl SSL engine: %s\n",
           (data && data->ssl_version) ? data->ssl_version : "(none)");

    /* create the root link table */
    ROOT_LINK_TBL = LinkTable_new(url);
}

/**
 * \brief run a transfer to completion on the multi handle
 * \details Polls curl_multi_perform_once() until it clears the transferring
 * flag for this handle.  The caller keeps ownership of curl.
 */
static void transfer_blocking(CURL *curl)
{
    /*
     * We don't need to malloc here, as the transfer is finished before
     * the variable gets popped from the stack
     */
    volatile TransferStruct transfer = {
        .type = DATA,
        .transferring = 1,
        .link = NULL
    };
    curl_easy_setopt(curl, CURLOPT_PRIVATE, (void *)&transfer);

    pthread_mutex_lock(&transfer_lock);
    CURLMcode res = curl_multi_add_handle(curl_multi, curl);
    pthread_mutex_unlock(&transfer_lock);

    if (res > 0) {
        fprintf(stderr, "transfer_blocking(): %d, %s\n",
                res, curl_multi_strerror(res));
        exit(EXIT_FAILURE);
    }

    while (transfer.transferring) {
        curl_multi_perform_once();
        usleep(100 * 1000);
    }
}

/**
 * \brief add a transfer to the multi handle and return immediately
 * \details The transfer completes in a later curl_multi_perform_once() call.
 */
static void transfer_nonblocking(CURL *curl)
{
    pthread_mutex_lock(&transfer_lock);
    CURLMcode res = curl_multi_add_handle(curl_multi, curl);
    pthread_mutex_unlock(&transfer_lock);

    if (res > 0) {
        fprintf(stderr, "transfer_nonblocking(): %d, %s\n",
                res, curl_multi_strerror(res));
        exit(EXIT_FAILURE);
    }
}

/**
 * \brief look up the Link for a path, starting at ROOT_LINK_TBL
 * \return the matching Link, or NULL if there is none
 */
Link *path_to_Link(const char *path)
{
    if (!path) {
        return NULL;
    }

    /* the recursive walk edits the string, so work on a private copy */
    char *new_path = strndup(path, URL_LEN_MAX);
    if (!new_path) {
        fprintf(stderr, "path_to_Link(): cannot allocate memory\n");
        exit(EXIT_FAILURE);
    }
    Link *link = path_to_Link_recursive(new_path, ROOT_LINK_TBL);
    free(new_path);
    return link;
}

/**
 * \brief resolve path one component at a time, descending through tables
 * \details path is modified in place.  Child tables are created with
 * LinkTable_new() the first time a directory is entered.  Entry 0 of each
 * table (the head link) is never matched.
 * \return the matching Link, or NULL if any component cannot be resolved
 */
static Link *path_to_Link_recursive(char *path, LinkTable *linktbl)
{
    if (!linktbl) {
        return NULL;
    }

    /* skip the leading '/' if it exists */
    if (*path == '/') {
        path++;
    }

    /* nothing left to match; also keeps the index below in bounds */
    size_t len = strnlen(path, URL_LEN_MAX);
    if (len == 0) {
        return NULL;
    }

    /* remove the last '/' if it exists */
    if (path[len - 1] == '/') {
        path[len - 1] = '\0';
    }

    char *slash = strchr(path, '/');
    if (slash == NULL) {
        /* We cannot find another '/', we have reached the last level */
        for (int i = 1; i < linktbl->num; i++) {
            if (!strncmp(path, linktbl->links[i]->p_url, LINK_LEN_MAX)) {
                /* We found our link */
                return linktbl->links[i];
            }
        }
        return NULL;
    }

    /*
     * We can still find '/', time to consume the path and traverse
     * the tree structure.  Terminating at the slash splits the string into
     * the current component and the remainder.
     */
    *slash = '\0';
    char *next_path = slash + 1;

    for (int i = 1; i < linktbl->num; i++) {
        Link *entry = linktbl->links[i];
        if (strncmp(path, entry->p_url, LINK_LEN_MAX)) {
            continue;
        }
        /* The next sub-directory exists */
        if (!entry->next_table) {
            entry->next_table = LinkTable_new(entry->f_url);
            if (!entry->next_table) {
                /* the directory listing could not be retrieved */
                return NULL;
            }
            fprintf(stderr, "Created new link table for %s\n", entry->f_url);
            LinkTable_print(entry->next_table);
        }
        return path_to_Link_recursive(next_path, entry->next_table);
    }

    return NULL;
}

/**
 * \brief download up to size bytes of a file, starting at offset
 * \param path path of the file, resolved with path_to_Link()
 * \param output_buf destination, at least size bytes
 * \param size maximum number of bytes to copy
 * \param offset byte offset into the remote file
 * \return the number of bytes copied (0 when the range lies past the end of
 * the file), -ENOENT if the path is unknown or the server answers with an
 * unexpected status, -EINVAL for a negative offset
 */
long path_download(const char *path, char *output_buf, size_t size,
                   off_t offset)
{
    Link *link = path_to_Link(path);
    if (!link) {
        return -ENOENT;
    }
    if (offset < 0) {
        return -EINVAL;
    }
    if (size == 0) {
        return 0;
    }

    /* HTTP byte ranges are inclusive at both ends */
    long long start = (long long)offset;
    long long end = start + (long long)size - 1;
    char range_str[64];
    snprintf(range_str, sizeof(range_str), "%lld-%lld", start, end);

    MemoryStruct buf = { .memory = NULL, .size = 0 };

    fprintf(stderr, "path_download(%s, %s);\n", path, range_str);

    CURL *curl = Link_to_curl(link);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)&buf);
    curl_easy_setopt(curl, CURLOPT_RANGE, range_str);

    transfer_blocking(curl);

    long http_resp = 0;
    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
    curl_easy_cleanup(curl);

    long ret;
    if (http_resp != HTTP_OK &&
        http_resp != HTTP_PARTIAL_CONTENT &&
        http_resp != HTTP_RANGE_NOT_SATISFIABLE) {
        fprintf(stderr, "path_download(): Could not download %s, HTTP %ld\n",
                link->f_url, http_resp);
        ret = -ENOENT;
    } else {
        const char *data = buf.memory;
        size_t avail = buf.size;

        if (http_resp == HTTP_RANGE_NOT_SATISFIABLE) {
            /* reading past the end: the body is an error page, not data */
            avail = 0;
        } else if (http_resp == HTTP_OK) {
            /* the server ignored the range and sent the file from byte 0 */
            if ((unsigned long long)start >= avail) {
                avail = 0;
            } else {
                data += (size_t)start;
                avail -= (size_t)start;
            }
        }

        size_t copied = avail < size ? avail : size;
        if (copied > 0) {
            memcpy(output_buf, data, copied);
        }
        ret = (long)copied;
    }

    free(buf.memory);
    return ret;
}

/**
 * \brief classify an href value
 * \return LINK_INVALID if the value does not start with an alphanumeric
 * character or is an absolute http(s) URL, LINK_DIR if it ends in '/',
 * LINK_FILE otherwise
 */
static LinkType p_url_type(const char *p_url)
{
    /* The link name has to start with alphanumerical character */
    if (!p_url || !isalnum((unsigned char)p_url[0])) {
        return LINK_INVALID;
    }

    /* check for http:// and https:// */
    if (!strncmp(p_url, "http://", 7) || !strncmp(p_url, "https://", 8)) {
        return LINK_INVALID;
    }

    /* non-empty here, so the index is in bounds */
    if (p_url[strlen(p_url) - 1] == '/') {
        return LINK_DIR;
    }

    return LINK_FILE;
}

/** \brief thread id callback for OpenSSL, taken from pthread_self() */
static unsigned long thread_id(void)
{
    unsigned long ret;

    ret = (unsigned long)pthread_self();
    return ret;
}

/**
 * \brief join url and sublink with exactly one '/' between them
 * \details A separator is inserted only when url is non-empty and does not
 * already end in '/'.  Exits on allocation failure.
 * \return a newly allocated string that the caller must free()
 */
static char *url_append(const char *url, const char *sublink)
{
    size_t ul = strlen(url);
    size_t sl = strlen(sublink);
    int needs_separator = (ul > 0 && url[ul - 1] != '/');

    size_t cap = ul + sl + (size_t)needs_separator + 1;
    char *str = calloc(cap, sizeof(char));
    if (!str) {
        fprintf(stderr, "url_append(): calloc failure!\n");
        exit(EXIT_FAILURE);
    }
    snprintf(str, cap, "%s%s%s", url, needs_separator ? "/" : "", sublink);
    return str;
}

/**
 * \brief libcurl write callback that appends data to a MemoryStruct
 * \details Keeps the buffer NUL-terminated.  Exits on allocation failure.
 * \return the number of bytes consumed, always size * nmemb
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
                                  void *userp)
{
    size_t realsize = size * nmemb;
    MemoryStruct *mem = (MemoryStruct *)userp;

    char *grown = realloc(mem->memory, mem->size + realsize + 1);
    if (!grown) {
        /* out of memory! */
        fprintf(stderr, "WriteMemoryCallback(): realloc failure!\n");
        exit(EXIT_FAILURE);
    }
    mem->memory = grown;

    memcpy(&(mem->memory[mem->size]), contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = 0;

    return realsize;
}