initial commit
This commit is contained in:
commit
cd097dc4f1
|
@ -0,0 +1,3 @@
|
|||
tmp
|
||||
*.o
|
||||
mount-http-dir
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
"name": "mount-http-dir",
|
||||
"files": [ { "git": 1 } ],
|
||||
"build" : {
|
||||
"directory": "build",
|
||||
"build": "make",
|
||||
"install": "make install"
|
||||
}
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
CC=gcc
CFLAGS=-I. -Wall -Wextra -g
# Libraries belong in LDLIBS, not CFLAGS: -l flags are link-time options
# and are ignored (with a warning) during the compile-only (-c) step.
# NOTE(review): main.o uses libcurl, so -lcurl is required at link time;
# confirm against the final set of translation units.
LDLIBS=-lgumbo -lcurl
DEPS =
OBJ = main.o link.o test.o

%.o: %.c $(DEPS)
	$(CC) -c -o $@ $< $(CFLAGS)

mount-http-dir: $(OBJ)
	$(CC) -o $@ $^ $(CFLAGS) $(LDLIBS)

.PHONY: clean

clean:
	rm -f *.o mount-http-dir
|
|
@ -0,0 +1,23 @@
|
|||
## Basic ideas
|
||||
# Syntax
|
||||
mount-http-directory-listing url cache mount_pt
|
||||
|
||||
# Libraries used
|
||||
libcurl libgumbo libfuse
|
||||
|
||||
|
||||
- Seek a URL
|
||||
- If fail, continue
|
||||
- Download it
|
||||
- Allocate random directory
|
||||
- Parse it
|
||||
- For each link, seek
|
||||
- if fail
|
||||
- directory
|
||||
- else
|
||||
- actual file
|
||||
|
||||
Things to write:
|
||||
1) Link parser
|
||||
2) libcurl
|
||||
3) FUSE (filesystem in userspace) interface
|
|
@ -0,0 +1,546 @@
|
|||
/*****************************************************************************
|
||||
*
|
||||
* This example source code introduces a c library buffered I/O interface to
|
||||
* URL reads it supports fopen(), fread(), fgets(), feof(), fclose(),
|
||||
* rewind(). Supported functions have identical prototypes to their normal c
|
||||
* lib namesakes and are preceaded by url_ .
|
||||
*
|
||||
* Using this code you can replace your program's fopen() with url_fopen()
|
||||
* and fread() with url_fread() and it become possible to read remote streams
|
||||
* instead of (only) local files. Local files (ie those that can be directly
|
||||
* fopened) will drop back to using the underlying clib implementations
|
||||
*
|
||||
* See the main() function at the bottom that shows an app that retrieves from
|
||||
* a specified url using fgets() and fread() and saves as two output files.
|
||||
*
|
||||
* Copyright (c) 2003, 2017 Simtec Electronics
|
||||
*
|
||||
* Re-implemented by Vincent Sanders <vince@kyllikki.org> with extensive
|
||||
* reference to original curl example code
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* 3. The name of the author may not be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
||||
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
||||
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
||||
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
||||
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* This example requires libcurl 7.9.7 or later.
|
||||
*/
|
||||
/* <DESC>
|
||||
* implements an fopen() abstraction allowing reading from URLs
|
||||
* </DESC>
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifndef WIN32
|
||||
# include <sys/time.h>
|
||||
#endif
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <curl/curl.h>
|
||||
|
||||
/* Discriminator for the handle union below: what kind of stream a
 * URL_FILE wraps. */
enum fcurl_type_e {
    CFTYPE_NONE = 0,
    CFTYPE_FILE = 1,
    CFTYPE_CURL = 2
};

/* State for one open stream: either a plain FILE* or a curl easy
 * handle, plus the buffer that write_callback() fills with fetched
 * data. */
struct fcurl_data
{
    enum fcurl_type_e type;     /* type of handle */
    union {
        CURL *curl;
        FILE *file;
    } handle;                   /* handle */

    char *buffer;               /* buffer to store cached data*/
    size_t buffer_len;          /* currently allocated buffers length */
    size_t buffer_pos;          /* end of data in buffer*/
    int still_running;          /* Is background url fetch still in progress */
};

typedef struct fcurl_data URL_FILE;

/* exported functions: drop-in analogues of the stdio calls of the
 * same name, operating on URLs as well as local files */
URL_FILE *url_fopen(const char *url, const char *operation);
int url_fclose(URL_FILE *file);
int url_feof(URL_FILE *file);
size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file);
char *url_fgets(char *ptr, size_t size, URL_FILE *file);
void url_rewind(URL_FILE *file);

/* we use a global one for convenience; lazily created in url_fopen() */
static CURLM *multi_handle;
|
||||
|
||||
/* curl calls this routine to get more data */
|
||||
/* curl write callback: appends the incoming chunk to the URL_FILE's
 * internal buffer (passed via CURLOPT_WRITEDATA), growing it with
 * realloc() as needed.  Returns the number of bytes consumed; a short
 * count makes libcurl abort the transfer. */
static size_t write_callback(char *buffer,
                             size_t size,
                             size_t nitems,
                             void *userp)
{
    char *newbuff;
    size_t rembuff;

    URL_FILE *url = (URL_FILE *)userp;
    size *= nitems; /* total bytes in this chunk */

    rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */

    if(size > rembuff) {
        /* not enough space in buffer */
        newbuff = realloc(url->buffer, url->buffer_len + (size - rembuff));
        if(newbuff == NULL) {
            fprintf(stderr, "callback buffer grow failed\n");
            /* keep only what fits; the short return aborts the fetch */
            size = rembuff;
        }
        else {
            /* realloc succeeded increase buffer size*/
            url->buffer_len += size - rembuff;
            url->buffer = newbuff;
        }
    }

    memcpy(&url->buffer[url->buffer_pos], buffer, size);
    url->buffer_pos += size;

    return size;
}
|
||||
|
||||
/* use to attempt to fill the read buffer up to requested number of bytes */
|
||||
/* Attempt to fill the read buffer up to 'want' bytes by driving the
 * global curl multi handle: select() on its fd sets (or sleep 100ms
 * when it has no fds yet) and call curl_multi_perform() until either
 * the transfer finishes or enough data has accumulated.
 * Returns 0 when there is nothing to do (transfer finished or buffer
 * already holds enough), 1 otherwise. */
static int fill_buffer(URL_FILE *file, size_t want)
{
    fd_set fdread;
    fd_set fdwrite;
    fd_set fdexcep;
    struct timeval timeout;
    int rc;
    CURLMcode mc; /* curl_multi_fdset() return code */

    /* only attempt to fill buffer if transactions still running and buffer
     * doesn't exceed required size already
     */
    if((!file->still_running) || (file->buffer_pos > want))
        return 0;

    /* attempt to fill buffer */
    do {
        int maxfd = -1;
        long curl_timeo = -1;

        FD_ZERO(&fdread);
        FD_ZERO(&fdwrite);
        FD_ZERO(&fdexcep);

        /* set a suitable timeout to fail on */
        timeout.tv_sec = 60; /* 1 minute */
        timeout.tv_usec = 0;

        /* honour curl's suggested timeout, capped at one second */
        curl_multi_timeout(multi_handle, &curl_timeo);
        if(curl_timeo >= 0) {
            timeout.tv_sec = curl_timeo / 1000;
            if(timeout.tv_sec > 1)
                timeout.tv_sec = 1;
            else
                timeout.tv_usec = (curl_timeo % 1000) * 1000;
        }

        /* get file descriptors from the transfers */
        mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);

        if(mc != CURLM_OK) {
            fprintf(stderr, "curl_multi_fdset() failed, code %d.\n", mc);
            break;
        }

        /* On success the value of maxfd is guaranteed to be >= -1. We call
           select(maxfd + 1, ...); specially in case of (maxfd == -1) there are
           no fds ready yet so we call select(0, ...) --or Sleep() on Windows--
           to sleep 100ms, which is the minimum suggested value in the
           curl_multi_fdset() doc. */

        if(maxfd == -1) {
#ifdef _WIN32
            Sleep(100);
            rc = 0;
#else
            /* Portable sleep for platforms other than Windows. */
            struct timeval wait = { 0, 100 * 1000 }; /* 100ms */
            rc = select(0, NULL, NULL, NULL, &wait);
#endif
        }
        else {
            /* Note that on some platforms 'timeout' may be modified by select().
               If you need access to the original value save a copy beforehand. */
            rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
        }

        switch(rc) {
        case -1:
            /* select error */
            break;

        case 0:
        default:
            /* timeout or readable/writable sockets */
            curl_multi_perform(multi_handle, &file->still_running);
            break;
        }
    } while(file->still_running && (file->buffer_pos < want));
    return 1;
}
|
||||
|
||||
/* use to remove want bytes from the front of a files buffer */
|
||||
/* Remove 'want' bytes from the front of the file's buffer.
 * If that consumes the whole buffer, release it entirely (the write
 * callback will reallocate on the next fill); otherwise shift the
 * remaining data down to the front.  Always returns 0.
 *
 * Fix: the original tested `(file->buffer_pos - want) <= 0`.  Both
 * operands are size_t, so the subtraction wraps instead of going
 * negative and the test is only true when the two are exactly equal --
 * the intended "fully consumed" comparison is buffer_pos <= want. */
static int use_buffer(URL_FILE *file, size_t want)
{
    if(file->buffer_pos <= want) {
        /* ditch buffer - write will recreate */
        free(file->buffer);
        file->buffer = NULL;
        file->buffer_pos = 0;
        file->buffer_len = 0;
    }
    else {
        /* move rest down make it available for later */
        memmove(file->buffer,
                &file->buffer[want],
                (file->buffer_pos - want));

        file->buffer_pos -= want;
    }
    return 0;
}
|
||||
|
||||
/* fopen() equivalent.  Tries a plain fopen() first (so local paths
 * keep working) and otherwise starts an asynchronous curl fetch.
 * Returns NULL on allocation failure or when the fetch produced no
 * data and finished immediately (e.g. unreachable host).
 * NOTE(review): curl_easy_init() can return NULL and is not checked
 * here -- the setopt calls below would receive a NULL handle. */
URL_FILE *url_fopen(const char *url, const char *operation)
{
    /* this code could check for URLs or types in the 'url' and
       basically use the real fopen() for standard files */

    URL_FILE *file;
    (void)operation;

    file = calloc(1, sizeof(URL_FILE));
    if(!file)
        return NULL;

    file->handle.file = fopen(url, operation);
    if(file->handle.file)
        file->type = CFTYPE_FILE; /* marked as local file */

    else {
        file->type = CFTYPE_CURL; /* marked as URL */
        file->handle.curl = curl_easy_init();

        curl_easy_setopt(file->handle.curl, CURLOPT_URL, url);
        curl_easy_setopt(file->handle.curl, CURLOPT_WRITEDATA, file);
        curl_easy_setopt(file->handle.curl, CURLOPT_VERBOSE, 0L);
        curl_easy_setopt(file->handle.curl, CURLOPT_WRITEFUNCTION, write_callback);

        /* the multi handle is created lazily on first use */
        if(!multi_handle)
            multi_handle = curl_multi_init();

        curl_multi_add_handle(multi_handle, file->handle.curl);

        /* lets start the fetch */
        curl_multi_perform(multi_handle, &file->still_running);

        if((file->buffer_pos == 0) && (!file->still_running)) {
            /* if still_running is 0 now, we should return NULL */

            /* make sure the easy handle is not in the multi handle anymore */
            curl_multi_remove_handle(multi_handle, file->handle.curl);

            /* cleanup */
            curl_easy_cleanup(file->handle.curl);

            free(file);

            file = NULL;
        }
    }
    return file;
}
|
||||
|
||||
/* fclose() equivalent: tears down the underlying FILE* or curl easy
 * handle, then frees the read buffer and the URL_FILE itself.
 * Returns 0 on success, EOF (with errno = EBADF) for an unknown
 * handle type. */
int url_fclose(URL_FILE *file)
{
    int ret = 0;/* default is good return */

    switch(file->type) {
    case CFTYPE_FILE:
        ret = fclose(file->handle.file); /* passthrough */
        break;

    case CFTYPE_CURL:
        /* make sure the easy handle is not in the multi handle anymore */
        curl_multi_remove_handle(multi_handle, file->handle.curl);

        /* cleanup */
        curl_easy_cleanup(file->handle.curl);
        break;

    default: /* unknown or unsupported type - oh dear */
        ret = EOF;
        errno = EBADF;
        break;
    }

    free(file->buffer);/* free any allocated buffer space */
    free(file);

    return ret;
}
|
||||
|
||||
/* feof() equivalent.  For curl streams, end-of-file means the buffer
 * is drained and the background transfer has finished.
 * Returns non-zero at EOF, -1 (errno = EBADF) for an unknown type. */
int url_feof(URL_FILE *file)
{
    int ret = 0;

    switch(file->type) {
    case CFTYPE_FILE:
        ret = feof(file->handle.file);
        break;

    case CFTYPE_CURL:
        if((file->buffer_pos == 0) && (!file->still_running))
            ret = 1;
        break;

    default: /* unknown or unsupported type - oh dear */
        ret = -1;
        errno = EBADF;
        break;
    }
    return ret;
}
|
||||
|
||||
/* fread() equivalent.  For curl streams: top up the internal buffer,
 * copy at most size*nmemb bytes out to the caller and consume them.
 * Returns the number of complete items transferred (0 on EOF/error,
 * or size_t 0 with errno = EBADF for an unknown type). */
size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file)
{
    size_t want;

    switch(file->type) {
    case CFTYPE_FILE:
        want = fread(ptr, size, nmemb, file->handle.file);
        break;

    case CFTYPE_CURL:
        /* NOTE(review): nmemb * size can overflow size_t for huge
         * arguments -- TODO confirm callers keep these bounded */
        want = nmemb * size;

        fill_buffer(file, want);

        /* check if there's data in the buffer - if not fill_buffer()
         * either errored or EOF */
        if(!file->buffer_pos)
            return 0;

        /* ensure only available data is considered */
        if(file->buffer_pos < want)
            want = file->buffer_pos;

        /* xfer data to caller */
        memcpy(ptr, file->buffer, want);

        use_buffer(file, want);

        want = want / size; /* number of items */
        break;

    default: /* unknown or unsupported type - oh dear */
        want = 0;
        errno = EBADF;
        break;

    }
    return want;
}
|
||||
|
||||
/* fgets() equivalent: reads at most size-1 bytes, stopping after the
 * first newline (which is kept), and always NUL-terminates the result.
 * Returns ptr on success, NULL on EOF/error.
 * NOTE(review): assumes size >= 1; size == 0 would make 'want'
 * underflow -- confirm callers never pass 0. */
char *url_fgets(char *ptr, size_t size, URL_FILE *file)
{
    size_t want = size - 1;/* always need to leave room for zero termination */
    size_t loop;

    switch(file->type) {
    case CFTYPE_FILE:
        ptr = fgets(ptr, (int)size, file->handle.file);
        break;

    case CFTYPE_CURL:
        fill_buffer(file, want);

        /* check if there's data in the buffer - if not fill either errored or
         * EOF */
        if(!file->buffer_pos)
            return NULL;

        /* ensure only available data is considered */
        if(file->buffer_pos < want)
            want = file->buffer_pos;

        /*buffer contains data */
        /* look for newline or eof */
        for(loop = 0; loop < want; loop++) {
            if(file->buffer[loop] == '\n') {
                want = loop + 1;/* include newline */
                break;
            }
        }

        /* xfer data to caller */
        memcpy(ptr, file->buffer, want);
        ptr[want] = 0;/* always null terminate */

        use_buffer(file, want);

        break;

    default: /* unknown or unsupported type - oh dear */
        ptr = NULL;
        errno = EBADF;
        break;
    }

    return ptr;/*success */
}
|
||||
|
||||
/* rewind() equivalent.  For curl streams the easy handle is removed
 * from the multi handle and re-added, which restarts the transfer from
 * the beginning; the buffered data is discarded so the logical stream
 * position resets to 0.  Unknown types are silently ignored. */
void url_rewind(URL_FILE *file)
{
    switch(file->type) {
    case CFTYPE_FILE:
        rewind(file->handle.file); /* passthrough */
        break;

    case CFTYPE_CURL:
        /* halt transaction */
        curl_multi_remove_handle(multi_handle, file->handle.curl);

        /* restart */
        curl_multi_add_handle(multi_handle, file->handle.curl);

        /* ditch buffer - write will recreate - resets stream pos*/
        free(file->buffer);
        file->buffer = NULL;
        file->buffer_pos = 0;
        file->buffer_len = 0;

        break;

    default: /* unknown or unsupported type - oh dear */
        break;
    }
}
|
||||
|
||||
#define FGETSFILE "fgets.test"
|
||||
#define FREADFILE "fread.test"
|
||||
#define REWINDFILE "rewind.test"
|
||||
|
||||
/* Small main program to retrieve from a url using fgets and fread saving the
|
||||
* output to two test files (note the fgets method will corrupt binary files if
|
||||
* they contain 0 chars */
|
||||
/* Demo driver: fetches argv[1] (or a hard-coded default URL) three
 * times -- line by line with url_fgets(), in chunks with url_fread(),
 * and once more exercising url_rewind() -- saving the output to the
 * three .test files defined above.
 * NOTE(review): the fread and rewind passes open the literal path
 * "testfile", not the requested URL -- presumably intentional for
 * local testing; confirm. */
int main(int argc, char *argv[])
{
    URL_FILE *handle;
    FILE *outf;

    size_t nread;
    char buffer[256];
    const char *url;

    if(argc < 2)
        url = "http://192.168.7.3/testfile";/* default to testurl */
    else
        url = argv[1];/* use passed url */

    /* copy from url line by line with fgets */
    outf = fopen(FGETSFILE, "wb+");
    if(!outf) {
        perror("couldn't open fgets output file\n");
        return 1;
    }

    handle = url_fopen(url, "r");
    if(!handle) {
        printf("couldn't url_fopen() %s\n", url);
        fclose(outf);
        return 2;
    }

    while(!url_feof(handle)) {
        url_fgets(buffer, sizeof(buffer), handle);
        fwrite(buffer, 1, strlen(buffer), outf);
    }

    url_fclose(handle);

    fclose(outf);


    /* Copy from url with fread */
    outf = fopen(FREADFILE, "wb+");
    if(!outf) {
        perror("couldn't open fread output file\n");
        return 1;
    }

    handle = url_fopen("testfile", "r");
    if(!handle) {
        printf("couldn't url_fopen() testfile\n");
        fclose(outf);
        return 2;
    }

    do {
        nread = url_fread(buffer, 1, sizeof(buffer), handle);
        fwrite(buffer, 1, nread, outf);
    } while(nread);

    url_fclose(handle);

    fclose(outf);


    /* Test rewind: read a chunk, rewind, read again -- the two chunks
     * in REWINDFILE should be identical, separated by a newline */
    outf = fopen(REWINDFILE, "wb+");
    if(!outf) {
        perror("couldn't open fread output file\n");
        return 1;
    }

    handle = url_fopen("testfile", "r");
    if(!handle) {
        printf("couldn't url_fopen() testfile\n");
        fclose(outf);
        return 2;
    }

    nread = url_fread(buffer, 1, sizeof(buffer), handle);
    fwrite(buffer, 1, nread, outf);
    url_rewind(handle);

    buffer[0]='\n';
    fwrite(buffer, 1, 1, outf);

    nread = url_fread(buffer, 1, sizeof(buffer), handle);
    fwrite(buffer, 1, nread, outf);

    url_fclose(handle);

    fclose(outf);

    return 0;/* all done */
}
|
|
@ -0,0 +1,136 @@
|
|||
#include <ctype.h>
|
||||
|
||||
#include "link.h"
|
||||
#include "string.h"
|
||||
|
||||
/* Map a linktype to its single-character display code:
 * 'D' directory, 'F' file, 'U' unknown, 'E' anything unexpected. */
static char linktype_to_char(linktype t)
{
    if (t == LINK_DIR) {
        return 'D';
    }
    if (t == LINK_FILE) {
        return 'F';
    }
    if (t == LINK_UNKNOWN) {
        return 'U';
    }
    return 'E';
}
|
||||
|
||||
/* Print every link in the list to stderr, one per line, formatted as
 * "<index> <type-char> <url>". */
void linklist_print(ll_t *links)
{
    for (int i = 0; i < links->num; i++) {
        fprintf(stderr, "%d %c %s\n",
                i,
                linktype_to_char(links->type[i]),
                links->link[i]);
    }
}
|
||||
|
||||
ll_t *linklist_new()
|
||||
{
|
||||
ll_t *links = malloc(sizeof(ll_t));
|
||||
links->num = 0;
|
||||
links->link = NULL;
|
||||
links->type = NULL;
|
||||
return links;
|
||||
}
|
||||
|
||||
/* Decide whether an href from a directory listing is worth keeping.
 * Rejects names that do not start with an alphanumeric character
 * (e.g. sort links like "?C=N;O=D" or "../") and absolute http:// /
 * https:// URLs that point away from the listing.
 * Returns 1 to keep the link, 0 to drop it. */
static int is_valid_link(const char *n)
{
    /* The link name has to start with an alphanumerical character.
     * Cast to unsigned char: passing a plain (possibly negative) char
     * to isalnum() is undefined behaviour. */
    if (!isalnum((unsigned char)n[0])) {
        return 0;
    }
    /* Reject absolute http:// and https:// URLs; strncmp replaces the
     * original's fragile per-index comparison but accepts exactly the
     * same strings. */
    if (strncmp(n, "http://", 7) == 0 || strncmp(n, "https://", 8) == 0) {
        return 0;
    }
    return 1;
}
|
||||
|
||||
/*
|
||||
* Shamelessly copied and pasted from:
|
||||
* https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
|
||||
*/
|
||||
void html_to_linklist(GumboNode *node, ll_t *links)
|
||||
{
|
||||
if (node->type != GUMBO_NODE_ELEMENT) {
|
||||
return;
|
||||
}
|
||||
GumboAttribute* href;
|
||||
|
||||
if (node->v.element.tag == GUMBO_TAG_A &&
|
||||
(href = gumbo_get_attribute(&node->v.element.attributes, "href"))) {
|
||||
/* if it is valid, copy the link onto the heap */
|
||||
if (is_valid_link(href->value)) {
|
||||
links->num++;
|
||||
if (!links->link) {
|
||||
links->link = malloc(sizeof(char *));
|
||||
links->type = malloc(sizeof(linktype *));
|
||||
} else {
|
||||
links->link = realloc(links->link, links->num * sizeof(char *));
|
||||
links->type = realloc(links->type,
|
||||
links->num * sizeof(linktype *));
|
||||
}
|
||||
int i = links->num - 1;
|
||||
links->link[i] = malloc(strlen(href->value) * sizeof(char *));
|
||||
strcpy(links->link[i], href->value);
|
||||
links->type[i] = LINK_UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/* Note the recursive call, lol. */
|
||||
GumboVector *children = &node->v.element.children;
|
||||
for (size_t i = 0; i < children->length; ++i) {
|
||||
html_to_linklist((GumboNode*)children->data[i], links);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Release a link list created by linklist_new(), including every
 * string it owns. */
void linklist_free(ll_t *links)
{
    for (int i = 0; i < links->num; i++) {
        free(links->link[i]);
    }
    /* free the pointer array itself, not just the strings -- the
     * original leaked links->link */
    free(links->link);
    free(links->type);
    free(links);
}
|
||||
|
||||
/* Return the parent level of a URL as a newly allocated string,
 * keeping the trailing '/' (e.g. "http://a/b/c" -> "http://a/b/").
 * Returns NULL if the URL contains no '/' at all, or on allocation
 * failure.  The caller owns the returned string. */
char *url_upper(const char *url)
{
    const char *pt = strrchr(url, '/');
    if (!pt) {
        /* no '/' -- there is no upper level; the original computed
         * pt - url on a NULL pt, which is undefined behaviour */
        return NULL;
    }
    /* +1 for the '/' */
    size_t len = pt - url + 1;
    /* +1 for the terminating '\0' -- the original allocated only len
     * bytes and then wrote str[len], one past the end */
    char *str = malloc(len + 1);
    if (!str) {
        return NULL;
    }
    memcpy(str, url, len);
    str[len] = '\0';
    return str;
}
|
||||
|
||||
/* Join a URL and a sublink into a newly allocated string, inserting a
 * '/' separator when the URL does not already end with one.
 * Returns NULL on allocation failure; the caller owns the result. */
char *url_append(const char *url, const char *sublink)
{
    size_t ul = strlen(url);
    size_t sl = strlen(sublink);
    /* ul > 0 guard: the original read url[strlen(url) - 1] even for an
     * empty string, indexing one before the buffer */
    int needs_separator = (ul > 0 && url[ul - 1] != '/');

    /* +1 for the terminating '\0' -- the original calloc was exactly
     * one byte short, so strncat wrote the terminator past the end */
    char *str = calloc(ul + sl + needs_separator + 1, sizeof(char));
    if (!str) {
        return NULL;
    }
    memcpy(str, url, ul);
    if (needs_separator) {
        str[ul] = '/';
    }
    /* sl + 1 copies the terminator too */
    memcpy(str + ul + needs_separator, sublink, sl + 1);
    return str;
}
|
|
@ -0,0 +1,40 @@
|
|||
#ifndef LINK_H
#define LINK_H
#include <stdio.h>
#include <stdlib.h>

#include <gumbo.h>
/* \brief the link type: directory, regular file, or not yet probed */
typedef enum {
    LINK_DIR,
    LINK_FILE,
    LINK_UNKNOWN
} linktype;

/* \brief link list data type
 * \details owns 'num' heap-allocated strings in 'link' plus a parallel
 * 'type' array; create with linklist_new(), release with
 * linklist_free() */
typedef struct {
    int num;
    char **link;
    linktype *type;
} ll_t;

/* \brief make a new link list (caller owns it; free with linklist_free) */
ll_t *linklist_new();

/* \brief print a link list to stderr, one entry per line */
void linklist_print(ll_t *links);

/* \brief convert a html page (gumbo DOM tree) to a link list */
void html_to_linklist(GumboNode *node, ll_t *links);

/* \brief free a link list and every string it owns */
void linklist_free(ll_t *links);

/* \brief the upper level of a URL, as a new string (caller frees) */
/* \warning does not check if you have reached the base level! */
char *url_upper(const char *url);

/* \brief append url and sublink, inserting '/' if needed (caller frees) */
char *url_append(const char *url, const char *sublink);

#endif
|
|
@ -0,0 +1,13 @@
|
|||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "test.h"
|
||||
|
||||
|
||||
/* Test driver: runs the gumbo link-extraction test (expects an HTML
 * file path in argv[1]) followed by the URL string-manipulation test. */
int main(int argc, char** argv) {
    gumbo_test(argc, argv);
    url_test();
    return 0;
}
|
|
@ -0,0 +1,53 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "link.h"
|
||||
#include "test.h"
|
||||
|
||||
/* Exercise url_append() with and without a trailing '/' and print the
 * results (length and string) for manual inspection. */
void url_test()
{
    printf("--- start of url_test ---\n");
    char *url1 = "http://www.google.com/";
    char *url2 = "http://www.google.com";
    char *cat_url1 = url_append(url1, "fangfufu");
    char *cat_url2 = url_append(url2, "fangfufu");
    printf("%d %s\n", (int) strlen(cat_url1), cat_url1);
    printf("%d %s\n", (int) strlen(cat_url2), cat_url2);
    /* url_append() returns heap strings owned by the caller -- the
     * original leaked both */
    free(cat_url1);
    free(cat_url2);
    printf("--- end of url_test ---\n\n");
}
|
||||
|
||||
void gumbo_test(int argc, char **argv)
|
||||
{
|
||||
printf("--- start of gumbo_test ---\n");
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "Usage: find_links <html filename>.\n");
|
||||
}
|
||||
const char* filename = argv[1];
|
||||
|
||||
FILE *fp;
|
||||
fp = fopen(filename, "r");
|
||||
|
||||
if (!fp) {
|
||||
fprintf(stderr, "File %s not found!\n", filename);
|
||||
}
|
||||
|
||||
fseek(fp, 0L, SEEK_END);
|
||||
unsigned long filesize = ftell(fp);
|
||||
rewind(fp);
|
||||
|
||||
char* contents = (char*) malloc(sizeof(char) * filesize);
|
||||
if (fread(contents, 1, filesize, fp) != filesize) {
|
||||
fprintf(stderr, "Read error, %s\n", strerror(errno));
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
GumboOutput* output = gumbo_parse(contents);
|
||||
ll_t *links = linklist_new();
|
||||
html_to_linklist(output->root, links);
|
||||
gumbo_destroy_output(&kGumboDefaultOptions, output);
|
||||
linklist_print(links);
|
||||
linklist_free(links);
|
||||
printf("--- end of gumbo_test ---\n\n");
|
||||
}
|
Loading…
Reference in New Issue