initial commit

2018-07-18 16:26:26 +01:00 · 2018-07-18 16:26:26 +01:00 · cd097dc4f1
commit cd097dc4f1
11 changed files with 844 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
 tmp
 *.o
 mount-http-dir
--- a/.kateproject
+++ b/.kateproject
@ -0,0 +1,9 @@
 {
    "name": "mount-http-dir", 
    "files": [ { "git": 1 } ],
    "build" : {
        "directory": "build",
        "build": "make",
        "install": "make install"
    }
 }
--- a/15
+++ b/15
@ -0,0 +1,15 @@
 # Toolchain and flags. Libraries live in LDLIBS so that they are handed to
 # the link step only, and always after the object files that need them.
 CC=gcc
 CFLAGS=-I. -Wall -Wextra -g
 LDLIBS=-lgumbo
 # Project headers every object depends on; touching one forces a rebuild.
 DEPS = link.h test.h
 OBJ = main.o link.o test.o
 # Compile each translation unit on its own.
 %.o: %.c $(DEPS)
 	$(CC) -c -o $@ $< $(CFLAGS)
 # Link the final binary.
 mount-http-dir: $(OBJ)
 	$(CC) -o $@ $^ $(CFLAGS) $(LDLIBS)
 .PHONY: clean
 clean:
 	rm -f *.o mount-http-dir
--- a/23
+++ b/23
@ -0,0 +1,23 @@
 ## Basic ideas
 # Syntax
 mount-http-directory-listing url cache mount_pt
 # Libraries used
 libcurl libgumbo libfuse
 - Seek a URL
 - If the seek fails, continue
 - Download it
 - Allocate random directory
 - Parse it
    - For each link, seek
    - if fail
        - directory
    - else
        - actual file
 Things to write:
 1) Link parser
 2) libcurl
 3) FUSE layer (libfuse)
--- a/http.c
+++ b/http.c
@ -0,0 +1,546 @@
 /*****************************************************************************
 *
 * This example source code introduces a C library buffered I/O interface to
 * URL reads. It supports fopen(), fread(), fgets(), feof(), fclose() and
 * rewind(). Supported functions have identical prototypes to their normal C
 * library namesakes and are prefixed with url_ .
 *
 * Using this code you can replace your program's fopen() with url_fopen()
 * and fread() with url_fread(), and it becomes possible to read remote
 * streams instead of (only) local files. Local files (i.e. those that can be
 * directly fopened) will drop back to using the underlying C library
 * implementations.
 *
 * See the main() function at the bottom that shows an app that retrieves from
 * a specified url using fgets() and fread() and saves as two output files.
 *
 * Copyright (c) 2003, 2017 Simtec Electronics
 *
 * Re-implemented by Vincent Sanders <vince@kyllikki.org> with extensive
 * reference to original curl example code
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * This example requires libcurl 7.9.7 or later.
 */
 /* <DESC>
 * implements an fopen() abstraction allowing reading from URLs
 * </DESC>
 */
 #include <stdio.h>
 #include <string.h>
 #ifndef WIN32
 #  include <sys/time.h>
 #endif
 #include <stdlib.h>
 #include <errno.h>
 #include <curl/curl.h>
 /* Identifies which member of fcurl_data.handle is in use for a stream. */
 enum fcurl_type_e {
  CFTYPE_NONE = 0, /* value of a zero-initialised handle; no backend bound */
  CFTYPE_FILE = 1, /* handle.file: a stdio FILE obtained from fopen() */
  CFTYPE_CURL = 2  /* handle.curl: a libcurl easy handle */
 };
 /*
 * State of one stream opened with url_fopen().
 *
 * For CFTYPE_CURL streams the write callback appends received bytes to
 * 'buffer' and the read functions consume them from the front; the buffer
 * fields are not used by the CFTYPE_FILE code paths in this file.
 */
 struct fcurl_data
 {
  enum fcurl_type_e type;     /* which member of 'handle' is valid */
  union {
    CURL *curl;               /* valid when type == CFTYPE_CURL */
    FILE *file;               /* valid when type == CFTYPE_FILE */
  } handle;                   /* underlying backend handle */
  char *buffer;               /* heap buffer holding received, unread data */
  size_t buffer_len;          /* number of bytes currently allocated */
  size_t buffer_pos;          /* number of valid bytes (end of data) */
  int still_running;          /* non-zero while the transfer is in progress;
                                 written by curl_multi_perform() */
 };
 /* Opaque-style alias used by the url_* API. */
 typedef struct fcurl_data URL_FILE;
 /* exported functions */
 /* Open 'url': fopen(url, operation) is tried first, otherwise a libcurl
  * transfer is started. Returns NULL on failure. */
 URL_FILE *url_fopen(const char *url, const char *operation);
 /* Close the stream and free it. Returns the fclose() result for local
  * files, 0 for curl streams, EOF (errno = EBADF) for an unknown type. */
 int url_fclose(URL_FILE *file);
 /* Non-zero at end of stream; -1 (errno = EBADF) for an unknown type. */
 int url_feof(URL_FILE *file);
 /* fread() look-alike; returns the number of items read. */
 size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file);
 /* fgets() look-alike; returns 'ptr', or NULL when no data is available. */
 char *url_fgets(char *ptr, size_t size, URL_FILE *file);
 /* Restart the stream from the beginning, discarding buffered data. */
 void url_rewind(URL_FILE *file);
 /* One multi handle shared by every curl-backed stream; created lazily by
  * url_fopen() and never destroyed in this file. */
 static CURLM *multi_handle;
 /*
 * libcurl write callback: append the received bytes to the stream buffer.
 *
 * buffer  - data delivered by libcurl (size * nitems bytes)
 * userp   - the URL_FILE registered through CURLOPT_WRITEDATA
 *
 * The buffer is grown by exactly the amount that is missing. If growing
 * fails, only the bytes that still fit are stored; returning fewer bytes
 * than were offered makes libcurl abort the transfer.
 *
 * Returns the number of bytes actually stored.
 */
 static size_t write_callback(char *buffer,
                             size_t size,
                             size_t nitems,
                             void *userp)
 {
  URL_FILE *url = (URL_FILE *)userp;
  size_t rembuff;
  size *= nitems;
  rembuff = url->buffer_len - url->buffer_pos; /* remaining space in buffer */
  if(size > rembuff) {
    /* not enough space in buffer */
    char *newbuff = realloc(url->buffer, url->buffer_len + (size - rembuff));
    if(newbuff == NULL) {
      fprintf(stderr, "callback buffer grow failed\n");
      size = rembuff;
    }
    else {
      /* realloc succeeded increase buffer size*/
      url->buffer_len += size - rembuff;
      url->buffer = newbuff;
    }
  }
  /* url->buffer may still be NULL here (nothing allocated yet and nothing
     to store); memcpy() must not be handed a NULL pointer even for 0 bytes */
  if(size > 0) {
    memcpy(&url->buffer[url->buffer_pos], buffer, size);
    url->buffer_pos += size;
  }
  return size;
 }
 /*
 * Attempt to fill the read buffer up to the requested number of bytes by
 * driving the shared multi handle until either 'want' bytes are buffered or
 * the transfer stops running.
 *
 * Returns 0 when nothing was attempted (transfer already finished, or the
 * buffer already holds more than 'want' bytes) and 1 otherwise. A return of
 * 1 does not mean 'want' bytes are available: the loop also ends when the
 * transfer finishes or curl_multi_fdset() fails, so callers must inspect
 * file->buffer_pos.
 */
 static int fill_buffer(URL_FILE *file, size_t want)
 {
  fd_set fdread;
  fd_set fdwrite;
  fd_set fdexcep;
  struct timeval timeout;
  int rc;
  CURLMcode mc; /* curl_multi_fdset() return code */
  /* only attempt to fill buffer if transactions still running and buffer
   * doesn't exceed required size already
   */
  /* NOTE(review): this test uses '>' while the loop condition below uses
   * '<', so with exactly 'want' bytes buffered the loop body still runs
   * once - confirm whether '>=' was intended. */
  if((!file->still_running) || (file->buffer_pos > want))
    return 0;
  /* attempt to fill buffer */
  do {
    int maxfd = -1;
    long curl_timeo = -1;
    FD_ZERO(&fdread);
    FD_ZERO(&fdwrite);
    FD_ZERO(&fdexcep);
    /* set a suitable timeout to fail on */
    timeout.tv_sec = 60; /* 1 minute */
    timeout.tv_usec = 0;
    /* if libcurl suggests a timeout use it, capped at one second */
    curl_multi_timeout(multi_handle, &curl_timeo);
    if(curl_timeo >= 0) {
      timeout.tv_sec = curl_timeo / 1000;
      if(timeout.tv_sec > 1)
        timeout.tv_sec = 1;
      else
        timeout.tv_usec = (curl_timeo % 1000) * 1000;
    }
    /* get file descriptors from the transfers */
    mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
    if(mc != CURLM_OK) {
      fprintf(stderr, "curl_multi_fdset() failed, code %d.\n", mc);
      break;
    }
    /* On success the value of maxfd is guaranteed to be >= -1. We call
       select(maxfd + 1, ...); specially in case of (maxfd == -1) there are
       no fds ready yet so we call select(0, ...) --or Sleep() on Windows--
       to sleep 100ms, which is the minimum suggested value in the
       curl_multi_fdset() doc. */
    if(maxfd == -1) {
 #ifdef _WIN32
      Sleep(100);
      rc = 0;
 #else
      /* Portable sleep for platforms other than Windows. */
      struct timeval wait = { 0, 100 * 1000 }; /* 100ms */
      rc = select(0, NULL, NULL, NULL, &wait);
 #endif
    }
    else {
      /* Note that on some platforms 'timeout' may be modified by select().
         If you need access to the original value save a copy beforehand. */
      rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
    }
    switch(rc) {
    case -1:
      /* select error */
      /* nothing is performed here, so still_running is left unchanged and
         the loop simply tries again */
      break;
    case 0:
    default:
      /* timeout or readable/writable sockets */
      /* lets libcurl move data; this is what invokes write_callback() and
         updates file->still_running */
      curl_multi_perform(multi_handle, &file->still_running);
      break;
    }
  } while(file->still_running && (file->buffer_pos < want));
  return 1;
 }
 /*
 * Remove 'want' bytes from the front of a stream's buffer.
 *
 * When the request consumes everything that is buffered (or more), the
 * buffer is released entirely; write_callback() allocates a new one on
 * demand. Otherwise the unread tail is moved to the front.
 *
 * Always returns 0.
 */
 static int use_buffer(URL_FILE *file, size_t want)
 {
  /* compare directly: both operands are unsigned, so testing the difference
     "buffer_pos - want" against zero would wrap when want > buffer_pos */
  if(want >= file->buffer_pos) {
    /* ditch buffer - write will recreate */
    free(file->buffer);
    file->buffer = NULL;
    file->buffer_pos = 0;
    file->buffer_len = 0;
  }
  else {
    /* move rest down make it available for later */
    size_t remaining = file->buffer_pos - want;
    memmove(file->buffer, &file->buffer[want], remaining);
    file->buffer_pos = remaining;
  }
  return 0;
 }
 URL_FILE *url_fopen(const char *url, const char *operation)
 {
  /* this code could check for URLs or types in the 'url' and
     basically use the real fopen() for standard files */
  URL_FILE *file;
  (void)operation;
  file = calloc(1, sizeof(URL_FILE));
  if(!file)
    return NULL;
  file->handle.file = fopen(url, operation);
  if(file->handle.file)
    file->type = CFTYPE_FILE; /* marked as URL */
  else {
    file->type = CFTYPE_CURL; /* marked as URL */
    file->handle.curl = curl_easy_init();
    curl_easy_setopt(file->handle.curl, CURLOPT_URL, url);
    curl_easy_setopt(file->handle.curl, CURLOPT_WRITEDATA, file);
    curl_easy_setopt(file->handle.curl, CURLOPT_VERBOSE, 0L);
    curl_easy_setopt(file->handle.curl, CURLOPT_WRITEFUNCTION, write_callback);
    if(!multi_handle)
      multi_handle = curl_multi_init();
    curl_multi_add_handle(multi_handle, file->handle.curl);
    /* lets start the fetch */
    curl_multi_perform(multi_handle, &file->still_running);
    if((file->buffer_pos == 0) && (!file->still_running)) {
      /* if still_running is 0 now, we should return NULL */
      /* make sure the easy handle is not in the multi handle anymore */
      curl_multi_remove_handle(multi_handle, file->handle.curl);
      /* cleanup */
      curl_easy_cleanup(file->handle.curl);
      free(file);
      file = NULL;
    }
  }
  return file;
 }
 /*
 * Close a stream obtained from url_fopen() and release the handle together
 * with any buffered data.
 *
 * Returns the fclose() result for local files, 0 for curl streams, and EOF
 * with errno set to EBADF when the handle type is not recognised.
 */
 int url_fclose(URL_FILE *file)
 {
  int status;
  if(file->type == CFTYPE_FILE) {
    /* hand straight through to stdio */
    status = fclose(file->handle.file);
  }
  else if(file->type == CFTYPE_CURL) {
    /* detach from the multi handle before destroying the easy handle */
    curl_multi_remove_handle(multi_handle, file->handle.curl);
    curl_easy_cleanup(file->handle.curl);
    status = 0;
  }
  else {
    /* neither a file nor a curl stream */
    status = EOF;
    errno = EBADF;
  }
  /* the handle and its buffer are released whatever the type was */
  free(file->buffer);
  free(file);
  return status;
 }
 /*
 * End-of-stream test.
 *
 * Local files defer to feof(). A curl stream is at its end once the buffer
 * is empty and the transfer is no longer running. An unrecognised handle
 * type yields -1 with errno set to EBADF.
 */
 int url_feof(URL_FILE *file)
 {
  if(file->type == CFTYPE_FILE)
    return feof(file->handle.file);
  if(file->type == CFTYPE_CURL) {
    int drained = (file->buffer_pos == 0);
    int finished = !file->still_running;
    return (drained && finished) ? 1 : 0;
  }
  /* neither a file nor a curl stream */
  errno = EBADF;
  return -1;
 }
 /*
 * fread() look-alike for URL_FILE streams.
 *
 * Reads up to 'nmemb' items of 'size' bytes into 'ptr'. For curl streams the
 * call waits (via fill_buffer()) until enough data is buffered or the
 * transfer ends, then hands over whatever is available.
 *
 * Returns the number of complete items read; 0 at end of stream, on error,
 * or when 'size' or 'nmemb' is 0. An unrecognised handle type returns 0 with
 * errno set to EBADF.
 */
 size_t url_fread(void *ptr, size_t size, size_t nmemb, URL_FILE *file)
 {
  size_t want;
  switch(file->type) {
  case CFTYPE_FILE:
    want = fread(ptr, size, nmemb, file->handle.file);
    break;
  case CFTYPE_CURL:
    /* nothing to transfer; this also keeps the "want / size" division at
       the end of this case from dividing by zero */
    if(size == 0 || nmemb == 0)
      return 0;
    want = nmemb * size;
    fill_buffer(file, want);
    /* check if there's data in the buffer - if not fill_buffer()
     * either errored or EOF */
    if(!file->buffer_pos)
      return 0;
    /* ensure only available data is considered */
    if(file->buffer_pos < want)
      want = file->buffer_pos;
    /* xfer data to caller */
    memcpy(ptr, file->buffer, want);
    use_buffer(file, want);
    want = want / size;     /* number of items */
    break;
  default: /* unknown or supported type - oh dear */
    want = 0;
    errno = EBADF;
    break;
  }
  return want;
 }
 /*
 * fgets() look-alike for URL_FILE streams.
 *
 * Copies at most size - 1 bytes into 'ptr', stopping after the first
 * newline, and always NUL-terminates the result.
 *
 * Returns 'ptr' on success. Returns NULL when no data is available (end of
 * stream or error), when 'size' is 0 on a curl stream (there is no room
 * even for the terminator), or - with errno set to EBADF - when the handle
 * type is not recognised.
 */
 char *url_fgets(char *ptr, size_t size, URL_FILE *file)
 {
  size_t want;
  size_t loop;
  switch(file->type) {
  case CFTYPE_FILE:
    ptr = fgets(ptr, (int)size, file->handle.file);
    break;
  case CFTYPE_CURL:
    /* size - 1 would wrap around for size == 0 and the terminator would be
       written far outside the caller's buffer */
    if(size == 0)
      return NULL;
    want = size - 1;/* always need to leave room for zero termination */
    fill_buffer(file, want);
    /* check if there's data in the buffer - if not fill either errored or
     * EOF */
    if(!file->buffer_pos)
      return NULL;
    /* ensure only available data is considered */
    if(file->buffer_pos < want)
      want = file->buffer_pos;
    /*buffer contains data */
    /* look for newline or eof */
    for(loop = 0; loop < want; loop++) {
      if(file->buffer[loop] == '\n') {
        want = loop + 1;/* include newline */
        break;
      }
    }
    /* xfer data to caller */
    memcpy(ptr, file->buffer, want);
    ptr[want] = 0;/* always null terminate */
    use_buffer(file, want);
    break;
  default: /* unknown or supported type - oh dear */
    ptr = NULL;
    errno = EBADF;
    break;
  }
  return ptr;/*success */
 }
 /*
 * Reposition a stream at its beginning.
 *
 * Local files use rewind(). For a curl stream the easy handle is taken out
 * of the multi handle and added again, and all buffered data is thrown
 * away. Handles of an unrecognised type are left untouched.
 */
 void url_rewind(URL_FILE *file)
 {
  if(file->type == CFTYPE_FILE) {
    rewind(file->handle.file);
    return;
  }
  if(file->type != CFTYPE_CURL)
    return; /* unknown type: nothing to do */
  /* stop the transfer, then queue the same easy handle again */
  curl_multi_remove_handle(multi_handle, file->handle.curl);
  curl_multi_add_handle(multi_handle, file->handle.curl);
  /* drop buffered data; the write callback allocates a new buffer */
  free(file->buffer);
  file->buffer = NULL;
  file->buffer_pos = 0;
  file->buffer_len = 0;
 }
 #define FGETSFILE "fgets.test"
 #define FREADFILE "fread.test"
 #define REWINDFILE "rewind.test"
 /* Small main program to retrieve from a url using fgets and fread, saving
 * the output to test files (note the fgets method will corrupt binary files
 * if they contain 0 chars).
 *
 * Usage: program [url]  - without an argument a default test URL is used.
 *
 * Returns 0 on success, 1 if an output file cannot be created and 2 if a
 * source cannot be opened. */
 int main(int argc, char *argv[])
 {
  URL_FILE *handle;
  FILE *outf;
  size_t nread;
  char buffer[256];
  const char *url;
  if(argc < 2)
    url = "http://192.168.7.3/testfile";/* default to testurl */
  else
    url = argv[1];/* use passed url */
  /* copy from url line by line with fgets */
  outf = fopen(FGETSFILE, "wb+");
  if(!outf) {
    perror("couldn't open fgets output file\n");
    return 1;
  }
  handle = url_fopen(url, "r");
  if(!handle) {
    printf("couldn't url_fopen() %s\n", url);
    fclose(outf);
    return 2;
  }
  while(!url_feof(handle)) {
    /* on NULL nothing was stored in 'buffer', so it must not be written */
    if(!url_fgets(buffer, sizeof(buffer), handle))
      break;
    fwrite(buffer, 1, strlen(buffer), outf);
  }
  url_fclose(handle);
  fclose(outf);
  /* Copy from url with fread */
  outf = fopen(FREADFILE, "wb+");
  if(!outf) {
    perror("couldn't open fread output file\n");
    return 1;
  }
  handle = url_fopen("testfile", "r");
  if(!handle) {
    printf("couldn't url_fopen() testfile\n");
    fclose(outf);
    return 2;
  }
  do {
    nread = url_fread(buffer, 1, sizeof(buffer), handle);
    fwrite(buffer, 1, nread, outf);
  } while(nread);
  url_fclose(handle);
  fclose(outf);
  /* Test rewind */
  outf = fopen(REWINDFILE, "wb+");
  if(!outf) {
    perror("couldn't open rewind output file\n");
    return 1;
  }
  handle = url_fopen("testfile", "r");
  if(!handle) {
    printf("couldn't url_fopen() testfile\n");
    fclose(outf);
    return 2;
  }
  /* first chunk, a separating newline, then the first chunk again */
  nread = url_fread(buffer, 1, sizeof(buffer), handle);
  fwrite(buffer, 1, nread, outf);
  url_rewind(handle);
  buffer[0]='\n';
  fwrite(buffer, 1, 1, outf);
  nread = url_fread(buffer, 1, sizeof(buffer), handle);
  fwrite(buffer, 1, nread, outf);
  url_fclose(handle);
  fclose(outf);
  return 0;/* all done */
 }
--- a/http.h
+++ b/http.h
--- a/link.c
+++ b/link.c
@ -0,0 +1,136 @@
 #include <ctype.h>
 #include "link.h"
 #include "string.h"
 /* Map a link type to the one-letter tag used by linklist_print():
 * 'D' directory, 'F' file, 'U' unknown, 'E' for any other value. */
 static char linktype_to_char(linktype t)
 {
    if (t == LINK_DIR) {
        return 'D';
    }
    if (t == LINK_FILE) {
        return 'F';
    }
    if (t == LINK_UNKNOWN) {
        return 'U';
    }
    /* not one of the declared enumerators */
    return 'E';
 }
 /* Write every entry of the list to stderr, one per line, in the form
 * "<index> <type letter> <link>". */
 void linklist_print(ll_t *links)
 {
    int idx = 0;
    while (idx < links->num) {
        char tag = linktype_to_char(links->type[idx]);
        const char *target = links->link[idx];
        fprintf(stderr, "%d %c %s\n", idx, tag, target);
        idx++;
    }
 }
 /*
 * Allocate an empty link list.
 *
 * Returns a list with no entries, or NULL if memory could not be obtained.
 * The caller owns the result and releases it with linklist_free().
 */
 ll_t *linklist_new(void)
 {
    ll_t *links = malloc(sizeof *links);
    if (!links) {
        return NULL;
    }
    links->num = 0;
    links->link = NULL;
    links->type = NULL;
    return links;
 }
 /*
 * Decide whether an href value should be recorded in the link list.
 *
 * A link is accepted (1) when it starts with an alphanumeric character and
 * is not an absolute "http://" or "https://" URL; everything else,
 * including NULL and the empty string, is rejected (0).
 */
 static int is_valid_link(const char *n)
 {
    if (!n) {
        return 0;
    }
    /* The link name has to start with an alphanumeric character. The cast is
     * required: passing a negative char value to isalnum() is undefined. */
    if (!isalnum((unsigned char) n[0])) {
        return 0;
    }
    /* check for http:// and https:// */
    if (strncmp(n, "http://", 7) == 0 || strncmp(n, "https://", 8) == 0) {
        return 0;
    }
    return 1;
 }
 /*
 * Collect the links of a parsed HTML tree.
 *
 * Walks the tree rooted at 'node' depth-first and, for every <a> element
 * whose href passes is_valid_link(), appends a heap copy of the href to
 * 'links' with type LINK_UNKNOWN. If memory runs out the affected link is
 * skipped and the list is left consistent.
 *
 * Adapted from:
 * https://github.com/google/gumbo-parser/blob/master/examples/find_links.cc
 */
 void html_to_linklist(GumboNode *node, ll_t *links)
 {
    if (node->type != GUMBO_NODE_ELEMENT) {
        return;
    }
    GumboAttribute *href;
    if (node->v.element.tag == GUMBO_TAG_A &&
        (href = gumbo_get_attribute(&node->v.element.attributes, "href")) &&
        is_valid_link(href->value)) {
        size_t count = (size_t) links->num + 1;
        size_t len = strlen(href->value);
        /* one extra byte for the terminating NUL */
        char *copy = malloc(len + 1);
        /* realloc(NULL, n) behaves like malloc(n), so the first insertion
         * needs no special case; results go through temporaries so that the
         * existing arrays survive a failed call */
        char **new_link = realloc(links->link, count * sizeof *new_link);
        if (new_link) {
            links->link = new_link;
        }
        linktype *new_type = realloc(links->type, count * sizeof *new_type);
        if (new_type) {
            links->type = new_type;
        }
        if (copy && new_link && new_type) {
            memcpy(copy, href->value, len + 1);
            links->link[links->num] = copy;
            links->type[links->num] = LINK_UNKNOWN;
            /* only count the entry once it is completely stored */
            links->num++;
        } else {
            fprintf(stderr, "html_to_linklist: out of memory, link skipped\n");
            free(copy);
        }
    }
    /* Recurse into the children of this element. */
    GumboVector *children = &node->v.element.children;
    for (size_t i = 0; i < children->length; ++i) {
        html_to_linklist((GumboNode *) children->data[i], links);
    }
 }
 /*
 * Release a link list: every stored link string, both arrays and the list
 * itself. Passing NULL is allowed and does nothing.
 */
 void linklist_free(ll_t *links)
 {
    if (!links) {
        return;
    }
    for (int i = 0; i < links->num; i++) {
        free(links->link[i]);
    }
    /* the pointer array itself is heap memory too */
    free(links->link);
    free(links->type);
    free(links);
 }
 /*
 * The upper level of a URL: everything up to and including the last '/'.
 *
 * Returns a newly allocated string the caller must free(), or NULL when
 * 'url' is NULL, contains no '/' at all, or memory is exhausted. A URL that
 * already ends in '/' is returned unchanged (as a copy); no check is made
 * for having reached the base level.
 */
 char *url_upper(const char *url)
 {
    if (!url) {
        return NULL;
    }
    const char *pt = strrchr(url, '/');
    if (!pt) {
        return NULL;
    }
    /* +1 for the '/' */
    size_t len = (size_t) (pt - url) + 1;
    /* and one more byte for the terminating NUL */
    char *str = malloc(len + 1);
    if (!str) {
        return NULL;
    }
    memcpy(str, url, len);
    str[len] = '\0';
    return str;
 }
 /*
 * Append 'sublink' to 'url', inserting a single '/' between them when 'url'
 * is non-empty and does not already end in one.
 *
 * Returns a newly allocated string the caller must free(), or NULL if
 * memory could not be obtained.
 */
 char *url_append(const char *url, const char *sublink)
 {
    size_t ul = strlen(url);
    size_t sl = strlen(sublink);
    /* guard ul > 0 so an empty url never reads url[-1] */
    int needs_separator = (ul > 0 && url[ul - 1] != '/');
    /* +1 for the terminating NUL */
    char *str = malloc(ul + (size_t) needs_separator + sl + 1);
    if (!str) {
        return NULL;
    }
    memcpy(str, url, ul);
    if (needs_separator) {
        str[ul] = '/';
    }
    /* sl + 1 copies the terminator of sublink as well */
    memcpy(str + ul + needs_separator, sublink, sl + 1);
    return str;
 }
--- a/link.h
+++ b/link.h
@ -0,0 +1,40 @@
 #ifndef LINK_H
 #define LINK_H
 #include <stdio.h>
 #include <stdlib.h>
 #include <gumbo.h>
 /**
 * \brief the link type
 *
 * html_to_linklist() stores every new link as LINK_UNKNOWN; nothing in the
 * visible sources assigns the other two values yet.
 */
 typedef enum {
    LINK_DIR,       /* printed as 'D' - presumably a sub-directory listing */
    LINK_FILE,      /* printed as 'F' - presumably a downloadable file */
    LINK_UNKNOWN    /* printed as 'U' - not classified yet */
 } linktype;
 /**
 * \brief link list data type
 *
 * Two parallel heap arrays of 'num' elements each: link[i] is a
 * heap-allocated copy of an href and type[i] is its classification.
 */
 typedef struct {
    int num;            /* number of entries in both arrays */
    char **link;        /* link strings; NULL while the list is empty */
    linktype *type;     /* link types; NULL while the list is empty */
 } ll_t;
 /**
 * \brief make a new link list
 * \return a heap-allocated list with no entries; release it with
 *         linklist_free()
 */
 ll_t *linklist_new();
 /**
 * \brief print a link list
 * \details writes one "<index> <type letter> <link>" line per entry to
 *          stderr
 */
 void linklist_print(ll_t *links);
 /**
 * \brief convert a html page to a link list
 * \details recursively visits 'node' and its children and appends the href
 *          of every <a> element that passes the validity filter in link.c
 * \param node  a node of the tree produced by the Gumbo parser
 * \param links the list that receives the links
 */
 void html_to_linklist(GumboNode *node, ll_t *links);
 /** \brief free a link list */
 void linklist_free(ll_t *links);
 /**
 * \brief the upper level
 * \details returns a newly allocated copy of 'url' up to and including its
 *          last '/'; the caller frees it
 * \warning does not check if you have reached the base level!
 */
 char *url_upper(const char *url);
 /**
 * \brief append url
 * \details returns a newly allocated "url/sublink" string, adding the '/'
 *          only when 'url' does not already end in one; the caller frees it
 */
 char *url_append(const char *url, const char *sublink);
 #endif
--- a/main.c
+++ b/main.c
@ -0,0 +1,13 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "test.h"
 /* Test driver: runs the HTML link-extraction test on the file named on the
 * command line, then the URL helper test. Always returns 0. */
 int main(int argc, char **argv)
 {
    /* gumbo_test() reads the HTML file name from argv[1] */
    gumbo_test(argc, argv);
    /* url_test() takes no input */
    url_test();
    return 0;
 }
--- a/test.c
+++ b/test.c
@ -0,0 +1,53 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
 #include "link.h"
 #include "test.h"
 /*
 * Exercise url_append() with a base URL with and without a trailing '/'.
 * Both results are printed to stdout together with their length; they
 * should be identical.
 */
 void url_test()
 {
    printf("--- start of url_test ---\n");
    const char *url1 = "http://www.google.com/";
    const char *url2 = "http://www.google.com";
    char *cat_url1 = url_append(url1, "fangfufu");
    char *cat_url2 = url_append(url2, "fangfufu");
    if (cat_url1 && cat_url2) {
        printf("%d %s\n", (int) strlen(cat_url1), cat_url1);
        printf("%d %s\n", (int) strlen(cat_url2), cat_url2);
    } else {
        fprintf(stderr, "url_append() failed\n");
    }
    /* url_append() hands ownership of its result to the caller */
    free(cat_url1);
    free(cat_url2);
    printf("--- end of url_test ---\n\n");
 }
 /*
 * Parse the HTML file named by argv[1] with Gumbo, collect its links with
 * html_to_linklist() and print them with linklist_print().
 *
 * Problems (wrong argument count, unreadable file, out of memory) are
 * reported on stderr and end the test early; the program is not terminated.
 */
 void gumbo_test(int argc, char **argv)
 {
    printf("--- start of gumbo_test ---\n");
    if (argc != 2) {
        fprintf(stderr, "Usage: find_links <html filename>.\n");
        return;
    }
    const char *filename = argv[1];
    FILE *fp = fopen(filename, "r");
    if (!fp) {
        fprintf(stderr, "File %s not found!\n", filename);
        return;
    }
    /* determine the file size by seeking to the end */
    long filesize = -1;
    if (fseek(fp, 0L, SEEK_END) == 0) {
        filesize = ftell(fp);
    }
    if (filesize < 0) {
        fprintf(stderr, "Cannot determine size of %s, %s\n",
                filename, strerror(errno));
        fclose(fp);
        return;
    }
    rewind(fp);
    /* gumbo_parse() expects a NUL-terminated string: reserve the extra byte */
    char *contents = malloc((size_t) filesize + 1);
    if (!contents) {
        fprintf(stderr, "Out of memory reading %s\n", filename);
        fclose(fp);
        return;
    }
    size_t nread = fread(contents, 1, (size_t) filesize, fp);
    if (nread != (size_t) filesize) {
        fprintf(stderr, "Read error, %s\n", strerror(errno));
    }
    /* terminate after what was really read */
    contents[nread] = '\0';
    fclose(fp);
    GumboOutput *output = gumbo_parse(contents);
    ll_t *links = linklist_new();
    if (output && links) {
        html_to_linklist(output->root, links);
    }
    if (output) {
        gumbo_destroy_output(&kGumboDefaultOptions, output);
    }
    if (links) {
        linklist_print(links);
        linklist_free(links);
    }
    /* released only after the parse tree, which was built from this text */
    free(contents);
    printf("--- end of gumbo_test ---\n\n");
 }
--- a/test.h
+++ b/test.h
@ -0,0 +1,6 @@
 #ifndef TEST_H
 #define TEST_H
 /* Prints the result of url_append() for a base URL with and without a
 * trailing '/'. */
 void url_test();
 /* Parses the HTML file named by argv[1] and prints the links found in it;
 * expects argc == 2. */
 void gumbo_test(int argc, char **argv);
 #endif