#include #include #include #include #include #ifdef TESTURLLIB int main (int argc, char **argv) { Url *url; char url_str[1025]; url = urllib_init (); if (argc == 2) { if (urllib_parse (url, argv[1])) return 1; } if (argc >= 3) { if (urllib_parse (url, argv[1])) return 1; if (urllib_join (url, argv[2])) return 1; } else { printf ("Enter a url : "); scanf ("%s", url_str); if (urllib_parse (url, url_str)) return 1; printf ("Enter a path : "); scanf ("%s", url_str); if (urllib_join (url, url_str)) return 1; } urllib_tostring (url, url_str); printf ("=> %s\n", url_str); urllib_free (url); return 0; } #endif Url* urllib_init (void) { Url *url = malloc (sizeof (Url)); url->scheme = NULL; url->host = NULL; url->port = NULL; url->path = NULL; return url; } void urllib_free (Url *url) { free (url->scheme); free (url->host); free (url->port); free (url->path); free (url); } int urllib_parse (Url *url, const char *url_string) { const char *cursor = url_string; char *tmpcursor = NULL; int len, i; /* get scheme */ tmpcursor = strchr (cursor, ':'); if (tmpcursor == NULL) return 1;//error if not found len = tmpcursor - cursor; /* Verify scheme syntax */ if (!isalpha (cursor[0])) return 1; for (i = 1; i < len; i++) if (!(isalnum(cursor[i]) || cursor[i] == '+' || cursor[i] == '-' || cursor[i] == '.')) return 1; url->scheme = malloc (sizeof (char) * (len + 1)); if (url->scheme == NULL) return 1; strncpy (url->scheme, cursor, len); url->scheme[len] = '\0'; //Move after first ':' cursor = tmpcursor + 1; if (strlen (cursor) <= 2) return 1;//There must be 2 '/' and an host if (cursor[0] != '/' || cursor[1] != '/') return 1; //parse IPv6, IPv4 or hostname cursor += 2; if (cursor[0] == '[') // IPv6 { tmpcursor = strchr (cursor, ']'); if (tmpcursor == NULL) return 1; len = tmpcursor - cursor; for (i = 1; i < len; i++)//verify if (!isdigit (cursor[i]) && !(tolower(cursor[i]) >= 'a' && tolower(cursor[i]) <= 'f') && !(cursor[i] == ':')) return 1; } else // IPv4 or hostname { tmpcursor = strchr (cursor, ':'); if (tmpcursor == NULL) tmpcursor = strchr (cursor, '/'); if (tmpcursor == NULL) len = strlen (cursor); else len = tmpcursor - cursor; for (i = 0; i < len; i++) if (!isalnum (cursor[i]) && !(cursor[i] == '.')) return 1; } url->host = malloc (sizeof (char) * (len + 1)); if (url->host == NULL) return 1; strncpy (url->host, cursor, len); url->host[len] = '\0'; //parse port (or not) cursor += len; if (cursor[0] == ':') { tmpcursor = strchr (cursor, '/'); if (tmpcursor == NULL) len = strlen (cursor); else len = tmpcursor - cursor; for (i = 1; i < len; i++) if (!isdigit (cursor[i])) return 1; url->port = malloc (sizeof (char) * len); if (url->port == NULL) return 1; strncpy (url->port, cursor + 1, len - 1); cursor += len; } //parse path len = strlen (cursor); if (len > 0) { url->path = malloc (sizeof (char) * (len + 1)); if (url->path == NULL) return 1; strncpy (url->path, cursor, len); } return 0; } void urllib_tostring (Url *url, char *dest) { if (url->port == NULL) if (url->path == NULL) sprintf (dest, "%s://%s\r\n", url->scheme, url->host); else sprintf (dest, "%s://%s%s\r\n", url->scheme, url->host, url->path); else if (url->path == NULL) sprintf (dest, "%s://%s:%s\r\n", url->scheme, url->host, url->port); else sprintf (dest, "%s://%s:%s%s\r\n", url->scheme, url->host, url->port, url->path); } int urllib_join (Url *url, const char *path) {// view RFC 3986, section 5.2.4 int len = strlen (path); char *base, *result, *inputcursor; if (len == 0)//nothing to merge return 0; if (url->path == NULL)//no current path, path = '/' { url->path = malloc (sizeof (char) * 2); if (url->path == NULL) return 1; url->path[0] = '/'; url->path[1] = '\0'; } if (path[0] != '/') //relative path { int url_path_len = strlen (path); if (url->path[url_path_len - 1] != '/')//we are on a file { char *tmpcursor = strrchr (url->path, '/'); if (tmpcursor == NULL) return -1; tmpcursor[1] = '\0'; } len += url_path_len; base = malloc (sizeof (char) * (len + 1)); if (base == NULL) return 1; result = malloc (sizeof (char) * (len + 1)); if (result == NULL) { free (base); return 1; } sprintf (base, "%s%s", url->path, path); result[0] = '\0'; } else //absolute path { base = malloc (sizeof (char) * (len + 1)); if (base == NULL) return 1; result = malloc (sizeof (char) * (len + 1)); if (result == NULL) { free (base); return 1; } strncpy (base, path, len); base[len] = '\0'; result[0] = '\0'; } inputcursor = base; len = strlen (inputcursor); while (len > 0) //while input buffer is not empty { char *tmpcursor; if (len >= 3 && inputcursor[0] == '.' && inputcursor[1] == '.' && inputcursor[2] == '/') { inputcursor += 3; len = strlen (inputcursor); } else if (len >= 2 && inputcursor[0] == '.' && inputcursor[1] == '/') { inputcursor += 2; len = strlen (inputcursor); } else if (len >= 3 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '/') { inputcursor += 2; len = strlen (inputcursor); } else if (len == 2 && inputcursor[0] == '/' && inputcursor[1] == '.') { inputcursor += 1; inputcursor[0] = '/'; len = strlen (inputcursor); } else if (len >= 4 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '.' && inputcursor[3] == '/') { inputcursor += 3; len = strlen (inputcursor); //remove result last segment tmpcursor = strrchr (result, '/'); if (tmpcursor != NULL) *tmpcursor = '\0'; } else if (len == 3 && inputcursor[0] == '/' && inputcursor[1] == '.' && inputcursor[2] == '.') { inputcursor += 2; inputcursor[0] = '/'; len = strlen (inputcursor); //remove result last segment tmpcursor = strrchr (result, '/'); if (tmpcursor != NULL) *tmpcursor = '\0'; } else if ((len == 2 && inputcursor[0] == '.' && inputcursor[1] == '.') || (len == 1 && inputcursor[0] == '.')) { inputcursor[0] = '\0'; len = strlen (inputcursor); } else { //get first segment of inputcursor then append it to result and delete it tmpcursor = strchr (inputcursor + 1, '/'); if (tmpcursor == NULL)//last segment in input buffer { sprintf (result, "%s%s", result, inputcursor); *inputcursor = '\0'; } else { len = tmpcursor - inputcursor; strncpy (result + strlen (result), inputcursor, len); inputcursor = tmpcursor; } len = strlen (inputcursor); } } free (url->path); url->path = result; free (base); return 0; }