Bugfix: No longer deadlock after encountering HTTP 429 while filling up a Linktable.

- Renamed some functions
- After initialise parse of the HTML file, files are no longer assigned as LINK_FILE. They are now assigned as LINK_UNINITIALISED_FILE.
- Link_req_file_stat() now crashes if the link type is other than LINK_UNINITIALISED_FILE.
This commit is contained in:
Fufu Fang 2019-09-03 22:11:23 +01:00
parent 367ce58e7f
commit b6777c0478
No known key found for this signature in database
GPG Key ID: 0F6BB5EF6F8BB729
5 changed files with 100 additions and 64 deletions

View File

@ -65,7 +65,7 @@ static Link *Link_new(const char *linkname, LinkType type)
return link;
}
static LinkType linkname_type(const char *linkname)
static LinkType linkname_to_LinkType(const char *linkname)
{
/* The link name has to start with alphanumerical character */
if (!isalnum(linkname[0])) {
@ -81,7 +81,7 @@ static LinkType linkname_type(const char *linkname)
return LINK_DIR;
}
return LINK_FILE;
return LINK_UNINITIALISED_FILE;
}
/**
@ -97,8 +97,8 @@ static void HTML_to_LinkTable(GumboNode *node, LinkTable *linktbl)
if (node->v.element.tag == GUMBO_TAG_A &&
(href = gumbo_get_attribute(&node->v.element.attributes, "href"))) {
/* if it is valid, copy the link onto the heap */
LinkType type = linkname_type(href->value);
if ( (type == LINK_DIR) || (type == LINK_FILE) ) {
LinkType type = linkname_to_LinkType(href->value);
if ( (type == LINK_DIR) || (type == LINK_UNINITIALISED_FILE) ) {
LinkTable_add(linktbl, Link_new(href->value, type));
}
}
@ -154,8 +154,14 @@ static CURL *Link_to_curl(Link *link)
return curl;
}
void Link_get_stat(Link *this_link)
void Link_req_file_stat(Link *this_link)
{
if (this_link->type != LINK_UNINITIALISED_FILE) {
fprintf(stderr, "Link_req_file_stat(), invalid request, LinkType: %c",
this_link->type);
exit_failure();
}
CURL *curl = Link_to_curl(this_link);
curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
curl_easy_setopt(curl, CURLOPT_FILETIME, 1L);
@ -178,7 +184,7 @@ void Link_get_stat(Link *this_link)
transfer_nonblocking(curl);
}
void Link_set_stat(Link* this_link, CURL *curl)
void Link_set_file_stat(Link* this_link, CURL *curl)
{
long http_resp;
curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_resp);
@ -186,24 +192,59 @@ void Link_set_stat(Link* this_link, CURL *curl)
double cl = 0;
curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
curl_easy_getinfo(curl, CURLINFO_FILETIME, &(this_link->time));
if (this_link->type == 'F') {
if (cl == -1) {
this_link->type = LINK_INVALID;
} else {
this_link->content_length = cl;
}
if (cl == -1) {
this_link->type = LINK_INVALID;
} else {
this_link->type = LINK_FILE;
this_link->content_length = cl;
}
} else {
fprintf(stderr, "Link_set_stat(): HTTP %ld", http_resp);
this_link->type = LINK_INVALID;
fprintf(stderr, "Link_set_file_stat(): HTTP %ld", http_resp);
if (http_resp == HTTP_TOO_MANY_REQUESTS) {
fprintf(stderr, ", re-adding the link to the queue");
Link_get_stat(this_link);
fprintf(stderr, ", retrying later.\n");
} else {
this_link->type = LINK_INVALID;
}
fprintf(stderr, ".\n");
}
}
/**
* \brief Fill in the uninitialised entries in a link table
* \details Try and get the stats for each link in the link table. This will get
* repeated until the uninitialised entry count drop to zero.
*/
static void LinkTable_uninitialised_fill(LinkTable *linktbl)
{
int u;
char s[STATUS_LEN];
fprintf(stderr, "LinkTable_uninitialised_fill(): ... ");
do {
u = 0;
for (int i = 0; i < linktbl->num; i++) {
Link *this_link = linktbl->links[i];
if (this_link->type == LINK_UNINITIALISED_FILE) {
Link_req_file_stat(linktbl->links[i]);
u++;
}
}
/* Block until the gaps are filled */
int n = curl_multi_perform_once();
int i = 0;
int j = 0;
while ( (i = curl_multi_perform_once()) ) {
if (j) {
erase_string(stderr, STATUS_LEN, s);
}
snprintf(s, STATUS_LEN, "%d / %d", n-i, n);
fprintf(stderr, "%s", s);
j++;
}
} while (u);
erase_string(stderr, STATUS_LEN, s);
fprintf(stderr, "Done!\n");
}
static void LinkTable_fill(LinkTable *linktbl)
{
Link *head_link = linktbl->links[0];
@ -220,51 +261,23 @@ static void LinkTable_fill(LinkTable *linktbl)
strncpy(this_link->linkname, unescaped_linkname, MAX_FILENAME_LEN);
curl_free(unescaped_linkname);
curl_easy_cleanup(c);
Link_get_stat(this_link);
}
/* Block until the LinkTable is filled up */
fprintf(stderr, "LinkTable_fill(): ... ");
int n = curl_multi_perform_once();
int i = 0;
int j = 0;
char s[STATUS_LEN];
while ( (i = curl_multi_perform_once()) ) {
if (j) {
for (size_t k = 0; k < strnlen(s, STATUS_LEN); k++) {
fprintf(stderr, "\b");
}
}
snprintf(s, STATUS_LEN, "%d / %d", i, n);
fprintf(stderr, "%s", s);
j++;
}
for (size_t k = 0; k < strnlen(s, MAX_FILENAME_LEN); k++) {
fprintf(stderr, "\b");
}
fprintf(stderr, "Done!\n");
LinkTable_uninitialised_fill(linktbl);
}
/**
* \brief fill in the gaps in a link table
*/
static void LinkTable_gap_fill(LinkTable *linktbl)
static void LinkTable_invalid_reset(LinkTable *linktbl)
{
for (int i = 0; i < linktbl->num; i++) {
Link *this_link = linktbl->links[i];
if ((this_link->type != LINK_FILE) &&
(this_link->type != LINK_DIR) &&
(this_link->type != LINK_HEAD)) {
Link_get_stat(linktbl->links[i]);
if (this_link->type == LINK_INVALID) {
this_link->type = LINK_UNINITIALISED_FILE;
}
}
/* Block until the gaps are filled */
while (curl_multi_perform_once())
;
}
static void LinkTable_free(LinkTable *linktbl)
{
for (int i = 0; i < linktbl->num; i++) {
@ -381,7 +394,8 @@ HTTP %ld\n", url, http_resp);
LinkTable_fill(linktbl);
} else {
/* Fill in the holes in the link table */
LinkTable_gap_fill(linktbl);
LinkTable_invalid_reset(linktbl);
LinkTable_uninitialised_fill(linktbl);
}
/* Save the link table */

View File

@ -20,7 +20,8 @@ typedef enum {
LINK_HEAD = 'H',
LINK_DIR = 'D',
LINK_FILE = 'F',
LINK_INVALID = 'I'
LINK_INVALID = 'I',
LINK_UNINITIALISED_FILE = 'U'
} LinkType;
/**
@ -74,12 +75,12 @@ void link_system_init();
/**
* \brief Add a link to the curl multi bundle for querying stats
*/
void Link_get_stat(Link *this_link);
void Link_req_file_stat(Link *this_link);
/**
* \brief Set the stats of a link, after curl multi handle finished querying
*/
void Link_set_stat(Link* this_link, CURL *curl);
void Link_set_file_stat(Link* this_link, CURL *curl);
/**
* \brief create a new LinkTable

View File

@ -135,7 +135,7 @@ static void curl_process_msgs(CURLMsg *curl_msg, int n_running_curl,
if (!curl_msg->data.result) {
/* Transfer successful, query the file size */
if (transfer->type == FILESTAT) {
Link_set_stat(transfer->link, curl);
Link_set_file_stat(transfer->link, curl);
}
} else {
fprintf(stderr, "curl_process_msgs(): %d - %s <%s>\n",

View File

@ -3,7 +3,6 @@
#include <execinfo.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -72,3 +71,17 @@ void exit_failure()
exit(EXIT_FAILURE);
}
void erase_string(FILE *file, size_t max_len, char *s)
{
size_t l = strnlen(s, max_len);
for (size_t k = 0; k < l; k++) {
fprintf(file, "\b");
}
for (size_t k = 0; k < l; k++) {
fprintf(file, " ");
}
for (size_t k = 0; k < l; k++) {
fprintf(file, "\b");
}
}

View File

@ -7,6 +7,7 @@
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
/**
* \brief the maximum length of a path and a URL.
@ -17,6 +18,21 @@
/** \brief the maximum length of a filename. */
#define MAX_FILENAME_LEN 255
/**
* \brief append a path
* \details This function appends a path with the next level, while taking the
* trailing slash of the upper level into account.
*
* Please free the char * after use.
*/
char *path_append(const char *path, const char *filename);
/**
* \brief division, but rounded to the nearest integer rather than truncating
*/
int64_t round_div(int64_t a, int64_t b);
/**
* \brief wrapper for pthread_mutex_lock()
*/
@ -32,18 +48,10 @@ void PTHREAD_MUTEX_UNLOCK(pthread_mutex_t *x);
*/
void exit_failure();
/**
* \brief append a path
* \details This function appends a path with the next level, while taking the
* trailing slash of the upper level into account.
*
* Please free the char * after use.
*/
char *path_append(const char *path, const char *filename);
/**
* \brief division, but rounded to the nearest integer rather than truncating
* \brief erase a string from the terminal
*/
int64_t round_div(int64_t a, int64_t b);
void erase_string(FILE *file, size_t max_len, char *s);
#endif