Implementing an HTTP Downloader in C

Click the blue text
Implementing an HTTP Downloader in C
Follow us

Due to changes in the public account push rules, please click “View” and add “Star” to get exciting technical shares at the first time

Source from the internet, please delete if infringing

This time I would like to share an HTTP downloader implemented in C. For example, when implementing an OTA upgrade feature, we often receive only a link to the upgrade package and have to download it ourselves, which requires an HTTP downloader.

Here is a share:

Function:

1. Supports downloading using chunked transfer

2. Can download redirected pages when redirected

3. The interface to be implemented is int http_download(char *url, char *save_path)

Thoughts:

1. Parse the input URL to separate the host, port number, and file path information

2. Resolve the host name to an IP address via DNS

3. Fill in the HTTP request header and send the request to the server

4. Parse the received HTTP header, extract status code, Content-length, Transfer-Encoding, and other field information

(1) If it is a normal header, proceed to the next normal receiving process

(2) If the status code is 302, extract the redirect address from the header and restart the download action with the new address

(3) If the transfer method is chunked, read data in segments and concatenate

(4) If it is a 404 or other status code, print error information

Defects:

  • The large number of error-handling branches makes the code harder to read

Others:

1. How to port it to a system without a file system?

Just modify the part of the save_data interface that saves the data

2. How to improve download speed?

  • Increase the read/write buffer size
  • Change to multithreading, use the Range field for segmented reading, and then concatenate them together

Code:

/************************************************************
Copyright (C), 2016, Leon, All Rights Reserved.
FileName: download.c
coding: UTF-8
Description: Implement a simple HTTP download function
Author: Leon
Version: 1.0
Date: 2016-12-2 10:49:32
Function:

History:
<author>    <time>  <version>   <description>
 Leon

************************************************************/

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netdb.h>

/* Buffer capacities and the socket timeout used throughout the downloader. */
enum {
    HOST_NAME_LEN   = 256,          /* capacity of the host name buffer */
    URI_MAX_LEN     = 2048,         /* capacity of the URI and redirect buffers */
    RECV_BUF        = 8192,         /* capacity of the shared read/write buffer */
    RCV_SND_TIMEOUT = (10 * 1000)   /* receive/send timeout, in milliseconds */
};

/*
 * State of one HTTP download attempt: the connection to the server, the
 * pieces of the parsed URL, the fields extracted from the response header,
 * and the output file together with transfer statistics.
 */
typedef struct {
    int sock;                       // Socket connected to the server (-1 while not connected)
    FILE *in;                       // stdio stream opened on sock with fdopen(), used for reading
    char host_name[HOST_NAME_LEN];  // Host part of the URL
    int port;                       // Server port (80 when the URL gives none)
    char uri[URI_MAX_LEN];          // Resource path requested from the server
    char buffer[RECV_BUF];          // Scratch buffer shared by request building and response reading
    int status_code;                // Status code taken from the response status line
    int chunked_flag;               // Set to 1 when the response uses chunked transfer encoding
    int len;                        // Value of the Content-length header
    char location[URI_MAX_LEN];     // Value of the Location header (redirect target)
    char *save_path;                // Output file path; points at the caller's string, not a copy
    FILE *save_file;                // Output file stream, opened on the first write
    int recv_data_len;              // Number of body bytes received so far
    time_t start_recv_time;         // time() value taken just before the body is received
    time_t end_recv_time;           // time() value taken right after the body is received
} http_t;

/*
 * Log levels. Each level is a distinct bit so that any combination can be
 * enabled in print_level.
 */
#define MSG_DEBUG   0x01
#define MSG_INFO    0x02
#define MSG_ERROR   0x04

/* Bitmask of the levels that lprintf() actually prints. */
static int print_level = /*MSG_DEBUG |*/ MSG_INFO | MSG_ERROR;

/*
 * lprintf(level, format, ...): printf-style logging filtered by print_level.
 *
 * Every message is prefixed with "[LEVEL][function(line)]:", where LEVEL is
 * the spelling of the level argument as written at the call site.
 * `format` must be a string literal because it is concatenated with that
 * prefix at compile time.
 *
 * Each line of the definition ends in a backslash so the whole body belongs
 * to the macro, and the do { } while (0) wrapper makes an invocation behave
 * as a single statement (safe inside an unbraced if/else).
 * The ", ##__VA_ARGS__" form (GNU extension, accepted by GCC and Clang) drops
 * the comma when no variadic arguments are given.
 */
#define lprintf(level, format, ...) do {                                    \
    if ((level) & print_level)                                              \
        printf("[%s][%s(%d)]:" format, #level, __func__, __LINE__,          \
               ##__VA_ARGS__);                                              \
} while (0)

/*
 * Smaller of two values. Each argument can be evaluated twice, so only pass
 * expressions without side effects.
 */
#define MIN(x, y) ((y) < (x) ? (y) : (x))

/* HTTP status codes that the downloader handles explicitly. */
enum {
    HTTP_OK        = 200,
    HTTP_REDIRECT  = 302,
    HTTP_NOT_FOUND = 404
};

/*
 * Case-insensitive substring search.
 *
 * str: text to search in
 * sub: text to look for
 *
 * Returns a pointer to the first position in str where sub matches when
 * letter case is ignored, or NULL when there is no match, when either
 * argument is NULL, or when sub is the empty string.
 */
char *strncasestr(char *str, char *sub)
{
    size_t needle_len;
    char *cursor;

    if (str == NULL || sub == NULL)
        return NULL;

    needle_len = strlen(sub);
    if (needle_len == 0)
        return NULL;

    /* Try every start position; the comparison stops at str's terminator. */
    for (cursor = str; *cursor != '\0'; ++cursor)
    {
        if (strncasecmp(cursor, sub, needle_len) == 0)
            return cursor;
    }

    return NULL;
}

/*
 * Split a URL of the form [http://]host[:port]/path into its parts.
 * Example: http://127.0.0.1:8080/testfile
 *
 * url:  NUL-terminated URL. The "http://" prefix is optional and is matched
 *       case-insensitively, but only at the very start of the string. A '/'
 *       after the host is mandatory.
 * info: receives host_name, port (80 when the URL has no port) and uri
 *       (everything from the first '/' after the host, NUL-terminated).
 *
 * Returns 0 on success. Returns -1 when an argument is NULL, the URL has no
 * '/' after the host, the host is empty or does not fit in host_name, the
 * port is not a plain decimal number in 1..65535, or the path does not fit
 * in uri. Over-long parts are rejected instead of truncated, because a
 * truncated host or path would address a different resource.
 */
int parser_URL(char *url, http_t *info)
{
    static const char scheme[] = "http://";
    char *host_start = NULL, *host_end = NULL, *path = NULL, *colon = NULL;
    size_t host_len = 0, path_len = 0;

    if (!url || !info)
    {
        lprintf(MSG_ERROR, "url invaild\n");
        return -1;
    }

    /* Strip the scheme only when it is a real prefix; an "http://" that
     * shows up later (e.g. inside a query string) must not shift the start. */
    host_start = url;
    if (strncasecmp(host_start, scheme, sizeof(scheme) - 1) == 0)
        host_start += sizeof(scheme) - 1;

    path = strchr(host_start, '/');
    if (!path)
    {
        lprintf(MSG_ERROR, "url invaild\n");
        return -1;
    }

    /* Parse port number and host */
    info->port = 80;   /* default when the URL carries no port */
    host_end = path;

    /* Only a ':' located before the path separates host and port. */
    colon = memchr(host_start, ':', (size_t)(path - host_start));
    if (colon)
    {
        char *digits_end = NULL;
        long port = 0;

        /* Insist on a leading digit so strtol cannot accept a sign or spaces. */
        if (colon[1] >= '0' && colon[1] <= '9')
            port = strtol(colon + 1, &digits_end, 10);

        /* The digits must run right up to the '/', and 65535 is a valid port. */
        if (digits_end != path || port <= 0 || port > 65535)
        {
            lprintf(MSG_ERROR, "url port invaild\n");
            return -1;
        }
        info->port = (int)port;
        host_end = colon;
    }

    host_len = (size_t)(host_end - host_start);
    if (host_len == 0 || host_len >= sizeof(info->host_name))
    {
        lprintf(MSG_ERROR, "url host invaild\n");
        return -1;
    }
    memcpy(info->host_name, host_start, host_len);
    info->host_name[host_len] = '\0';

    /* Copy uri, including its terminator */
    path_len = strlen(path);
    if (path_len >= sizeof(info->uri))
    {
        lprintf(MSG_ERROR, "url too long\n");
        return -1;
    }
    memcpy(info->uri, path, path_len + 1);

    lprintf(MSG_INFO, "parse url ok\nhost:%s, port:%d, uri:%s\n",
        info->host_name, info->port, info->uri);
    return 0;
}

/*
 * Resolve a host name to an IPv4 address.
 *
 * host_name: NUL-terminated host name or dotted-quad string.
 *
 * Returns the first resolved address in network byte order (suitable for
 * sockaddr_in.sin_addr.s_addr), or (unsigned long)-1 on failure. Failure
 * covers a NULL argument, a failed lookup, and a result that is not a
 * 4-byte AF_INET address.
 *
 * NOTE(review): gethostbyname() is kept to preserve the original dependency
 * set; its man page describes the result as static storage, so this helper
 * should not be called from several threads at once.
 */
unsigned long dns(char* host_name)
{
    struct hostent *host;
    struct in_addr addr;

    if (host_name == NULL)
    {
        lprintf(MSG_ERROR, "gethostbyname %s failed\n", "(null)");
        return (unsigned long)-1;
    }

    host = gethostbyname(host_name);
    if (host == NULL)
    {
        lprintf(MSG_ERROR, "gethostbyname %s failed\n", host_name);
        return (unsigned long)-1;
    }

    /* Only an IPv4 record of the expected size can be copied into s_addr. */
    if (host->h_addrtype != AF_INET
        || host->h_length != (int)sizeof(addr.s_addr)
        || host->h_addr_list == NULL
        || host->h_addr_list[0] == NULL)
    {
        lprintf(MSG_ERROR, "no IPv4 address for %s\n", host_name);
        return (unsigned long)-1;
    }

    /* h_addr_list entries are char pointers with no alignment guarantee:
     * copy the bytes instead of dereferencing through a cast pointer. */
    memcpy(&addr.s_addr, host->h_addr_list[0], sizeof(addr.s_addr));
    lprintf(MSG_INFO, "%s address is %s\n", host_name, inet_ntoa(addr));

    return addr.s_addr;
}

/*
 * Apply the RCV_SND_TIMEOUT limit to both directions of a socket.
 *
 * sock: socket descriptor to configure.
 *
 * Returns 0 when both SO_SNDTIMEO and SO_RCVTIMEO were set, -1 as soon as
 * one setsockopt() call fails.
 */
int set_socket_option(int sock)
{
    /* Send timeout first, then receive timeout. */
    static const int timeout_options[] = { SO_SNDTIMEO, SO_RCVTIMEO };
    struct timeval tv;
    size_t i;

    /* RCV_SND_TIMEOUT is in milliseconds; split it into seconds + microseconds. */
    tv.tv_sec = RCV_SND_TIMEOUT / 1000;
    tv.tv_usec = (RCV_SND_TIMEOUT % 1000) * 1000;
    lprintf(MSG_DEBUG, "%ds %dus\n", (int)tv.tv_sec, (int)tv.tv_usec);

    /* The socket stays in blocking mode: a non-blocking socket would need
     * extra handling around connect(). */
    for (i = 0; i < sizeof(timeout_options) / sizeof(timeout_options[0]); i++)
    {
        if (setsockopt(sock, SOL_SOCKET, timeout_options[i], (char *)&tv,
                sizeof(struct timeval)) == -1)
        {
            lprintf(MSG_ERROR, "setsockopt error: %m\n");
            return -1;
        }
    }

    return 0;
}

/*
 * Open a TCP connection to info->host_name on info->port.
 *
 * The socket is created and given its timeouts first, then the host name is
 * resolved, then the connection is made. On success the descriptor is stored
 * in info->sock; on any failure the descriptor is closed and info->sock is
 * left untouched.
 *
 * Returns 0 on success, -1 on failure.
 */
int connect_server(http_t *info)
{
    struct sockaddr_in peer;
    unsigned long ip = 0;
    unsigned short port = info->port;
    int fd;

    fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd == -1)
    {
        lprintf(MSG_ERROR, "socket create failed\n");
        return -1;
    }

    if (set_socket_option(fd) == -1)
    {
        close(fd);
        return -1;
    }

    ip = dns(info->host_name);
    if (ip == (unsigned long)-1)
    {
        lprintf(MSG_ERROR, "Get Dns Failed\n");
        close(fd);
        return -1;
    }

    memset(&peer, 0, sizeof(peer));
    peer.sin_family = AF_INET;
    peer.sin_port = htons(port);
    peer.sin_addr.s_addr = ip;

    if (connect(fd, (struct sockaddr *)&peer, sizeof(struct sockaddr)) == -1)
    {
        lprintf(MSG_ERROR, "connect failed: %m\n");
        close(fd);
        return -1;
    }

    info->sock = fd;
    return 0;
}

/*
 * Build the GET request for info->uri in info->buffer and send all of it on
 * info->sock.
 *
 * The Host header carries ":port" whenever the port is not 80, so servers
 * that tell virtual hosts apart by port see the authority the URL named.
 * send() may accept only part of the data, so the call is repeated until the
 * whole request is out; a send interrupted by a signal is retried.
 *
 * Returns the number of bytes sent (the request length) on success, -1 when
 * the request does not fit in the buffer or sending fails.
 */
int send_request(http_t *info)
{
    char host_header[HOST_NAME_LEN + 16];
    int req_len;
    size_t sent = 0;

    if (info->port == 80)
        snprintf(host_header, sizeof(host_header), "%s", info->host_name);
    else
        snprintf(host_header, sizeof(host_header), "%s:%d",
            info->host_name, info->port);

    req_len = snprintf(info->buffer, RECV_BUF, "GET %s HTTP/1.1\r\n"
        "Accept: */*\r\n"
        "User-Agent: Mozilla/5.0 (compatible; MSIE 5.01; Windows NT 5.0)\r\n"
        "Host: %s\r\n"
        "Connection: Close\r\n\r\n", info->uri, host_header);
    if (req_len < 0 || req_len >= RECV_BUF)
    {
        lprintf(MSG_ERROR, "request too long\n");
        return -1;
    }

    lprintf(MSG_DEBUG, "request:\n%s\n", info->buffer);

    while (sent < (size_t)req_len)
    {
        ssize_t n = send(info->sock, info->buffer + sent,
            (size_t)req_len - sent, 0);

        if (n < 0)
        {
            if (errno == EINTR)
                continue;   /* interrupted before any byte was sent: retry */
            lprintf(MSG_ERROR, "send failed: %m\n");
            return -1;
        }
        if (n == 0)
        {
            /* No progress on a non-empty buffer; bail out instead of spinning. */
            lprintf(MSG_ERROR, "send made no progress\n");
            return -1;
        }
        sent += (size_t)n;
    }

    return req_len;
}

/*
 * Return the value of header field `name` when `line` is that field.
 *
 * The name must start the line (compared case-insensitively) and be followed
 * directly by ':'. Returns a pointer into `line` at the first character after
 * the colon and any spaces or tabs, or NULL when the line is another field.
 */
static char *http_header_value(char *line, const char *name)
{
    size_t name_len = strlen(name);
    char *value;

    if (strncasecmp(line, name, name_len) != 0 || line[name_len] != ':')
        return NULL;

    value = line + name_len + 1;
    while (*value == ' ' || *value == '\t')
        value++;
    return value;
}

/*
 * Read the response status line and header fields from info->in.
 *
 * Fills info->status_code, and when the matching fields are present
 * info->len (Content-Length), info->chunked_flag (Transfer-Encoding: chunked)
 * and info->location (Location, with the line ending removed so it can be
 * used as a URL).
 *
 * Returns 0 once the blank line that ends the header has been read.
 * Returns -1 when the status line cannot be read or does not start with
 * "HTTP/", when Content-Length is not a non-negative number that fits in an
 * int, when a transfer encoding other than chunked is announced, or when the
 * stream ends before the blank line.
 */
int parse_http_header(http_t *info)
{
    char *p = NULL;

    /* Status line. Without this check a failed read would leave the request
     * text from send_request() in the buffer and that would be parsed. */
    if (!fgets(info->buffer, RECV_BUF, info->in))
    {
        lprintf(MSG_ERROR, "bad http head\n");
        return -1;
    }
    p = strchr(info->buffer, ' ');
    if (!p || strncasecmp(info->buffer, "HTTP/", 5) != 0)
    {
        lprintf(MSG_ERROR, "bad http head\n");
        return -1;
    }
    info->status_code = atoi(p + 1);
    lprintf(MSG_DEBUG, "http status code: %d\n", info->status_code);

    /* Header fields, one per line, up to the blank line */
    while (fgets(info->buffer, RECV_BUF, info->in))
    {
        /* Accept a bare "\n" as well, for servers that omit the '\r'. */
        if (!strcmp(info->buffer, "\r\n") || !strcmp(info->buffer, "\n"))
        {
            return 0;   /* Header parsed normally */
        }
        lprintf(MSG_DEBUG, "%s", info->buffer);

        if ((p = http_header_value(info->buffer, "Content-Length")) != NULL)
        {
            char *end = NULL;
            long value;

            errno = 0;
            value = strtol(p, &end, 10);
            if (end == p || errno == ERANGE || value < 0 || value > INT_MAX)
            {
                lprintf(MSG_ERROR, "bad Content-length: %s", info->buffer);
                return -1;
            }
            info->len = (int)value;
            lprintf(MSG_INFO, "Content-length: %d\n", info->len);
        }
        else if ((p = http_header_value(info->buffer, "Transfer-Encoding")) != NULL)
        {
            if (strncasestr(p, "chunked"))
            {
                info->chunked_flag = 1;
            }
            else
            {
                /* Does not support other encoding transfer methods */
                lprintf(MSG_ERROR, "Not support %s", info->buffer);
                return -1;
            }
            lprintf(MSG_INFO, "%s", info->buffer);
        }
        else if ((p = http_header_value(info->buffer, "Location")) != NULL)
        {
            /* Cut the line ending so it does not become part of the URL. */
            p[strcspn(p, "\r\n")] = '\0';
            snprintf(info->location, sizeof(info->location), "%s", p);
            lprintf(MSG_INFO, "Location: %s\n", info->location);
        }
    }

    lprintf(MSG_ERROR, "bad http head\n");
    return -1;  /* Stream ended before the blank line */
}

/*
 * Append `len` bytes from `buf` to the output file.
 *
 * The file named by info->save_path is opened (truncating any previous
 * content) on the first call and kept in info->save_file for later calls.
 * It is opened in binary mode so the payload is stored byte for byte on
 * platforms that distinguish text and binary streams.
 *
 * To port the downloader to a system without a file system, replace the
 * body of this function.
 *
 * Returns 0 when all bytes were written, -1 on a negative length, a NULL
 * buffer with a positive length, an open failure or a write failure.
 */
int save_data(http_t *info, const char *buf, int len)
{
    size_t remaining;

    if (len < 0 || (len > 0 && !buf))
        return -1;

    /* If the file is not opened, open it first */
    if (!info->save_file)
    {
        info->save_file = fopen(info->save_path, "wb");
        if (!info->save_file)
        {
            lprintf(MSG_ERROR, "fopen %s error: %m\n", info->save_path);
            return -1;
        }
    }

    remaining = (size_t)len;
    while (remaining > 0)
    {
        size_t written;

        errno = 0;
        written = fwrite(buf, sizeof(char), remaining, info->save_file);
        if (written < remaining)
        {
            if (errno != EINTR)
            {
                lprintf(MSG_ERROR, "fwrite error: %m\n");
                return -1;
            }
            /* Interrupted by a signal: clear the error and write the rest. */
            clearerr(info->save_file);
        }
        /* Continue after the bytes already written, never from the start. */
        buf += written;
        remaining -= written;
    }

    return 0;
}

/*
 * Read exactly `len` body bytes from info->in and pass them to save_data().
 *
 * Data is moved through info->buffer in pieces of at most RECV_BUF bytes.
 * Bytes that arrive before a timeout, an error or the peer closing the
 * connection are still saved and counted in info->recv_data_len, but the
 * call fails because the requested amount was not reached.
 *
 * Returns 0 when all `len` bytes were read and saved (including len == 0),
 * -1 when len is negative, the stream ends or fails early, or saving fails.
 */
int read_data(http_t *info, int len)
{
    int remaining = len;

    /* A negative length would turn into a huge size_t in fread(). */
    if (len < 0)
    {
        lprintf(MSG_ERROR, "invalid read length %d\n", len);
        return -1;
    }

    while (remaining > 0)
    {
        size_t want = (size_t)MIN(remaining, RECV_BUF);
        size_t got;
        int read_errno;

        errno = 0;
        got = fread(info->buffer, sizeof(char), want, info->in);
        read_errno = errno;     /* keep it: the calls below may overwrite errno */
        lprintf(MSG_DEBUG, "read len: %d\n", (int)got);

        if (got > 0)
        {
            if (-1 == save_data(info, info->buffer, (int)got))
            {
                return -1;
            }
            info->recv_data_len += (int)got;
            remaining -= (int)got;
        }

        if (got < want)
        {
            if (ferror(info->in))
            {
                if (read_errno == EINTR)    /* Signal interrupted the read */
                {
                    clearerr(info->in);     /* the error flag is sticky */
                    continue;
                }
                if (read_errno == EAGAIN || read_errno == EWOULDBLOCK)  /* Timeout */
                {
                    lprintf(MSG_ERROR, "socket receive timeout: %dms\n", RCV_SND_TIMEOUT);
                }
                else    /* Other errors */
                {
                    errno = read_errno;     /* %m reports errno */
                    lprintf(MSG_ERROR, "fread error: %m\n");
                }
            }
            else    /* Reached the end of the stream */
            {
                lprintf(MSG_ERROR, "socket closed by peer\n");
            }
            break;
        }
    }

    if (remaining != 0)
    {
        lprintf(MSG_ERROR, "we need to read %d bytes, but read %d bytes now\n",
            len, len - remaining);
        return -1;
    }

    return 0;
}

/*
 * Receive a body sent with chunked transfer encoding.
 *
 * Each chunk is a hexadecimal size line, that many data bytes, and a CRLF.
 * A chunk of size 0 ends the body; any trailer lines after it are read and
 * discarded up to the closing blank line or the end of the stream.
 *
 * Returns 0 when the terminating chunk was reached. Returns -1 when a size
 * line cannot be read or is not a hexadecimal number in 0..INT_MAX, when
 * reading or saving chunk data fails, or when a chunk is not followed by
 * CRLF.
 */
int recv_chunked_response(http_t *info)
{
    long part_len;
    char *end = NULL;

    /* With chunked encoding there is no Content-length to rely on */
    for (;;)
    {
        /* Get the length of this part */
        if (!fgets(info->buffer, RECV_BUF, info->in))
        {
            lprintf(MSG_ERROR, "read chunk size failed\n");
            return -1;
        }

        errno = 0;
        part_len = strtol(info->buffer, &end, 16);
        /* Without the end check, a line with no hex digits would parse as 0
         * and be mistaken for the end of the body. */
        if (end == info->buffer || errno == ERANGE
            || part_len < 0 || part_len > INT_MAX)
        {
            lprintf(MSG_ERROR, "bad chunk size\n");
            return -1;
        }
        lprintf(MSG_DEBUG, "part len: %ld\n", part_len);

        if (part_len == 0)
            break;      /* last chunk */

        if (-1 == read_data(info, (int)part_len))
            return -1;

        /* Every data chunk is followed by "\r\n"; anything else means the
         * stream is out of step with the chunk framing. */
        if (2 != fread(info->buffer, sizeof(char), 2, info->in)
            || memcmp(info->buffer, "\r\n", 2) != 0)
        {
            lprintf(MSG_ERROR, "fread \\r\\n error : %m\n");
            return -1;
        }
    }

    /* Skip optional trailer fields and the final blank line. */
    while (fgets(info->buffer, RECV_BUF, info->in))
    {
        if (!strcmp(info->buffer, "\r\n") || !strcmp(info->buffer, "\n"))
            break;
    }

    return 0;
}

/*
 * Average download speed in bytes per second.
 *
 * Divides info->recv_data_len by the whole seconds between
 * info->start_recv_time and info->end_recv_time. An interval of zero seconds
 * is counted as one second so the division never yields inf.
 */
float calc_download_speed(http_t *info)
{
    int elapsed = info->end_recv_time - info->start_recv_time;

    if (elapsed == 0)
        elapsed = 1;

    return (float)info->recv_data_len / elapsed;
}

/*
 * Receive the response body, choosing the framing from the parsed header.
 *
 *  - chunked_flag set:  delegate to recv_chunked_response().
 *  - info->len > 0:     read exactly that many bytes with read_data().
 *  - info->len == 0:    no usable Content-Length was seen, so the body (if
 *                       any) is delimited by the server closing the
 *                       connection, which the request asks for with
 *                       "Connection: Close". Read and save until end of
 *                       stream instead of dropping the body.
 *
 * NOTE(review): an explicit "Content-Length: 0" also lands in the last case;
 * it then completes as soon as the server closes the connection, and fails
 * with a receive timeout if the server keeps the connection open.
 *
 * Returns 0 on success, -1 on a negative length or a read/save failure.
 */
int recv_response(http_t *info)
{
    if (info->chunked_flag)
        return recv_chunked_response(info);

    if (info->len < 0)
    {
        lprintf(MSG_ERROR, "invalid content length %d\n", info->len);
        return -1;
    }

    if (info->len > 0)
    {
        if (-1 == read_data(info, info->len))
            return -1;
        return 0;
    }

    /* Length unknown: the end of the stream marks the end of the body. */
    for (;;)
    {
        size_t got;
        int read_errno;

        errno = 0;
        got = fread(info->buffer, sizeof(char), RECV_BUF, info->in);
        read_errno = errno;     /* keep it: the calls below may overwrite errno */

        if (got > 0)
        {
            if (-1 == save_data(info, info->buffer, (int)got))
                return -1;
            info->recv_data_len += (int)got;
        }

        if (got < RECV_BUF)
        {
            if (!ferror(info->in))
                return 0;               /* peer closed: body complete */
            if (read_errno == EINTR)
            {
                clearerr(info->in);     /* the error flag is sticky */
                continue;
            }
            errno = read_errno;         /* %m reports errno */
            lprintf(MSG_ERROR, "fread error: %m\n");
            return -1;
        }
    }
}

/*
 * Release everything owned by a download context, then the context itself.
 *
 * info: context allocated by http_download(); NULL is accepted and ignored.
 *
 * info->in is created with fdopen() on info->sock, so closing the stream
 * also closes the socket. The descriptor is therefore closed directly only
 * when no stream was created; closing it twice could hit a descriptor that
 * has been reused in the meantime.
 */
void clean_up(http_t *info)
{
    if (!info)
        return;

    if (info->in)
        fclose(info->in);           /* closes info->sock as well */
    else if (-1 != info->sock)
        close(info->sock);

    if (info->save_file)
    {
        /* fclose() flushes buffered data; a failure means the file is incomplete. */
        if (fclose(info->save_file) != 0)
            lprintf(MSG_ERROR, "fclose %s error: %m\n", info->save_path);
    }

    free(info);
}

/* Main download function */
int http_download(char *url, char *save_path)
{
    http_t *info = NULL;
    char tmp[URI_MAX_LEN] = {0};

    if(!url || !save_path)
        return -1;

    // Initialize structure
    info = malloc(sizeof(http_t));
    if(!info)
    {
        lprintf(MSG_ERROR, "malloc failed\n");
        return -1;
    }
    memset(info, 0x0, sizeof(http_t));
    info->sock = -1;
    info->save_path = save_path;

    // Parse URL
    if(-1 == parser_URL(url, info))
        goto failed;

    // Connect to server
    if(-1 == connect_server(info))
        goto failed;

    // Send HTTP request message
    if(-1 == send_request(info))
        goto failed;

    // Receive response header information
    info->in = fdopen(info->sock, "r");
    if(!info->in)
    {
        lprintf(MSG_ERROR, "fdopen error\n");
        goto failed;
    }

    // Parse header
    if(-1 == parse_http_header(info))
        goto failed;

    switch(info->status_code)
    {
        case HTTP_OK:
            // Receive data
            lprintf(MSG_DEBUG, "recv data now\n");
            info->start_recv_time = time(0);
            if(-1 == recv_response(info))
                goto failed;

            info->end_recv_time = time(0);
            lprintf(MSG_INFO, "recv %d bytes\n", info->recv_data_len);
            lprintf(MSG_INFO, "Average download speed: %.2fKB/s\n", 
                    calc_download_speed(info)/1000);
            break;
        case HTTP_REDIRECT:
            // Restart this function
            lprintf(MSG_INFO, "redirect: %s\n", info->location);
            strncpy(tmp, info->location, URI_MAX_LEN - 1);
            clean_up(info);
            return http_download(tmp, save_path);

        case HTTP_NOT_FOUND:
            // Exit
            lprintf(MSG_ERROR, "Page not found\n");
            goto failed;
            break;

        default:
            lprintf(MSG_INFO, "Not supported http code %d\n", info->status_code);
            goto failed;
    }

    clean_up(info);
    return 0;
failed:
    clean_up(info);
    return -1;
}

/****************************************************************************
Test cases:
(1) Chunked receive test
./a.out "http://www.httpwatch.com/httpgallery/chunked/chunkedimage.aspx" test.aspx
(2) Redirect test
./a.out "192.168.10.1/main.html" test.txt
(3) Error input test
./a.out "32131233" test.txt
(4) Root directory input test
./a.out "www.baidu.com/" test.txt
(5) Port number access test
./a.out "192.168.0.200:8000/FS_AC6V1.0BR_V15.03.4.12_multi_TD01.bin" test.txt
****************************************************************************/

/*
 * Command-line entry point: <program> <url> <save_path>
 *
 * Returns -1 when fewer than two arguments are given, 0 when the download
 * succeeded, and 1 when it failed, so scripts can tell whether the file was
 * fetched.
 */
int main(int argc, char *argv[])
{
    if(argc < 3)
    {
        fprintf(stderr, "Usage: %s <url> <save_path>\n",
            argc > 0 ? argv[0] : "download");
        return -1;
    }

    if(0 != http_download(argv[1], argv[2]))
        return 1;

    return 0;
}

If you are over 18 years old and find learning <strong>C language</strong> too difficult? Want to try other programming languages? Then I recommend you learn <strong>Python</strong>, currently, there is a free Python course worth 499 yuan available for a limited time, limited to 10 spots!




▲ Scan the QR code - Get it for free


Implementing an HTTP Downloader in C
Click to read the original text to learn more

Leave a Comment