本文简述一下使用C++的libcurl库开发Web信息搜集工具的开发过程。 - 1024 快乐!!

0x00 libcurl的安装

sudo apt-get install libcurl # debian
sudo brew install libcurl # mac

一般安装了curl这个工具的话都应该自带了。

0x01 编译命令

g++ main.cpp curl_controller.cpp  -o curl -lcurl -O3

0x02 源代码 - 暂未支持多线程,明天做

目录结构:

liyingzhes-MacBook-Pro:c_http liyingzhe$ tree -L 1
.
├── CMakeLists.txt
├── cmake-build-debug
├── curl
├── curl_controller.cpp
├── curl_controller.h
└── main.cpp

编译的时候只需要 curl_controller.cpp 、 curl_controller.h 、main.cpp

main.cpp

#include <iostream>
#include <curl/curl.h>
#include <string>
#include <regex>
#include "curl_controller.h"
#include <vector>
#define NUM 5 // 请求次数
int main() {
    struct REGEX REG; // 正则结构体
    /**
     * 单次请求可以直接赋值
     */
    HTTP_REQUEST_OPTIONS global_options;
    global_options.URL = "http://127.0.0.1";
    global_options.USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36";
    global_options.TIME_OUT = 5;

    std::vector <CURL *>curlist; // 发送请求队列
    CURL_CONTROLLER * Controller = new CURL_CONTROLLER;
    /**
     * 设置正则
     */
    REG.Title = "<title>([^<]+)</title>";
    REG.Server = "Server:\\\\s(.*?)\\\\r";
    /**
     * 动态分配内存
     * 用于存储响应信息
     */
    struct ResponseInfo * Response = new ResponseInfo[NUM];
    /**
     * 动态分配内存
     * 给每个请求都设置参数
     */
    struct HTTP_REQUEST_OPTIONS * OPTIONS = new HTTP_REQUEST_OPTIONS[NUM];
    for(int x =0 ; x < 5 ;x++){
        OPTIONS[x].URL = global_options.URL;
        OPTIONS[x].USER_AGENT = global_options.USER_AGENT;
        OPTIONS[x].TIME_OUT = global_options.TIME_OUT;
    }
    /**
     * 将指针放入数组,以便于多线程
     */
    for(int x =0 ; x < NUM ;x++){
        curlist.push_back(Controller->setOpt(&OPTIONS[x],&Response[x]));
    }
    /**
     * 普通for请求
     */
    for(int x = 0; x < NUM; x++ ){
        Controller->RunCurl(curlist[x],&Response[x]);
    }
    /**
     * 取得响应信息
     */
    for(int x =0 ; x < NUM ;x++){
        Controller->getResponse(&Response[x],&REG);
        std::cout << "Code  :" << Response[x].ResponseCode  << "\tTitle :" << Response[x].ResponseTitle << std::endl;
        std::cout << "Header :" << Response[x].ResponseHeader << std::endl;
    }
    /**
     * 释放内存
     */
    delete [] Response;
    delete [] OPTIONS;
    delete Controller;
    return 0;
}

curl_controller.h

//
// Created by liyingzhe on 24/10/17.
//

#ifndef C_HTTP_CURL_CONTROLLER_H
#define C_HTTP_CURL_CONTROLLER_H
#include <iostream>
#include <regex>
#include <curl/curl.h>

/**
 * 正则表达式
 */
struct REGEX{
    std::string Title; // 匹配标题的正则
    std::string Server; // 匹配Server的正则
    std::string Powered; // 匹配Powered的正则
};
/**
 * 设置必备参数
 */
struct HTTP_REQUEST_OPTIONS{
    short TIME_OUT; // 连接超时时间
    std::string URL; // 请求 URL
    std::string USER_AGENT; // 浏览器标识
    // 还可以扩展更多……
};
/**
 * 响应内容结构体
 */
struct ResponseInfo{
    std::string ResponseHeader; //响应头
    std::string ResponseBody; // 响应主体
    short int   ResponseCode; // 响应码
    std::string ResponseTitle; // 响应标题
    std::string ResponseServerInfo; // 响应Server内容
};
class CURL_CONTROLLER{
public:
    /**
     * 初始化 CURL
     */
    CURL_CONTROLLER();
    /**
     * 设置请求参数
     * @param options
     * @param Response
     * @return
     */
    CURL * setOpt(struct HTTP_REQUEST_OPTIONS * options,ResponseInfo * Response);
    /**
     * 获取响应内容
     * @param Response
     * @param REG
     * @return
     */
    ResponseInfo * getResponse(ResponseInfo * Response,REGEX * REG);
    /**
     * 执行请求
     * @param curl
     * @param Response
     */
    void RunCurl(CURL * curl,ResponseInfo * Response);
    /**
     * 释放资源
     */
    ~CURL_CONTROLLER();
    /**
     * CURL指针
     */
    CURL * curl;
private:
    /**
     * 回调函数
     * @param ptr
     * @param n
     * @param m
     * @param data
     * @return
     */
    static size_t callBackWrite(char * ptr,size_t n,size_t m,std::string * data);
};
#endif //C_HTTP_CURL_CONTROLLER_H

curl_controller.cpp

//
// Created by liyingzhe on 24/10/17.
//

#include "curl_controller.h"
/**
 * 回调函数
 * @param ptr
 * @param n
 * @param m
 * @param data
 * @return
 */
size_t CURL_CONTROLLER::callBackWrite(char *ptr, size_t n, size_t m, std::string *data) {
    if (data == NULL) return 0;
    data->append(ptr,n*m);
    return n*m;
}
/**
 * 初始化 CURL
 */
CURL_CONTROLLER::CURL_CONTROLLER() {
    try{
        curl = curl_easy_init();
    }catch (std::exception &e){
        std::cout << "CURL_CONTROLLER::CURL_CONTROLLER " <<e.what() << std::endl;
        exit(-1);
    }
}

/**
 * 设置请求参数
 * @param options
 * @param Response
 * @return
 */
CURL * CURL_CONTROLLER::setOpt(struct HTTP_REQUEST_OPTIONS * options,ResponseInfo * Response) {
    try{
        curl_easy_setopt(curl,CURLOPT_URL,options->URL.c_str());
        curl_easy_setopt(curl,CURLOPT_HEADER,1);
        curl_easy_setopt(curl,CURLOPT_TIMEOUT,options->TIME_OUT);
        curl_easy_setopt(curl,CURLOPT_USERAGENT,options->USER_AGENT.c_str());
        curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,callBackWrite);
        curl_easy_setopt(curl,CURLOPT_WRITEDATA,&Response->ResponseBody);
        curl_easy_setopt(curl,CURLOPT_WRITEHEADER,&Response->ResponseHeader); //header 头
        curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,false);
        curl_easy_setopt(curl,CURLOPT_SSL_VERIFYHOST,false);
        RunCurl(curl,Response);
    }catch (std::exception &e){
        std::cout <<  "CURL_CONTROLLER::setOpt "<<e.what() << std::endl;
    }
    return curl;
}
/**
 * 执行请求
 * @param curl
 * @param Response
 */
void CURL_CONTROLLER::RunCurl(CURL *curl, ResponseInfo *Response) {
    try{
        curl_easy_perform(curl);
        curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&Response->ResponseCode);
    }catch (std::exception &e){
        std::cout << "CURL_CONTROLLER::RunCurl " <<e.what() << std::endl;
    }
}
/**
 * 获取响应内容
 * @param Response
 * @param REG
 * @return
 */
ResponseInfo * CURL_CONTROLLER::getResponse(ResponseInfo * Response,REGEX * REG) {
    std::smatch match_title;
    std::regex title(REG->Title);
    if (regex_search(Response->ResponseBody, match_title, title)) {
        Response->ResponseTitle=match_title[1].str();
    }
    std::smatch match_server;
    std::regex server(REG->Server);
    if (regex_search(Response->ResponseHeader, match_server, server)) {
        Response->ResponseServerInfo=match_title[1].str();
    }
    return Response;
}
/**
 * 释放资源
 */
CURL_CONTROLLER::~CURL_CONTROLLER() {
    curl_easy_cleanup(curl);
}

明天继续写吧~ 先去下厨了!