本篇文章主要是记录一下使用CURL的坑

0x01 类的定义

class http {
public:
    http();
    http(std::string url, unsigned int port = 80);  // 设置请求地址以及端口
    bool setURL(std::string url);    // 设置请求地址
    bool setPort(unsigned int port); // 设置端口
    bool getRequest();  // get 方法
    bool postRequest(std::string data = ""); // post 方法
    bool setCookie(std::string cookie = "");
    bool addRequestHeaders(std::string header); // 添加请求头
    std::string getTitle(); // 获取标题
    std::string getResponse(); // 获取响应内容
    std::string getResponse(std::string Key); // 根据指定的键名获取键值 例子:getResponse("Server"); // BWS/1.1
    ~http(); // 释放资源
private:
    unsigned int _responseCode; // 响应码
    unsigned int _timeout = 2; // 超时时间
    unsigned short int _connectTimeout = 1; // 连接超时时间
    bool _request(); // 核心请求方法
    CURL * curl = NULL; // curl指针
    CURLcode _res; // 请求返回值
    std::string _url; // 请求地址
    unsigned int _port; // 请求端口
    std::string _title; // 响应标题
    std::string _responseHeader; // 响应头
    std::string _responseBody; // 响应主体内容
    bool isRequested; // 是否请求完毕
    curl_slist * _requestHeaders=NULL; // 请求头列表 headers 
    curl_slist * _responseCookie=NULL; // 响应Cookie
    static size_t _callBackWrite(char *ptr, size_t n, size_t m, std::string *data); // 回调函数,用于将响应内容放入内存操作
};

0x02 主要请求方法的实现

/**
 * 发送请求
 * @return
 */
bool http::_request() {
    curl_easy_setopt(curl,CURLOPT_TIMEOUT,_timeout);
    curl_easy_setopt(curl,CURLOPT_CONNECTTIMEOUT,_connectTimeout);
    curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,false);
    curl_easy_setopt(curl,CURLOPT_SSL_VERIFYHOST,false);
    curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_callBackWrite);
    curl_easy_setopt(curl,CURLOPT_WRITEHEADER,&_responseHeader);
    curl_easy_setopt(curl,CURLOPT_WRITEDATA,&_responseBody);
    if(_requestHeaders != NULL){
        curl_easy_setopt(curl,CURLOPT_HTTPHEADER,_requestHeaders);
    }
    curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 1);
    _res = curl_easy_perform(curl);
    curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&_responseCode);
    isRequested = true;
}

0x03 POST与GET的区别

/**
 * post请求
 * @param data
 * @return
 */
bool http::postRequest(std::string data) {
    curl_easy_setopt(curl,CURLOPT_POST,1);
    curl_easy_setopt(curl,CURLOPT_POSTFIELDS,data.c_str());
    _request();
}

/**
 * get请求
 * @return
 */
bool http::getRequest() {
    _request();
}

POST比GET多两步操作,需要设置两个值

0x04 获取标签内容 - HTML解析

使用htmlcxx库,安装命令:sudo apt-get install libhtmlcxx-dev

包含头文件:#include <htmlcxx/html/ParserDom.h>

/**
 * 返回页面标题
 * @return
 */
std::string http::getTitle() {
    htmlcxx::HTML::ParserDom parserDom;
    tree<htmlcxx::HTML::Node>dom = parserDom.parseTree(_responseBody);
    tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
    tree<htmlcxx::HTML::Node>::iterator end = dom.end();
    for(; it != end; ++it)
    {
        if (it->tagName()=="title") // 如果标签名为title
        {
            it++;
            _title.append(it->text()); // 返回 title 标签内容
        }
    }
    return _title;
}

0x05 根据键名获取键值 - 搜集信息

主要是字符串的操作

/**
 * 通过键名获取键值
 * @param Key
 * @return
 */
std::string http::getResponse(std::string Key) {
    std::string _del = "\r\n";
    std::string::size_type pos1, pos2;
    std::string _lineStr;
    pos2 = _responseHeader.find(_del);
    pos1 = 0;
    while(std::string::npos != pos2)
    {
        _lineStr  =  _responseHeader.substr(pos1, pos2-pos1);
        if(_lineStr.find(Key) == 0){
            return _lineStr.substr(Key.length()+2);
        }

        pos1 = pos2 + _del.size();
        pos2 = _responseHeader.find(_del, pos1);
    }
    if(pos1 != _responseHeader.length()){
        if(_lineStr.find(Key) == 0){
            return _lineStr.substr(Key.length()+2);
        }
    }
    return NULL;
}

0x06 源代码

http.h

//
// Created by payloads on 1/22/18.
//

#ifndef MYPROJECT_HTTP_H
#define MYPROJECT_HTTP_H

#include <iostream>
#include <string>
#include <vector>
#include <regex>
#include <curl/curl.h>
#include <htmlcxx/html/ParserDom.h>
#include <sstream>

class http {
public:
    http();
    http(std::string url, unsigned int port = 80);  // 设置请求地址以及端口
    bool setURL(std::string url);    // 设置请求地址
    bool setPort(unsigned int port); // 设置端口
    bool getRequest();  // get 方法
    bool postRequest(std::string data = ""); // post 方法
    bool setCookie(std::string cookie = "");
    bool addRequestHeaders(std::string header);
    std::string getTitle(); // 获取标题
    std::string getResponse(); // 获取响应内容
    std::string getResponse(std::string Key);
    ~http();
private:
    unsigned int _responseCode;
    unsigned int _timeout = 2;
    unsigned short int _connectTimeout = 1;
    bool _request();
    CURL * curl = NULL;
    CURLcode _res;
    std::string _url;
    unsigned int _port;
    std::string _title;
    std::string _responseHeader;
    std::string _responseBody;
    bool isRequested;
    curl_slist * _requestHeaders=NULL;
    curl_slist * _responseCookie=NULL;
    static size_t _callBackWrite(char *ptr, size_t n, size_t m, std::string *data);
};

#endif //MYPROJECT_HTTP_H

http.cpp

//
// Created by payloads on 1/22/18.
//

#include "http.h"

/**
 * 初始化请求对象
 */
http::http() {
    if(curl == NULL){
        isRequested = false;
        curl = curl_easy_init();
    }
}

/**
 * 回收资源
 */
http::~http() {
    curl_slist_free_all(_requestHeaders);
    curl_slist_free_all(_responseCookie);
    curl_easy_cleanup(curl);
}

/**
 * 设置请求地址和端口
 * @param url
 * @param port
 */
http::http(std::string url, unsigned int port) {
    if(curl == NULL){
        curl = curl_easy_init();
        _url  = url;
        if(url.substr(0,5) == "https"){
            _port = 443;
        }else{
            _port = port;
        }

        curl_easy_setopt(curl,CURLOPT_URL,_url.c_str());
        curl_easy_setopt(curl,CURLOPT_PORT,_port);

    }
}

/**
 * 设置请求地址
 * @param url
 * @return
 */
bool http::setURL(std::string url) {
    curl_easy_setopt(curl,CURLOPT_URL,url.c_str());
}

/**
 * 设置端口
 * @param port
 * @return
 */
bool http::setPort(unsigned int port) {
    curl_easy_setopt(curl,CURLOPT_PORT,port);
}


/**
 * 发送请求
 * @return
 */
bool http::_request() {
    curl_easy_setopt(curl,CURLOPT_TIMEOUT,_timeout);
    curl_easy_setopt(curl,CURLOPT_CONNECTTIMEOUT,_connectTimeout);
    curl_easy_setopt(curl,CURLOPT_SSL_VERIFYPEER,false);
    curl_easy_setopt(curl,CURLOPT_SSL_VERIFYHOST,false);
    curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,_callBackWrite);
    curl_easy_setopt(curl,CURLOPT_WRITEHEADER,&_responseHeader);
    curl_easy_setopt(curl,CURLOPT_WRITEDATA,&_responseBody);
    if(_requestHeaders != NULL){
        curl_easy_setopt(curl,CURLOPT_HTTPHEADER,_requestHeaders);
    }
    curl_easy_setopt(curl, CURLOPT_FORBID_REUSE, 1);
    _res = curl_easy_perform(curl);
    curl_easy_getinfo(curl,CURLINFO_RESPONSE_CODE,&_responseCode);
    isRequested = true;
}

/**
 * 回调函数
 * @param ptr
 * @param n
 * @param m
 * @param data
 * @return
 */
size_t http::_callBackWrite(char *ptr, size_t n, size_t m, std::string *data) {
        if (data == NULL) return 0;
        data->append(ptr,n*m);
        return n*m;
}

/**
 * get请求
 * @return
 */
bool http::getRequest() {
    _request();
}

/**
 * post请求
 * @param data
 * @return
 */
bool http::postRequest(std::string data) {
    curl_easy_setopt(curl,CURLOPT_POST,1);
    curl_easy_setopt(curl,CURLOPT_POSTFIELDS,data.c_str());
    _request();
}


/**
 * 设置Cookie
 * @param cookie
 * @return
 */
bool http::setCookie(std::string cookie) {
    curl_easy_setopt(curl,CURLOPT_COOKIE,cookie.c_str());
}

/**
 * 设置请求头
 * @param header
 * @return
 */
bool http::addRequestHeaders(std::string header) {
    _requestHeaders = curl_slist_append(_requestHeaders,header.c_str());
}

/**
 * 返回页面标题
 * @return
 */
std::string http::getTitle() {
    htmlcxx::HTML::ParserDom parserDom;
    tree<htmlcxx::HTML::Node>dom = parserDom.parseTree(_responseBody);
    tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
    tree<htmlcxx::HTML::Node>::iterator end = dom.end();
    for(; it != end; ++it)
    {
        if (it->tagName()=="title")
        {
            it++;
            _title.append(it->text());
        }
    }
    return _title;
}

/**
 * 返回响应头
 * @return
 */
std::string http::getResponse() {
    return _responseHeader;
}

/**
 * 通过键名获取键值
 * @param Key
 * @return
 */
std::string http::getResponse(std::string Key) {
    std::string _del = "\r\n";
    std::string::size_type pos1, pos2;
    std::string _lineStr;
    pos2 = _responseHeader.find(_del);
    pos1 = 0;
    while(std::string::npos != pos2)
    {
        _lineStr  =  _responseHeader.substr(pos1, pos2-pos1);
        if(_lineStr.find(Key) == 0){
            return _lineStr.substr(Key.length()+2);
        }

        pos1 = pos2 + _del.size();
        pos2 = _responseHeader.find(_del, pos1);
    }
    if(pos1 != _responseHeader.length()){
        if(_lineStr.find(Key) == 0){
            return _lineStr.substr(Key.length()+2);
        }
    }
    return NULL;
}

main.cpp

#include <iostream>
#include "http.h"
int main(int argc,char * argv[]) {
    std::cout << "args count :" << argc << std::endl;
    http client("https://www.baidu.com/");
    if(argc >= 2){
        client.setURL(argv[1]);
    }
    client.getRequest();
    std::cout << client.getTitle()<<std::endl;
    std::cout << client.getResponse("Set-Cookie")<<std::endl;
    return 0;
}

0x07 参考

  • C++ Html解析器-HtmlCxx用户手册和源代码解析 : https://www.cnblogs.com/zhanglanyun/archive/2011/10/21/2220647.html
  • C++ 分割字符串两种方法:https://www.cnblogs.com/dfcao/p/cpp-FAQ-split.html
  • libcurl - API:https://curl.haxx.se/libcurl/c/

0x08 结语

后面继续扩展代理功能、并不是很难

希望能够把轮子造好,为后面的项目开发做准备。