[转载]crawler_基础之_httpclient 访问网络资源 - cphmvp - 博客园

[转载]crawler_基础之_httpclient 访问网络资源 – cphmvp – 博客园.

先粘贴一个简单版的，后期再修改。

pom文件

  <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpasyncclient</artifactId>
      <version>4.0-alpha3</version>
      <scope>compile</scope>
    </dependency>
package com.cph.utils;

import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;

/**
 * Helper for downloading web resources with Apache HttpClient.<br>
 * Original author's note: 4.1.2 test version.
 * 
 * @author cphmvp
 * 
 */
public class HttpClientUtil {
    /** Charset used to decode a body when the response declares no usable charset. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Browser identification sent with every request. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; MyIE 2.0 Beta 2; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E; InfoPath.3)";

    // NOTE(review): this value carries entries such as skey, p_skey and lskey
    // that look like captured login-session credentials for qq.com, yet
    // pageRequest() attaches it to every request regardless of the target host
    // (main() below requests baidu.com). Consider loading it from configuration
    // and sending it only to the site it belongs to. Left unchanged here so
    // existing behaviour is preserved.
    private static final String COOKIE = "RK=hki7lw6qHP; wbilang_821910184=zh_CN; wbilang_10000=zh_CN; dm_login_weixin_rem=; dm_login_weixin_scan=; wb_regf=%3B0%3B%3Bwww.baidu.com%3B0; mb_reg_from=8; ts_last=t.qq.com/; ts_refer=search.t.qq.com/index.php; ts_uid=7492426386; wbilang_384871492=zh_CN; ts_last=1.t.qq.com/wolongxian; ts_refer=www.baidu.com/s; ts_uid=7492426386; pgv_pvid=1942759996; pgv_info=ssid=s5111200112; o_cookie=384871492; ptui_loginuin=821910184; ptisp=cnc; ptcz=9c03596fa66d550bcd5c8cd812f16ad5d6c2074604285851a218c478774eb6bb; luin=o0821910184; lskey=00010000b43bed256a14b910da63ac03a1c1a042994fea6a8a7078dcb2ea566d5dc09188883ddddd1f7feadb; pt2gguin=o0821910184; uin=o0821910184; skey=@xObtCqUUW; p_uin=o0821910184; p_skey=swqZymgXczQrTdTin9Qe44jMT5cTNoTeSzaXrxDjs3k_; pt4_token=OlMTg1UJSdPz-VzgfdEgFQ__; p_luin=o0821910184; p_lskey=000400001663db9b9783c84586b6d929044d17e291916d1cfcfb93c0f520f05e8c85adc89dffc94e52b1325e";

    /** Static utility class; not meant to be instantiated. */
    private HttpClientUtil() {
    }

    /**
     * Downloads a resource with an HTTP GET request and returns its body as
     * text.
     * <p>
     * The body is decoded with the charset declared in the response's
     * Content-Type header; when none is declared, or the declared one is not
     * supported by this JVM, UTF-8 is used instead. The response status code is
     * printed to standard output.
     * 
     * @param url
     *            address to fetch
     * @return the decoded body, or {@code null} when {@code url} is null or
     *         blank, the status is not 200, the response has no body, or an
     *         I/O error occurs
     */
    public static String downloadGet(String url) {
        if (url == null || url.trim().length() == 0) {
            System.err.println("downloadGet: url is null or empty");
            return null;
        }
        DefaultHttpClient client = new DefaultHttpClient();
        try {
            // Built inside the try block so the client is still shut down when
            // the URL is rejected as malformed.
            HttpUriRequest request = new HttpGet(url);
            pageRequest(request);

            HttpResponse response = client.execute(request);
            int statusCode = response.getStatusLine().getStatusCode();
            System.out.println("响应码: " + statusCode);
            if (statusCode != HttpStatus.SC_OK) {
                return null;
            }

            // A response is allowed to carry no entity at all.
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return null;
            }
            byte[] content = EntityUtils.toByteArray(entity);
            return decode(content, declaredCharset(entity));
        } catch (IOException e) {
            // ClientProtocolException is an IOException, so one handler covers
            // both protocol and transport failures.
            System.err.println("downloadGet failed for " + url);
            e.printStackTrace();
            return null;
        } finally {
            // Release the connections held by this one-shot client.
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Reads the charset named in the entity's Content-Type header.
     * 
     * @param entity
     *            response entity to inspect
     * @return the declared charset name, or {@code null} when none is declared
     *         or the header cannot be parsed
     */
    @SuppressWarnings("deprecation")
    private static String declaredCharset(HttpEntity entity) {
        try {
            // Deprecated in later HttpCore releases, but available across the
            // 4.x line this class is written against.
            return EntityUtils.getContentCharSet(entity);
        } catch (RuntimeException ignored) {
            // A malformed header must not discard a body that has already been
            // downloaded; the caller falls back to the default charset.
            return null;
        }
    }

    /**
     * Decodes the raw body, preferring the declared charset over the default.
     * 
     * @param content
     *            raw response bytes
     * @param charset
     *            charset name declared by the server, may be {@code null}
     * @return the decoded text
     * @throws UnsupportedEncodingException
     *             if even the default charset is unavailable
     */
    private static String decode(byte[] content, String charset)
            throws UnsupportedEncodingException {
        if (charset != null) {
            try {
                return new String(content, charset);
            } catch (UnsupportedEncodingException ignored) {
                // Unknown or illegal charset name from the server: use the
                // default below.
            }
        }
        return new String(content, DEFAULT_ENCODING);
    }

    /**
     * Adds the browser-like request headers (User-Agent and Cookie).
     * 
     * @param request
     *            request to decorate
     */
    private static void pageRequest(HttpUriRequest request) {
        request.setHeader("User-Agent", USER_AGENT);
        request.setHeader("Cookie", COOKIE);
    }

    /**
     * Demo entry point: downloads a fixed page and prints the result.
     * 
     * @param args
     *            unused
     */
    public static void main(String[] args) {
        String url = "http://www.baidu.com/";
        System.out.println(downloadGet(url));
    }
}
赞(0) 打赏
分享到: 更多 (0)

觉得文章有用就打赏一下文章作者

支付宝扫一扫打赏

微信扫一扫打赏