HttpClient/HttpUrlConnect之代理、重定向、跨域和模拟登录(session)

96
陨石坠灭
2018.11.19 11:26 字数 442

HttpClient和HttpUrlConnect都是java的第三方jar包,可以在maven仓库里面查询到。这两个库都是很强大的类库,可以用来模拟浏览器的一些行为,从而实现网页抓取和接口调用。

这两个jar包使用其中一个就可以,其中HttpUrlConnect的jar包体积更小(两者的差异可参考《HttpURLConnection与HttpClient区别/性能测试对比》)。我在实现一些业务的时候发现了一些技术点,所以写此篇文章记录下来。

下载/配置

直接下载

okhttp-urlconnection-3.11.0.jar

httpclient-4.5.3.jar

maven配置

如果使用maven, 则在pom.xml添加如下配置

<!-- https://mvnrepository.com/artifact/com.squareup.okhttp3/okhttp-urlconnection -->
<dependency>
    <groupId>com.squareup.okhttp3</groupId>
    <artifactId>okhttp-urlconnection</artifactId>
    <version>3.11.0</version>
</dependency>

如果采用HttpClient

<!-- https://mvnrepository.com/artifact/org.apache.httpcomponents/httpclient -->
<dependency>
    <groupId>org.apache.httpcomponents</groupId>
    <artifactId>httpclient</artifactId>
    <version>4.5.3</version>
</dependency>

gradle配置

如果使用gradle

compile 'com.squareup.okhttp3:okhttp-urlconnection:3.11.0'

工具类

这样的工具类有点多,直接百度 “httpClient 工具类”或者 “HttpUrlConnect 工具类”

以下是本人搜到的两篇文章

通用HttpClientUtil工具类

HttpURLConnection网络请求工具类

以下都以HttpUrlConnect为例

关于代理

Java Http连接中(HttpURLConnection)中使用代理(Proxy)及其验证(Authentication)

// Switch that decides whether getConne() routes connections through the proxy.
private static boolean useProxy = false;
// Proxy handed to URL.openConnection(Proxy); it has no initial value and is assigned by setProxy().
private static Proxy proxy;
...
/**
 * Stores an HTTP proxy pointing at the given endpoint.
 *
 * @param host proxy host name or IP address
 * @param port proxy TCP port
 */
public static void setProxy(String host,int port){
    InetSocketAddress endpoint = new InetSocketAddress(host, port);
    HttpUtils.proxy = new Proxy(Proxy.Type.HTTP, endpoint);
}
    
/**
 * Turns use of the stored proxy on or off.
 *
 * @param useProxy {@code true} to let getConne() use the stored proxy
 */
public static void setUseProxy(boolean useProxy)
{
    HttpUtils.useProxy = useProxy;
}

/**
 * Opens a connection to {@code url}, through the stored proxy when proxying
 * is switched on and a proxy has been set, and directly otherwise.
 *
 * @param url target address
 * @return the opened connection, or {@code null} when {@code url} is {@code null}
 * @throws IOException if the connection cannot be opened
 */
public static HttpURLConnection getConne(URL url) throws IOException{
    if (url == null) {
        return null;
    }
    if (useProxy && proxy != null) {
        return (HttpURLConnection) url.openConnection(proxy);
    }
    return (HttpURLConnection) url.openConnection();
}
...

关于重定向

方法1:将followRedirects设置为true即可

conn.setInstanceFollowRedirects(true);

方法2: 可以得到重定向的地址,并在重定向时做一些处理

ResponseCode为301,302

  • HttpStatus.SC_MOVED_PERMANENTLY
  • HttpStatus.SC_MOVED_TEMPORARILY
// Disable automatic redirect following so this code can inspect the redirect itself.
conn.setInstanceFollowRedirects(false);
...
int resCode = conn.getResponseCode();
// 301 and 302 are the two redirect status codes handled here.
if(resCode == 301 || resCode == 302){
  // The redirect target is read from the "location" response header.
  String location = conn.getHeaderField("location");
  // Issue a fresh GET against the redirect target and return its result.
  // NOTE(review): location is passed on unchecked; presumably it can be null or relative - verify.
  return doGet(location);
}

关于跨域、session问题

由于是用程序模拟浏览器发请求,所以不需要考虑跨域问题;只有在本地网页里发请求时,由于页面和接口不在同一个域,才会有跨域问题。至于session,只需要将cookie信息保留下来并在后续请求中带上即可,因为大部分服务器都是将sessionid存储在cookie中的。当然,还得注意一下cookie作用域的问题。

// Cookie string shared by all requests; starts empty and is overwritten from the Set-Cookie response header.
private static String cookieVal="";
...
// Excerpt of doGet showing only the cookie handling; elided parts are marked with "...".
public static String doGet(String urlStr) {
...
  // Replay the stored cookie, if there is one, as the Cookie request header.
  if(cookieVal != null && cookieVal.trim().length() > 0){               
    conn.setRequestProperty("Cookie", cookieVal);
  }
...
  int resCode = conn.getResponseCode();
    if(resCode == 200){
      // Remember the Set-Cookie header of a successful response for later requests.
      // NOTE(review): presumably this stores null when the response has no Set-Cookie header - verify.
      cookieVal = conn.getHeaderField("Set-Cookie");
      ...
    }
...
}

完整代码

代码参考:HttpURLConnection网络请求工具类


import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;


/**
 * Static helpers for issuing HTTP requests with {@link HttpURLConnection}.
 *
 * <p>The class keeps one process-wide cookie string. Cookies received in
 * {@code Set-Cookie} response headers are merged into it (name/value pairs
 * only, attributes such as {@code Path} are dropped) and replayed as the
 * {@code Cookie} header of every later request, which is enough to keep a
 * server-side session alive. Cookie scope (domain/path) is not tracked.
 *
 * <p>An optional HTTP proxy can be configured with
 * {@link #setProxy(String, int)} and {@link #setUseProxy(boolean)}.
 *
 * <p>All state is static and therefore shared by every caller, including the
 * worker threads started by the asynchronous helpers.
 */
public class HttpUtils {
    /** Connect and read timeout applied to every request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 5000;
    /** Maximum number of 301/302 responses followed by hand before giving up. */
    private static final int MAX_REDIRECTS = 5;
    /** Charset of request bodies and fallback charset for response bodies. */
    private static final String DEFAULT_CHARSET = "UTF-8";
    /** Size of the buffer used when copying response bodies. */
    private static final int BUFFER_SIZE = 4096;

    // volatile: written by callers, read by the threads started in doGetAsyn/doPostAsyn.
    private static volatile String cookieVal = "";
    private static volatile boolean useProxy = false;
    private static volatile Proxy proxy;

    /** Receives the result of an asynchronous request. */
    public interface CallBack {
        /**
         * Called on the worker thread once the request has finished.
         *
         * @param result whatever the matching synchronous method returned
         */
        void onRequestComplete(String result);
    }

    /**
     * Stores an HTTP proxy pointing at the given endpoint. The proxy is only
     * used while {@link #setUseProxy(boolean)} is switched on.
     *
     * @param host proxy host name or IP address
     * @param port proxy TCP port
     */
    public static void setProxy(String host, int port) {
        HttpUtils.proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(host, port));
    }

    /**
     * Turns use of the stored proxy on or off.
     *
     * @param useProxy {@code true} to route connections through the stored proxy
     */
    public static void setUseProxy(boolean useProxy) {
        HttpUtils.useProxy = useProxy;
    }

    /**
     * Opens a connection to {@code url}, through the stored proxy when proxying
     * is switched on and a proxy has been set, and directly otherwise.
     *
     * @param url target address
     * @return the opened connection, or {@code null} when {@code url} is {@code null}
     * @throws IOException if the connection cannot be opened
     */
    public static HttpURLConnection getConne(URL url) throws IOException {
        if (url == null) {
            return null;
        }
        // Read the field once so a concurrent setProxy cannot change it between check and use.
        Proxy current = proxy;
        if (useProxy && current != null) {
            return (HttpURLConnection) url.openConnection(current);
        }
        return (HttpURLConnection) url.openConnection();
    }

    /**
     * Runs {@link #doGet(String)} on a new thread and reports the result.
     *
     * @param urlStr   address to request
     * @param callBack receiver of the result; may be {@code null}
     */
    public static void doGetAsyn(final String urlStr, final CallBack callBack) {
        new Thread() {
            @Override
            public void run() {
                try {
                    String result = doGet(urlStr);
                    if (callBack != null) {
                        callBack.onRequestComplete(result);
                    }
                } catch (Exception e) {
                    // Keeps a failing callback from killing the thread silently.
                    e.printStackTrace();
                }
            }
        }.start();
    }

    /**
     * Runs {@link #doPost(String, String)} on a new thread and reports the result.
     *
     * @param urlStr   address to post to
     * @param params   request body in {@code name1=value1&name2=value2} form
     * @param callBack receiver of the result; may be {@code null}
     * @throws Exception never thrown by this implementation; kept so existing callers still compile
     */
    public static void doPostAsyn(final String urlStr, final String params, final CallBack callBack) throws Exception {
        new Thread() {
            @Override
            public void run() {
                try {
                    String result = doPost(urlStr, params);
                    if (callBack != null) {
                        callBack.onRequestComplete(result);
                    }
                } catch (Exception e) {
                    // Keeps a failing callback from killing the thread silently.
                    e.printStackTrace();
                }
            }
        }.start();
    }

    /**
     * Sends a GET request and returns the response body as text.
     *
     * <p>The body is decoded with the charset named in the response
     * {@code Content-Type}, falling back to UTF-8. 301/302 responses that the
     * connection did not follow on its own are followed by hand, at most
     * {@value #MAX_REDIRECTS} times.
     *
     * @param urlStr address to request
     * @return the response body, or {@code null} when the request fails for any
     *         reason (malformed URL, I/O error, status other than 200)
     */
    public static String doGet(String urlStr) {
        return doGet(urlStr, 0);
    }

    /** GET implementation that tracks how many redirects have been followed so far. */
    private static String doGet(String urlStr, int redirects) {
        HttpURLConnection conn = null;
        InputStream is = null;
        String redirectTo = null;
        try {
            URL url = new URL(urlStr);
            conn = openGet(url);

            int resCode = conn.getResponseCode();
            if (resCode == HttpURLConnection.HTTP_OK) {
                is = conn.getInputStream();
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                copy(is, baos);
                rememberCookies(conn);
                return baos.toString(responseCharset(conn));
            } else if (isRedirect(resCode)) {
                // Login flows commonly set the session cookie on the redirect itself.
                rememberCookies(conn);
                redirectTo = resolveRedirect(url, conn, redirects);
            } else {
                throw new RuntimeException(" responseCode is not 200 ... ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(is);
            // conn is still null when the URL itself was malformed.
            if (conn != null) {
                conn.disconnect();
            }
        }
        // Recurse only after the previous connection has been released.
        return redirectTo == null ? null : doGet(redirectTo, redirects + 1);
    }

    /**
     * Downloads a resource with GET and stores it as {@code root/filename}.
     * The directory {@code root} is created when it does not exist.
     *
     * @param urlStr   address to request
     * @param root     directory to store the file in
     * @param filename name of the file inside {@code root}
     * @return absolute path of the written file, or {@code null} when the
     *         request fails for any reason (malformed URL, I/O error, status other than 200)
     */
    public static String doGetFile(String urlStr, String root, String filename) {
        HttpURLConnection conn = null;
        InputStream is = null;
        OutputStream out = null;
        try {
            URL url = new URL(urlStr);
            conn = openGet(url);

            File dir = new File(root);
            if (!dir.exists()) {
                dir.mkdirs();
            }

            int resCode = conn.getResponseCode();
            if (resCode == HttpURLConnection.HTTP_OK) {
                is = conn.getInputStream();
                File dfile = new File(dir.getAbsolutePath(), filename);
                out = new FileOutputStream(dfile);
                copy(is, out);
                out.flush();
                rememberCookies(conn);
                return dfile.getAbsolutePath();
            } else {
                throw new RuntimeException(" responseCode is not 200 ... ");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(is);
            closeQuietly(out);
            // conn is still null when the URL itself was malformed.
            if (conn != null) {
                conn.disconnect();
            }
        }
        return null;
    }

    /**
     * Sends a form POST built from a map of parameters.
     *
     * <p>Keys and values are URL-encoded as UTF-8 so that characters such as
     * {@code &}, {@code =} or non-ASCII text cannot corrupt the form body;
     * pass raw (not pre-encoded) values. A {@code null} key or value is sent
     * as the text {@code null}.
     *
     * @param url   address to post to
     * @param param form fields; may be {@code null} or empty for an empty body
     * @return the response body, see {@link #doPost(String, String)}
     */
    public static String doPost(String url, Map<String, String> param) {
        StringBuilder form = new StringBuilder();
        if (param != null) {
            for (Map.Entry<String, String> field : param.entrySet()) {
                if (form.length() > 0) {
                    form.append('&');
                }
                form.append(encode(field.getKey())).append('=').append(encode(field.getValue()));
            }
        }
        return doPost(url, form.toString());
    }

    /**
     * Sends a POST request with an {@code application/x-www-form-urlencoded} body.
     *
     * <p>The body is written as UTF-8; the response is decoded with the charset
     * named in its {@code Content-Type}, falling back to UTF-8. Line terminators
     * of the response are dropped. 301/302 responses are followed by re-posting
     * the same body, at most {@value #MAX_REDIRECTS} times.
     *
     * @param url   address to post to
     * @param param request body in {@code name1=value1&name2=value2} form,
     *              already URL-encoded; {@code null} or blank sends no body
     * @return the response body with line breaks removed; an empty string when
     *         the request fails or the status is not 200
     */
    public static String doPost(String url, String param) {
        return doPost(url, param, 0);
    }

    /** POST implementation that tracks how many redirects have been followed so far. */
    private static String doPost(String url, String param, int redirects) {
        HttpURLConnection conn = null;
        PrintWriter out = null;
        BufferedReader in = null;
        String redirectTo = null;
        StringBuilder result = new StringBuilder();
        try {
            URL realUrl = new URL(url);
            conn = getConne(realUrl);
            conn.setRequestProperty("accept", "*/*");
            conn.setRequestProperty("connection", "Keep-Alive");
            conn.setRequestMethod("POST");
            conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            conn.setRequestProperty("charset", "utf-8");
            conn.setInstanceFollowRedirects(true);
            applyCookies(conn);
            conn.setUseCaches(false);
            // A POST needs both directions enabled.
            conn.setDoOutput(true);
            conn.setDoInput(true);
            conn.setReadTimeout(TIMEOUT_MILLIS);
            conn.setConnectTimeout(TIMEOUT_MILLIS);
            if (param != null && !param.trim().equals("")) {
                // Explicit charset: the platform default may differ from the declared utf-8.
                out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream(), DEFAULT_CHARSET));
                out.print(param);
                out.flush();
            }
            int resCode = conn.getResponseCode();
            if (resCode == HttpURLConnection.HTTP_OK) {
                in = new BufferedReader(new InputStreamReader(conn.getInputStream(), responseCharset(conn)));
                String line;
                while ((line = in.readLine()) != null) {
                    result.append(line);
                }
                rememberCookies(conn);
            } else if (isRedirect(resCode)) {
                // Login flows commonly set the session cookie on the redirect itself.
                rememberCookies(conn);
                redirectTo = resolveRedirect(realUrl, conn, redirects);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeQuietly(out);
            closeQuietly(in);
            if (conn != null) {
                conn.disconnect();
            }
        }
        // Recurse only after the previous connection has been released.
        if (redirectTo != null) {
            return doPost(redirectTo, param, redirects + 1);
        }
        return result.toString();
    }

    /** Forgets every stored cookie, which ends the simulated session. */
    public static void clearCookie() {
        cookieVal = "";
    }

    /** Opens a connection to {@code url} configured the way both GET helpers need it. */
    private static HttpURLConnection openGet(URL url) throws IOException {
        HttpURLConnection conn = getConne(url);
        conn.setReadTimeout(TIMEOUT_MILLIS);
        conn.setConnectTimeout(TIMEOUT_MILLIS);
        conn.setRequestMethod("GET");
        conn.setRequestProperty("accept", "*/*");
        conn.setRequestProperty("connection", "Keep-Alive");
        conn.setInstanceFollowRedirects(true);
        applyCookies(conn);
        return conn;
    }

    /** Adds the stored cookies, if any, as the Cookie header of {@code conn}. */
    private static void applyCookies(HttpURLConnection conn) {
        String cookies = cookieVal;
        if (cookies != null && cookies.trim().length() > 0) {
            conn.setRequestProperty("Cookie", cookies);
        }
    }

    /**
     * Merges the name/value pairs of all Set-Cookie headers of {@code conn} into
     * the stored cookies. A response without Set-Cookie leaves the store untouched.
     */
    private static synchronized void rememberCookies(HttpURLConnection conn) {
        Map<String, List<String>> headers = conn.getHeaderFields();
        if (headers == null) {
            return;
        }
        Map<String, String> jar = new LinkedHashMap<String, String>();
        String known = cookieVal;
        if (known != null) {
            for (String pair : known.split(";")) {
                putCookie(jar, pair);
            }
        }
        boolean received = false;
        for (Map.Entry<String, List<String>> header : headers.entrySet()) {
            // Header names are case-insensitive; the status line is stored under a null key.
            if (!"Set-Cookie".equalsIgnoreCase(header.getKey()) || header.getValue() == null) {
                continue;
            }
            for (String setCookie : header.getValue()) {
                if (setCookie == null) {
                    continue;
                }
                // Only "name=value" belongs in a Cookie header; Path, HttpOnly etc. are attributes.
                int attributes = setCookie.indexOf(';');
                String pair = attributes < 0 ? setCookie : setCookie.substring(0, attributes);
                if (putCookie(jar, pair)) {
                    received = true;
                }
            }
        }
        if (!received) {
            return;
        }
        StringBuilder merged = new StringBuilder();
        for (Map.Entry<String, String> cookie : jar.entrySet()) {
            if (merged.length() > 0) {
                merged.append("; ");
            }
            merged.append(cookie.getKey()).append('=').append(cookie.getValue());
        }
        cookieVal = merged.toString();
    }

    /** Parses one {@code name=value} pair into {@code jar}; returns whether it was usable. */
    private static boolean putCookie(Map<String, String> jar, String pair) {
        int eq = pair.indexOf('=');
        if (eq <= 0) {
            return false;
        }
        String name = pair.substring(0, eq).trim();
        if (name.length() == 0) {
            return false;
        }
        jar.put(name, pair.substring(eq + 1).trim());
        return true;
    }

    /** Tells whether {@code status} is one of the redirects this class follows by hand. */
    private static boolean isRedirect(int status) {
        return status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_MOVED_TEMP;
    }

    /**
     * Returns the absolute redirect target of {@code conn}, resolving a relative
     * Location against {@code base}; fails when the redirect budget is used up
     * or the header is missing.
     */
    private static String resolveRedirect(URL base, HttpURLConnection conn, int redirects) throws IOException {
        if (redirects >= MAX_REDIRECTS) {
            throw new IOException("more than " + MAX_REDIRECTS + " redirects, last from " + base);
        }
        String location = conn.getHeaderField("Location");
        if (location == null || location.trim().length() == 0) {
            throw new IOException("redirect without Location header from " + base);
        }
        return new URL(base, location.trim()).toString();
    }

    /** Picks the charset named in the response Content-Type, or UTF-8 when none is usable. */
    private static String responseCharset(HttpURLConnection conn) {
        String contentType = conn.getContentType();
        if (contentType != null) {
            String marker = "charset=";
            for (String part : contentType.split(";")) {
                String token = part.trim();
                if (!token.regionMatches(true, 0, marker, 0, marker.length())) {
                    continue;
                }
                String name = token.substring(marker.length()).trim();
                if (name.length() >= 2 && name.startsWith("\"") && name.endsWith("\"")) {
                    name = name.substring(1, name.length() - 1);
                }
                try {
                    if (Charset.isSupported(name)) {
                        return name;
                    }
                } catch (IllegalArgumentException ignored) {
                    // Syntactically invalid charset name sent by the server: use the default.
                }
            }
        }
        return DEFAULT_CHARSET;
    }

    /** Copies everything readable from {@code from} into {@code to}. */
    private static void copy(InputStream from, OutputStream to) throws IOException {
        byte[] buf = new byte[BUFFER_SIZE];
        int len;
        while ((len = from.read(buf)) != -1) {
            to.write(buf, 0, len);
        }
    }

    /** URL-encodes {@code raw} (or the text "null") as UTF-8 form data. */
    private static String encode(String raw) {
        try {
            return URLEncoder.encode(String.valueOf(raw), DEFAULT_CHARSET);
        } catch (UnsupportedEncodingException e) {
            // Cannot happen: every JVM is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /** Closes {@code resource} if it is open; a failure while closing is only logged. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            // The request outcome is already decided; a close failure must not change it.
            e.printStackTrace();
        }
    }
}
日记本
Gupao