代理Ip的爬取-验证-PAC脚本服务

代码包含了3种方式的验证

  1. 验证已保存在文件中的IP
  2. 验证按规则生成的IP字符串
  3. 验证从代理网站爬取到的IP

验证成功的IP会保存到一个文件中,
随后开启Socket服务器 127.0.0.1:9999,
在浏览器中访问该地址,返回的内容为:

// Validated proxies ("ip:port") that passed the check.
var ips = ["210.101.131.229:8080","47.91.138.21:3128"];
// Currently selected upstream: an "ip:port" string, the marker "DIRECT", or undefined
// when nothing has been selected yet. It persists between calls.
var current ;
/**
 * PAC entry point: decides how the browser should reach the given URL.
 *
 * Keywords inside the URL switch the selected upstream for this and later requests:
 *   "next"   - pick a random proxy from ips
 *   "fidder" - use the local debugging proxy on 127.0.0.1:8888
 *   "direct" - stop using a proxy
 *
 * @param url  full URL being requested
 * @param host host name extracted from the URL
 * @return "DIRECT" or "PROXY ip:port"
 */
function FindProxyForURL(url,host){

        if(url.indexOf("next")>0){
            // 2 is the number of entries in ips.
            var len = Math.floor( Math.random() * 2);
            current = ips[len];
        }


        if(url.indexOf("fidder")>0){
            current = "127.0.0.1:8888"; // Fiddler proxy
        }

        if(url.indexOf("direct")>0){
            current = "DIRECT";
        }

        if(isInNet(dnsResolve(host), "127.0.0.0", "255.255.255.0")){// loopback host (localhost): connect directly
            return "DIRECT";
        }

        // "PROXY DIRECT" and "PROXY undefined" are not valid PAC results, so both the
        // explicit "direct" selection and the nothing-selected-yet state map to plain DIRECT.
        if(!current || current == "DIRECT"){
            return "DIRECT";
        }

        return "PROXY "+current;

}

/**
 * Fetches http://localhost:9999 asynchronously and hands the response text to the caller.
 *
 * @param callback object whose done(text) method is invoked once the request has
 *                 completed with HTTP status 200
 */
function httpGet(callback)
{
    // Use the native XHR object when the browser has one, otherwise the ActiveX variant.
    var request = window.XMLHttpRequest
        ? new XMLHttpRequest()
        : new ActiveXObject("Microsoft.XMLHTTP");

    request.onreadystatechange = function ()
    {
        // Ignore every state change except "finished with status 200".
        if (request.readyState != 4 || request.status != 200)
        {
            return;
        }
        callback.done(request.responseText);
    };

    request.open("GET", "http://localhost:9999", true); // true = asynchronous
    request.send();
}

该内容是Pac脚本,关于Pac脚本的使用方式请参考其他教程

实现代码

验证IP是否可用的原理非常简单:通过该代理模拟请求访问某一网站,再根据服务器返回的结果即可判断(当然还有其他方式)。

因为当时只是随手写的,就直接用了HttpClient,之后也没有再修改。这里更推荐使用OkHttp:不需要自己编写线程池等并发相关的代码,更加简单。

/**
 * Validates HTTP proxy candidates and then serves the working ones as a PAC script on port 9999.
 *
 * <p>Candidates come from one of three sources: a file of {@code ip:port} lines
 * ({@link #testByFile()}), a generated address range ({@link #testByCustom()}), or a scraped
 * proxy-list page ({@link #testByParse(int)}). Each candidate is checked by fetching
 * {@link #host} through it; proxies that return the expected content are appended to a file
 * and collected in {@link #ipList}.
 */
public class IPCheck {
    // The listening socket of the PAC server; a single instance is enough.
    private static ServerSocket serverSocket = null;

    static final int workerNumber = 4;// core pool size; the server has an 8-core CPU, so keep this below 8

    static final int maxPoolSize = 256;// maximum number of threads, i.e. the maximum concurrency

    static final int maxWorkerInQueue = 2500;// capacity of the work queue

    static final int waitTime = 5;// timeout in seconds (idle-thread keep-alive, request-read timeout)

    // CallerRunsPolicy: testByCustom() submits far more tasks than pool + queue can hold, and the
    // default abort policy would throw RejectedExecutionException as soon as both are full.
    private static final ThreadPoolExecutor tpe = new ThreadPoolExecutor(workerNumber, maxPoolSize, waitTime,
            TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(maxWorkerInQueue),
            new ThreadPoolExecutor.CallerRunsPolicy());

    // Either the pool above or the one below may be used for validation tasks.
    private static ExecutorService executor = Executors.newFixedThreadPool(100);
    // Candidates loaded from file; every map holds exactly one ip -> port entry.
    private static List<Map<String, Integer>> proxyIps = new ArrayList<>();
    private static CloseableHttpClient client = null;
    // URL fetched through each candidate proxy.
    private static String host = "http://www.qq.com/robots.txt";
    // Number of validation requests started so far (for progress output only).
    private static AtomicInteger atomicInteger = new AtomicInteger();

    // Proxies that passed validation, as "ip:port".
    private static CopyOnWriteArrayList<String> ipList = new CopyOnWriteArrayList<>();

    static {
        // The default client pools only a handful of connections (20 in total per the HttpClient
        // documentation). With up to maxPoolSize concurrent checks the remaining workers would
        // time out waiting for a pooled connection and report working proxies as unusable.
        client = HttpClients.custom().setMaxConnTotal(maxPoolSize).setMaxConnPerRoute(maxPoolSize).build();
    }

    /**
     * Runs one validation mode, waits for all checks to finish, then serves the PAC script forever.
     *
     * @param args unused
     * @throws Exception if waiting is interrupted or the server socket cannot be opened
     */
    public static void main(String[] args) throws Exception {

        // Pick one of the three candidate sources.
        testByFile();
//      testByCustom();
//      testByParse(1);

        awaitValidation();

        final byte[] response = buildHttpResponse(renderPacScript());
        servePacScript(response);
    }

    /**
     * Blocks until every submitted validation task has finished. Both pools are shut down and
     * awaited, because testByCustom() uses {@code tpe} while the other modes use {@code executor}.
     */
    private static void awaitValidation() throws InterruptedException {
        executor.shutdown();
        tpe.shutdown();

        while (!executor.awaitTermination(1, TimeUnit.SECONDS)) {
            // still validating
        }
        while (!tpe.awaitTermination(1, TimeUnit.SECONDS)) {
            // still validating
        }
        System.out.println("所有的子线程都结束了!");
    }

    /** Fills the PAC template placeholders #ips# and #len# with the validated proxies. */
    private static String renderPacScript() {
        // Work on a snapshot so the array literal and the length always agree.
        List<String> validated = new ArrayList<String>(ipList);

        StringBuilder builder = new StringBuilder();
        builder.append("[");
        for (int i = 0; i < validated.size(); i++) {
            if (i > 0) {
                builder.append(",");
            }
            builder.append("\"").append(validated.get(i)).append("\"");
        }
        builder.append("]");

        String js = StringUtils.readFile("src/templet.txt");
        return js.replace("#ips#", builder.toString()).replace("#len#", validated.size() + "");
    }

    /**
     * Wraps the script in a complete HTTP response. A bare body without status line and headers
     * is an HTTP/0.9-style reply, which current browsers no longer accept on a port like 9999.
     */
    private static byte[] buildHttpResponse(String body) throws IOException {
        byte[] payload = body.getBytes("UTF-8");
        String head = "HTTP/1.1 200 OK\r\n"
                + "Content-Type: application/x-ns-proxy-autoconfig; charset=UTF-8\r\n"
                + "Content-Length: " + payload.length + "\r\n"
                + "Connection: close\r\n"
                + "\r\n";
        byte[] headBytes = head.getBytes("US-ASCII");

        byte[] message = new byte[headBytes.length + payload.length];
        System.arraycopy(headBytes, 0, message, 0, headBytes.length);
        System.arraycopy(payload, 0, message, headBytes.length, payload.length);
        return message;
    }

    /** Accepts connections on port 9999 forever and answers each one with the same response. */
    private static void servePacScript(final byte[] response) throws IOException {
        System.out.println("=====runing at 127.0.0.1:9999 ======");
        // NOTE(review): ExecutorsUtils is not part of this listing; presumably a project helper
        // exposing a shared pool - confirm.
        ThreadPoolExecutor serverPool = ExecutorsUtils.tpe;
        serverSocket = new ServerSocket(9999);
        try {
            while (true) {
                final Socket browserSocket = serverSocket.accept();
                serverPool.execute(new Runnable() {
                    @Override
                    public void run() {
                        handleClient(browserSocket, response);
                    }
                });
            }
        } finally {
            serverSocket.close();
        }
    }

    /** Sends the prepared response to one client and always closes its socket. */
    private static void handleClient(Socket browserSocket, byte[] response) {
        try (Socket socket = browserSocket) {
            System.out.println("=======处理请求========");
            // Read the request first: closing a socket with unread input can reset the
            // connection before the client has received the answer.
            drainRequestHead(socket);

            OutputStream chromeOutputStream = socket.getOutputStream();
            chromeOutputStream.write(response);
            chromeOutputStream.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Consumes the request up to the blank line ending the header block, EOF, or the timeout. */
    private static void drainRequestHead(Socket socket) throws IOException {
        socket.setSoTimeout(waitTime * 1000);
        java.io.InputStream in = socket.getInputStream();
        int matched = 0;// how many bytes of the terminating CR LF CR LF have been seen
        try {
            int b;
            while (matched < 4 && (b = in.read()) != -1) {
                char expected = (matched % 2 == 0) ? '\r' : '\n';
                if (b == expected) {
                    matched++;
                } else {
                    matched = (b == '\r') ? 1 : 0;
                }
            }
        } catch (java.net.SocketTimeoutException ignored) {
            // The client sent no complete request in time; answer anyway.
        }
    }

    /**
     * Scrapes one page of the proxy list site and submits every listed proxy for validation.
     *
     * @param page page number appended to the list URL
     */
    private static void testByParse(int page) {
        System.out.println("begin parse");

        // String url = "http://www.xicidaili.com/nn/"+page;
        String url = "http://www.xicidaili.com/wt/" + page;
        try {
            Connection con = Jsoup.connect(url).timeout(3000);
            con.header("User-Agent",
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");
            con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");

            Document document = con.get();
            Elements table = document.select("#ip_list");
            Elements trList = table.select("tr");

            System.out.println("Size: " + trList.size());

            List<IPBean> ipBeans = new ArrayList<>();

            // Start at 1: the first row is skipped as the table header. An empty table
            // (layout change, blocked request) simply yields no candidates.
            for (int i = 1; i < trList.size(); i++) {
                Element tr = trList.get(i);
                Elements cells = tr.select("td");
                Elements countryCells = tr.select("td.country");
                if (cells.size() < 10 || countryCells.isEmpty()) {
                    continue;// not a complete data row
                }

                String country = countryCells.get(0).html();// flag image markup
                String ip = cells.get(1).text();// ip
                String port = cells.get(2).text();// port
                String area = cells.get(3).select("a").text();// location, e.g. province and city
                String type = cells.get(4).text();// high-anonymity or ordinary
                String protocol = cells.get(5).text();// protocol type
                String speed = cells.get(6).select("div.bar").attr("title");// speed
                String connectTimeout = cells.get(7).select("div.bar").attr("title");// connect time
                String survivalTimeout = cells.get(8).text();// time alive
                String checkTime = cells.get(9).text();// last verified

                ipBeans.add(new IPBean(country, ip, port, area, type, protocol, speed, connectTimeout,
                        survivalTimeout, checkTime));
            }

            for (IPBean ip : ipBeans) {
                final String ipStr = ip.getIp();
                final int port;
                try {
                    port = Integer.parseInt(ip.getPort().trim());
                } catch (NumberFormatException e) {
                    continue;// skip rows whose port cell is not a number
                }

                executor.submit(new Runnable() {

                    @Override
                    public void run() {
                        sendRequest(ipStr, port);
                    }
                });
            }

        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    /** Submits every address 221.216.5.0 - 221.216.254.254 on port 80 for validation. */
    private static void testByCustom() {

        final int port = 80;
        for (int i = 5; i < 255; i++) {
            for (int j = 0; j < 255; j++) {
                final String ip = "221.216." + i + "." + j;

                tpe.execute(new Runnable() {

                    @Override
                    public void run() {
                        sendRequest(ip, port);
                    }
                });

            }
        }
    }

    /** Loads the candidates from src/ips.txt and submits each one for validation. */
    private static void testByFile() {
        initProxyIp();

        for (Map<String, Integer> map : proxyIps) {
            Entry<String, Integer> entry = map.entrySet().iterator().next();
            final String ip = entry.getKey();
            final int port = entry.getValue();

            executor.submit(new Runnable() {

                @Override
                public void run() {
                    sendRequest(ip, port);
                }
            });

        }

    }

    /**
     * Fetches {@link #host} through the given proxy. When the body contains the expected sitemap
     * URL, the proxy is appended to the result file and to {@link #ipList}.
     *
     * @param ip   proxy address
     * @param port proxy port
     */
    private static void sendRequest(String ip, int port) {
        System.out.println("当前访问的代理是:" + ip + ":" + port + "  已发送的请求数是:" + atomicInteger.incrementAndGet());

        HttpGet get = new HttpGet(host);
        // The proxy is configured per request.
        HttpHost proxy = new HttpHost(ip, port);

        // setConnectionRequestTimeout only limits the wait for a pooled connection; without
        // setConnectTimeout a dead proxy blocks its worker until the OS gives up on the connect.
        RequestConfig config = RequestConfig.custom().setProxy(proxy).setConnectTimeout(5000)
                .setConnectionRequestTimeout(5000).setSocketTimeout(5000).build();

        get.setConfig(config);
        get.setHeader("Referer", "http://www.qq.com/");
        get.setHeader("Host", "www.qq.com");
        get.setHeader("Accept-Encoding", "gzip, deflate, sdch");
        get.setHeader("User-Agent",
                "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36");
        get.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");

        try {
            HttpResponse response = client.execute(get);
            String content = EntityUtils.toString(response.getEntity());

            if (content.contains("http://www.qq.com/sitemap_index.xml")) {
                System.out.println(ip + ":" + port + "代理可用!!!");
                // append to the result file
                StringUtils.write(ip + ":" + port, "C:\\Users\\Administrator\\Desktop\\ip.txt", true);
                // remember for the PAC script
                ipList.add(ip + ":" + port);

            } else {
                System.out.println(ip + "不可用");
            }

        } catch (Exception e) {
            // Deliberately best effort: any failure just means this proxy is unusable.
            System.out.println(ip + " Connection timed out 不可用");
        }

    }

    /**
     * Reads src/ips.txt (one {@code ip:port} per line) into {@link #proxyIps}. Both CRLF and LF
     * line endings are accepted; blank or malformed lines are skipped instead of aborting.
     */
    private static void initProxyIp() {
        String content = StringUtils.readFile("src/ips.txt");
        if (content == null) {
            return;
        }

        for (String rawLine : content.split("\r?\n")) {
            String line = rawLine.trim();
            String ip_port[] = line.split(":");
            if (ip_port.length < 2 || ip_port[0].trim().isEmpty()) {
                continue;// blank line or not in ip:port form
            }

            int port;
            try {
                port = Integer.parseInt(ip_port[1].trim());
            } catch (NumberFormatException e) {
                System.out.println("skip malformed proxy line: " + line);
                continue;
            }

            Map<String, Integer> map = new HashMap<>();
            map.put(ip_port[0].trim(), port);

            proxyIps.add(map);
        }

    }

}

/**
 * One row of the scraped proxy list. All values are kept as the raw strings taken from the page.
 */
class IPBean {
    /** Markup of the country cell (a flag image). */
    String country;
    /** Proxy address. */
    String ip;
    /** Proxy port, still as text. */
    String port;
    /** Location text. */
    String area;
    /** Anonymity level (high-anonymity or ordinary). */
    String type;
    /** Protocol type. */
    String protocol;
    /** Speed as shown on the page. */
    String speed;
    /** Connect time as shown on the page. */
    String connectTimeout;
    /** How long the proxy has been alive. */
    String survivalTimeout;
    /** When the proxy was last verified. */
    String checkTime;

    /** Creates a bean with every column of a list row. */
    public IPBean(String country, String ip, String port, String area, String type, String protocol, String speed,
            String connectTimeout, String survivalTimeout, String checkTime) {
        this.country = country;
        this.ip = ip;
        this.port = port;
        this.area = area;
        this.type = type;
        this.protocol = protocol;
        this.speed = speed;
        this.connectTimeout = connectTimeout;
        this.survivalTimeout = survivalTimeout;
        this.checkTime = checkTime;
    }

    /** @return the country cell markup */
    public String getCountry() {
        return this.country;
    }

    /** @param country the country cell markup */
    public void setCountry(String country) {
        this.country = country;
    }

    /** @return the proxy address */
    public String getIp() {
        return this.ip;
    }

    /** @param ip the proxy address */
    public void setIp(String ip) {
        this.ip = ip;
    }

    /** @return the proxy port as text */
    public String getPort() {
        return this.port;
    }

    /** @param port the proxy port as text */
    public void setPort(String port) {
        this.port = port;
    }

    /** @return the location text */
    public String getArea() {
        return this.area;
    }

    /** @param area the location text */
    public void setArea(String area) {
        this.area = area;
    }

    /** @return the anonymity level */
    public String getType() {
        return this.type;
    }

    /** @param type the anonymity level */
    public void setType(String type) {
        this.type = type;
    }

    /** @return the protocol type */
    public String getProtocol() {
        return this.protocol;
    }

    /** @param protocol the protocol type */
    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    /** @return the speed text */
    public String getSpeed() {
        return this.speed;
    }

    /** @param speed the speed text */
    public void setSpeed(String speed) {
        this.speed = speed;
    }

    /** @return the connect time text */
    public String getConnectTimeout() {
        return this.connectTimeout;
    }

    /** @param connectTimeout the connect time text */
    public void setConnectTimeout(String connectTimeout) {
        this.connectTimeout = connectTimeout;
    }

    /** @return the time-alive text */
    public String getSurvivalTimeout() {
        return this.survivalTimeout;
    }

    /** @param survivalTimeout the time-alive text */
    public void setSurvivalTimeout(String survivalTimeout) {
        this.survivalTimeout = survivalTimeout;
    }

    /** @return the last-verified text */
    public String getCheckTime() {
        return this.checkTime;
    }

    /** @param checkTime the last-verified text */
    public void setCheckTime(String checkTime) {
        this.checkTime = checkTime;
    }

    /** Lists every field as {@code name=value} inside {@code IPBean [...]}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("IPBean [");
        text.append("country=").append(country);
        text.append(", ip=").append(ip);
        text.append(", port=").append(port);
        text.append(", area=").append(area);
        text.append(", type=").append(type);
        text.append(", protocol=").append(protocol);
        text.append(", speed=").append(speed);
        text.append(", connectTimeout=").append(connectTimeout);
        text.append(", survivalTimeout=").append(survivalTimeout);
        text.append(", checkTime=").append(checkTime);
        text.append("]");
        return text.toString();
    }

}

代码中使用到的模板代码如下

var ips = #ips#;
var current ;
function FindProxyForURL(url,host){
                
        if(url.indexOf("next")>0){
            var len = Math.floor( Math.random() * #len#);
            current = ips[len];
        }
        
        
        if(url.indexOf("fidder")>0){
            current = "127.0.0.1:8888"; //fidder 代理
        }
        
        if(url.indexOf("direct")>0){
            current = "DIRECT";
        }
        
        if(isInNet(dnsResolve(host), "127.0.0.0", "255.255.255.0")){//如果是本地主机,localhost 直接连接
            return "DIRECT";
        }
        
        
        return "PROXY "+current;
        
}

/**
 * Fetches http://localhost:9999 asynchronously and hands the response text to the caller.
 *
 * @param callback object whose done(text) method is invoked once the request has
 *                 completed with HTTP status 200
 */
function httpGet(callback)
{
    // Use the native XHR object when the browser has one, otherwise the ActiveX variant.
    var request = window.XMLHttpRequest
        ? new XMLHttpRequest()
        : new ActiveXObject("Microsoft.XMLHTTP");

    request.onreadystatechange = function ()
    {
        // Ignore every state change except "finished with status 200".
        if (request.readyState != 4 || request.status != 200)
        {
            return;
        }
        callback.done(request.responseText);
    };

    request.open("GET", "http://localhost:9999", true); // true = asynchronous
    request.send();
}

最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 158,425评论 4 361
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 67,058评论 1 291
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 108,186评论 0 243
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 43,848评论 0 204
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 52,249评论 3 286
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 40,554评论 1 216
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 31,830评论 2 312
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 30,536评论 0 197
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 34,239评论 1 241
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 30,505评论 2 244
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 32,004评论 1 258
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 28,346评论 2 253
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 32,999评论 3 235
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 26,060评论 0 8
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 26,821评论 0 194
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 35,574评论 2 271
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 35,480评论 2 267

推荐阅读更多精彩内容