# Fetch free proxy IPs (获取免费代理IP)

# _*_ coding:utf-8 _*_


# encoding=utf8
import requests
import re
import random

# Pool of browser User-Agent strings; Proxy_Get_IP.get_random_header picks one
# at random for each request.
# NOTE(review): this name shadows the builtin `list` for the whole module. It is
# left unchanged because get_random_header looks the pool up by this exact name.
list = [
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3",
    "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"

]

class Proxy_Get_IP(object):
    """Scrape free proxy addresses and keep the ones that answer a probe.

    Typical use is ``main()`` (scrape the listing pages, then probe every
    candidate), followed by ``get_filter_proxy()`` to load the surviving IPs
    back from ``./proxy_list``.
    """

    # Captures three <td> fields per table row; filter_proxy unpacks them as
    # (ip, port, protocol). Compiled once instead of on every page.
    _ROW_PATTERN = re.compile(
        r'''<tr.*?<td>(.*?)</td>.*?<td>(.*?)</td>.*?<td class="country">.*?</td>\s+<td>(.*?)</td>.*?</tr>''',
        re.S)
    # File shared by filter_proxy (writer) and get_filter_proxy (reader).
    _PROXY_FILE = "./proxy_list"
    # Seconds to wait for a listing page; without a timeout requests.get can
    # block indefinitely on an unresponsive server.
    _FETCH_TIMEOUT = 10

    def __init__(self):
        # One entry per scraped page; each entry is that page's list of
        # regex match tuples.
        self.proxy_list = []
        # IPs loaded back from the proxy file by get_filter_proxy.
        self.proxy_filter_list = []

    def get_random_header(self):
        """Return request headers carrying a randomly chosen User-Agent."""
        # `list` here is the module-level User-Agent pool, not the builtin.
        return {
            'User-Agent': random.choice(list),
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            'Accept-Encoding': 'gzip',
        }

    def get_proxy(self):
        """Scrape listing pages 1-19 and collect their rows in ``self.proxy_list``.

        Each page that is fetched appends one list of match tuples (possibly
        empty). A page whose request fails is reported and skipped so that a
        single network error does not abort the whole scrape.
        """
        for page in range(1, 20):
            url = 'http://www.xicidaili.com/nn/' + str(page)
            try:
                html = requests.get(url, headers=self.get_random_header(),
                                    timeout=self._FETCH_TIMEOUT).text
            except requests.RequestException as exc:
                print("请求页面失败,跳过:{0} ({1})".format(url, exc))
                continue
            ip_list = self._ROW_PATTERN.findall(html)
            print('获取到的ip列表:', ip_list)
            self.proxy_list.append(ip_list)

    def filter_proxy(self):
        """Probe every scraped proxy and write the responsive IPs to ``./proxy_list``.

        The file is truncated first, then one IP (without port) is written per
        line for each proxy through which the probe request returned any
        response. The file is closed before returning, so a following
        ``get_filter_proxy()`` sees everything that was written.
        """
        url = "http://ip.chinaz.com/"
        proxy_num = 0
        with open(self._PROXY_FILE, "w") as out:
            for page_rows in self.proxy_list:
                for filter_ip in page_rows:
                    ip_adress, request_port, request_type = filter_ip
                    scheme = request_type.lower()
                    # e.g. {'https': 'https://118.190.145.138:9001'}
                    proxy_temp = {scheme: "{}://{}:{}".format(scheme, ip_adress, request_port)}
                    print(proxy_temp)
                    try:
                        # requests.get(url, proxies={scheme: 'scheme://ip:port'})
                        result = requests.get(url, proxies=proxy_temp, timeout=2)
                    except Exception:
                        # Deliberately broad: scraped fields may be malformed,
                        # and any failure simply means "do not keep this proxy".
                        print("代理连接超时,去除此IP:{0}".format(filter_ip))
                        continue
                    print('result:', result)  # e.g. result: <Response [200]>
                    out.write(ip_adress + "\n")
                    proxy_num += 1
        print("总共可以使用ip量为{}个".format(proxy_num))

    def get_filter_proxy(self):
        """Load the usable IPs from ``./proxy_list`` and return them.

        ``self.proxy_filter_list`` is rebuilt from the file on every call, so
        calling this repeatedly does not accumulate duplicate entries.

        Returns:
            The list of lines in the file with the trailing newline removed.

        Raises:
            OSError: if the proxy file cannot be opened (for example, when
                ``filter_proxy`` has not been run yet).
        """
        with open(self._PROXY_FILE, "r") as src:
            self.proxy_filter_list = [line.strip("\n") for line in src]
        return self.proxy_filter_list

    def main(self):
        """Run the full pipeline: scrape candidates, then probe and save them."""
        self.get_proxy()
        self.filter_proxy()

if __name__ == "__main__":
    # Script entry point: scrape the proxy listing, then probe and save the
    # working IPs. main() performs get_proxy() followed by filter_proxy().
    a = Proxy_Get_IP()
    a.main()