|
28 | 28 | # for debugging: disable urllib3's InsecureRequestWarning for unverified HTTPS requests
|
29 | 29 | requests.packages.urllib3.disable_warnings()
|
30 | 30 |
|
| 31 | +""" |
| 32 | +66ip.cn |
| 33 | +data5u.com |
| 34 | +ip181.com |
| 35 | +xicidaili.com |
| 36 | +goubanjia.com |
| 37 | +xdaili.cn |
| 38 | +kuaidaili.com |
| 39 | +cn-proxy.com |
| 40 | +www.mimiip.com |
| 41 | +proxy-list.org |
| 42 | +cz88.net |
| 43 | +ip181.com |
| 44 | +""" |
| 45 | + |
31 | 46 |
|
32 | 47 | class GetFreeProxy(object):
|
33 | 48 | """
|
@@ -129,7 +144,8 @@ def freeProxyFifth():
|
129 | 144 | try:
|
130 | 145 | # the ':' sits bare inside the <td>, while everything else is wrapped in div/span/p tags; split out the IP first, then find the port
|
131 | 146 | ip_addr = ''.join(each_proxy.xpath(xpath_str))
|
132 |     | - port = each_proxy.xpath(".//span[contains(@class, 'port')]/text()")[0]
    | 147 | + port = each_proxy.xpath(
    | 148 | +     ".//span[contains(@class, 'port')]/text()")[0]
133 | 149 | yield '{}:{}'.format(ip_addr, port)
|
134 | 150 | except Exception as e:
|
135 | 151 | pass
|
@@ -162,6 +178,63 @@ def freeProxySeventh():
|
162 | 178 | for tr in proxy_list[1:]:
|
163 | 179 | yield ':'.join(tr.xpath('./td/text()')[0:2])
|
164 | 180 |
|
| 181 | +    @staticmethod
| 182 | +    def freeProxyEight():
| 183 | +        urls = ['http://cn-proxy.com/', 'http://cn-proxy.com/archives/218']
| 184 | +        # cn-proxy.com lists the IP and port in adjacent <td> cells
| 185 | +        for url in urls:
| 186 | +            r = requests.get(url)
| 187 | +            proxies = re.findall(
| 188 | +                r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]*?<td>(\d+)</td>', r.text)
| 189 | +            for proxy in proxies:
| 190 | +                yield ':'.join(proxy)
| 191 | +
| 192 | +    @staticmethod
| 193 | +    def freeProxyNinth():
| 194 | +        urls = ['http://www.mimiip.com/gngao/%s' % n for n in range(1, 10)]
| 195 | +        # www.mimiip.com high-anonymity pages 1-9; IP and port sit in adjacent <td> cells
| 196 | +        for url in urls:
| 197 | +            r = requests.get(url)
| 198 | +            proxies = re.findall(
| 199 | +                r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]*?<td>(\d+)</td>', r.text)
| 200 | +            for proxy in proxies:
| 201 | +                yield ':'.join(proxy)
| 202 | +
| 203 | +    @staticmethod
| 204 | +    def freeProxyTenth():
| 205 | +        urls = ['https://proxy-list.org/english/index.php?p=%s' %
| 206 | +                n for n in range(1, 10)]
| 207 | +        import base64
| 208 | +        # proxy-list.org embeds each entry as a base64-encoded 'ip:port' inside Proxy('...')
| 209 | +        for url in urls:
| 210 | +            r = requests.get(url)
| 211 | +            proxies = re.findall(r"Proxy\('(.*?)'\)", r.text)
| 212 | +            for proxy in proxies:
| 213 | +                yield base64.b64decode(proxy).decode('utf-8')
| 214 | +
| 215 | +    @staticmethod
| 216 | +    def freeProxyEleventh():
| 217 | +        urls = ['http://www.cz88.net/proxy/%s' % m for m in
| 218 | +                ['index.shtml'] + ['http_%s.shtml' % n for n in range(2, 11)]]
| 219 | +        # cz88.net renders the port in a <div class="port"> right after the IP
| 220 | +        for url in urls:
| 221 | +            r = requests.get(url)
| 222 | +            proxies = re.findall(
| 223 | +                r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</div><div class="port">(\d+)</div>', r.text)
| 224 | +            for proxy in proxies:
| 225 | +                yield ':'.join(proxy)
| 226 | +
| 227 | +    @staticmethod
| 228 | +    def freeProxy12th():
| 229 | +        urls = ['http://www.ip181.com/daili/%s.html' % n for n in range(1, 11)]
| 230 | +        # ip181.com pages 1-10; IP and port sit in adjacent <td> cells
| 231 | +        for url in urls:
| 232 | +            r = requests.get(url)
| 233 | +            proxies = re.findall(
| 234 | +                r'<td>(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})</td>[\w\W]*?<td>(\d+)</td>', r.text)
| 235 | +            for proxy in proxies:
| 236 | +                yield ':'.join(proxy)
| 237 | +
165 | 238 |
|
166 | 239 | if __name__ == '__main__':
|
167 | 240 | gg = GetFreeProxy()
|
|
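For quick manual testing, a minimal sketch of how the new fetchers might be exercised from a script; the module name getFreeProxy is an assumption (adjust the import to wherever GetFreeProxy actually lives), and no retry or validation logic is included.

    # minimal smoke-test sketch; the import path is an assumption
    from getFreeProxy import GetFreeProxy

    fetchers = [
        GetFreeProxy.freeProxyEight,
        GetFreeProxy.freeProxyNinth,
        GetFreeProxy.freeProxyTenth,
        GetFreeProxy.freeProxyEleventh,
        GetFreeProxy.freeProxy12th,
    ]

    for fetch in fetchers:
        # each fetcher is a generator yielding raw 'ip:port' strings scraped
        # from one source site; network errors propagate as exceptions here
        for proxy in fetch():
            print(proxy)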