Add multithreading

This commit is contained in:
Johnshall
2022-02-07 02:01:27 +08:00
parent 80cd0bad8e
commit 725116bed2

View File

@@ -3,13 +3,13 @@
'''
此脚本用于对 top500_manual.list 中网站进行评估,判断需要直连或代理
该脚本应当在内网环境中运行
TODO:并发
'''
import requests
import time
import threading
# 读入 top500 列表
# Read top500
domains = []
with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
for domain in f.readlines():
@@ -17,17 +17,23 @@ with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
continue
domains.append(domain[:-1])
# 判断直连或代理
domains_proxy = []
domains_direct = []
def UrlScaner(domain):
requests_header = {
requests_header = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Cache-Control': 'max-age=0',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-HK;q=0.6,zh-TW;q=0.4,en;q=0.2',
'Connection': 'keep-alive'
}
# Worker thread: takes domains off the shared list and classifies each one as direct or proxy
class DomainScaner(threading.Thread):
def __init__(self):
threading.Thread.__init__(self)
def run(self):
while len(domains):
domain = domains.pop(0)
is_proxy = False
try:
requests.get('http://www.' + domain, timeout=10, headers=requests_header)
@@ -42,10 +48,24 @@ def UrlScaner(domain):
else:
domains_direct.append(domain)
print('Proxy %s%s' % (is_proxy, domain) )
print('[Doamins Remain: %d]\tProxy %s%s' % (len(domains), is_proxy, domain) )
for domain in domains:
UrlScaner(domain)
global scaner_thread_num
scaner_thread_num -= 1
print('top500 Script Starting...\n\n')
# Start the worker threads.
# The counter is retained only because DomainScaner.run() still decrements
# the global `scaner_thread_num` when it exits; it is no longer used for
# waiting. It is set to its final value before any worker starts so that a
# worker which finishes quickly never sees a partially counted total.
SCANER_THREAD_COUNT = 5
scaner_thread_num = SCANER_THREAD_COUNT
scaner_threads = [DomainScaner() for _ in range(SCANER_THREAD_COUNT)]
for scaner_thread in scaner_threads:
    scaner_thread.start()
# Wait for every worker with join() instead of spinning on the counter.
# The spin loop kept a CPU core busy (competing with the workers for the
# GIL) and could hang forever if a worker died from an unhandled exception
# before decrementing, or if two unsynchronised `-= 1` updates collided.
for scaner_thread in scaner_threads:
    scaner_thread.join()
# write files