diff --git a/factory/top500_manual.py b/factory/top500_manual.py
index ae4b312..b0a7035 100644
--- a/factory/top500_manual.py
+++ b/factory/top500_manual.py
@@ -3,13 +3,13 @@
 '''
-此脚本用于对 top500_manual.list 中网站进行评估,判断需要直连或代理
-该脚本应当在内网环境中运行
-TODO:并发
+Evaluate each site in top500_manual.list: direct connection or proxy?
+This script should be run from inside the internal network.
 '''
 
 import requests
 import time
+import threading
 
-# 读入 top500 列表
+# Read top500
 domains = []
 with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
     for domain in f.readlines():
@@ -17,35 +17,64 @@ with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
             continue
         domains.append(domain[:-1])
 
-# 判断直连或代理
 domains_proxy = []
 domains_direct = []
-
-def UrlScaner(domain):
-    requests_header = {
+requests_header = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
         'Cache-Control': 'max-age=0',
         'Accept-Language': 'zh-CN,zh;q=0.8,zh-HK;q=0.6,zh-TW;q=0.4,en;q=0.2',
         'Connection': 'keep-alive'
     }
-    is_proxy = False
-    try:
-        requests.get('http://www.' + domain, timeout=10, headers=requests_header)
-    except BaseException:
-        try:
-            requests.get('http://' + domain, timeout=10, headers=requests_header)
-        except BaseException:
-            is_proxy = True
-    if is_proxy:
-        domains_proxy.append(domain)
-    else:
-        domains_direct.append(domain)
-
-    print('Proxy %s:%s' % (is_proxy, domain) )
-
-for domain in domains:
-    UrlScaner(domain)
+
+# Worker thread: classifies queued domains as direct or proxy
+class DomainScaner(threading.Thread):
+    '''
+    Drains the shared ``domains`` list, probing every domain over HTTP.
+
+    A domain is appended to ``domains_direct`` when a GET of either
+    ``http://www.<domain>`` or ``http://<domain>`` completes without
+    raising, and to ``domains_proxy`` when both requests raise.
+    '''
+
+    def run(self):
+        while True:
+            # Pop first and treat IndexError as "queue drained": a separate
+            # len(domains) check lets another worker take the last entry
+            # between the check and the pop, which would raise in this thread.
+            try:
+                domain = domains.pop(0)
+            except IndexError:
+                break
+
+            is_proxy = False
+            try:
+                requests.get('http://www.' + domain, timeout=10, headers=requests_header)
+            except Exception:
+                try:
+                    requests.get('http://' + domain, timeout=10, headers=requests_header)
+                except Exception:
+                    # Both probes failed: classify the domain as proxy
+                    is_proxy = True
+
+            if is_proxy:
+                domains_proxy.append(domain)
+            else:
+                domains_direct.append(domain)
+
+            print('[Domains Remain: %d]\tProxy %s:%s' % (len(domains), is_proxy, domain) )
+
+
+print('top500 Script Starting...\n\n')
+
+# Start the worker threads
+scaner_threads = [DomainScaner() for _ in range(5)]
+for scaner_thread in scaner_threads:
+    scaner_thread.start()
+
+# Block until every worker has finished; join() waits without spinning
+for scaner_thread in scaner_threads:
+    scaner_thread.join()
 
 # write files