diff --git a/factory/resultant/top50_direct_with_Apple.list.list b/factory/resultant/top50_direct_with_Apple.list.list new file mode 100644 index 0000000..d2a15cc --- /dev/null +++ b/factory/resultant/top50_direct_with_Apple.list.list @@ -0,0 +1,156 @@ +# top500 direct list update time: 2021-12-05 16:46:12 +a1.mzstatic.com +a2.mzstatic.com +a3.mzstatic.com +a4.mzstatic.com +a5.mzstatic.com +accuweather.com +adcdownload.apple.com +adcdownload.apple.com.akadns.net +amazon.com +appldnld.apple.com +appldnld.g.aaplimg.com +appleid.cdn-apple.com +apps.apple.com +apps.mzstatic.com +bing.com +cdn-cn1.apple-mapkit.com +cdn-cn2.apple-mapkit.com +cdn-cn3.apple-mapkit.com +cdn-cn4.apple-mapkit.com +cdn.apple-mapkit.com +cdn1.apple-mapkit.com +cdn2.apple-mapkit.com +cdn3.apple-mapkit.com +cdn4.apple-mapkit.com +cds-cdn.v.aaplimg.com +cds.apple.com +cds.apple.com.akadns.net +cl1-cdn.origin-apple.com.akadns.net +cl1.apple.com +cl2-cn.apple.com +cl2.apple.com +cl2.apple.com.edgekey.net.globalredir.akadns.net +cl3-cdn.origin-apple.com.akadns.net +cl3.apple.com +cl4-cdn.origin-apple.com.akadns.net +cl4-cn.apple.com +cl4.apple.com +cl5-cdn.origin-apple.com.akadns.net +cl5.apple.com +clientflow.apple.com +clientflow.apple.com.akadns.net +cnn.com +configuration.apple.com +configuration.apple.com.akadns.net +craigslist.org +cstat.apple.com +dd-cdn.origin-apple.com.akadns.net +download.developer.apple.com +ebay.com +espn.com +fandom.com +foxnews.com +gs-loc-cn.apple.com +gs-loc.apple.com +gsp10-ssl-cn.ls.apple.com +gsp11-cn.ls.apple.com +gsp12-cn.ls.apple.com +gsp13-cn.ls.apple.com +gsp4-cn.ls.apple.com +gsp4-cn.ls.apple.com.edgekey.net +gsp4-cn.ls.apple.com.edgekey.net.globalredir.akadns.net +gsp5-cn.ls.apple.com +gsp85-cn-ssl.ls.apple.com +gspe19-cn-ssl.ls.apple.com +gspe19-cn.ls-apple.com.akadns.net +gspe19-cn.ls.apple.com +gspe21-ssl.ls.apple.com +gspe21.ls.apple.com +gspe35-ssl.ls.apple.com +hulu.com +iadsdk.apple.com +icloud-cdn.icloud.com.akadns.net +icloud.cdn-apple.com +images.apple.com +images.apple.com.akadns.net +images.apple.com.edgekey.net.globalredir.akadns.net +imdb.com +indeed.com +init-p01md-lb.push-apple.com.akadns.net +init-p01md.apple.com +init-p01st-lb.push-apple.com.akadns.net +init-p01st.push.apple.com +init-s01st-lb.push-apple.com.akadns.net +init-s01st.push.apple.com +instructure.com +iosapps.itunes.g.aaplimg.com +iphone-ld.apple.com +is1-ssl.mzstatic.com +is1.mzstatic.com +is2-ssl.mzstatic.com +is2.mzstatic.com +is3-ssl.mzstatic.com +is3.mzstatic.com +is4-ssl.mzstatic.com +is4.mzstatic.com +is5-ssl.mzstatic.com +is5.mzstatic.com +itunes-apple.com.akadns.net +itunes.apple.com +itunesconnect.apple.com +linkedin.com +live.com +mesu-cdn.apple.com.akadns.net +mesu-china.apple.com.akadns.net +mesu.apple.com +microsoft.com +msn.com +music.apple.com +netflix.com +ocsp-lb.apple.com.akadns.net +ocsp.apple.com +office.com +oscdn.apple.com +oscdn.origin-apple.com.akadns.net +pancake.apple.com +pancake.cdn-apple.com.akadns.net +paypal.com +phobos.apple.com +prod-support.apple-support.akadns.net +reserve-prime.apple.com +roblox.com +s.mzstatic.com +stocks-sparkline-lb.apple.com.akadns.net +store.apple.com +store.apple.com.edgekey.net +store.apple.com.edgekey.net.globalredir.akadns.net +store.storeimages.apple.com.akadns.net +store.storeimages.cdn-apple.com +support-china.apple-support.akadns.net +support.apple.com +swcatalog-cdn.apple.com.akadns.net +swcatalog.apple.com +swcdn.apple.com +swcdn.g.aaplimg.com +swdist.apple.com +swdist.apple.com.akadns.net +swscan-cdn.apple.com.akadns.net +swscan.apple.com +t-mobile.com +target.com +updates-http.cdn-apple.com +updates-http.cdn-apple.com.akadns.net +updates.cdn-apple.com +usps.com +valid.apple.com +valid.origin-apple.com.akadns.net +walmart.com +weather.com +worldstar.com +www.apple.com +www.apple.com.edgekey.net +www.apple.com.edgekey.net.globalredir.akadns.net +yahoo.com +zillow.com +zoom.us diff --git a/factory/resultant/top50_proxy.list b/factory/resultant/top50_proxy.list new file mode 100644 index 0000000..0fe7e8d --- /dev/null +++ b/factory/resultant/top50_proxy.list @@ -0,0 +1,21 @@ +# top500 proxy list update time: 2021-12-05 16:46:12 +discord.com +duckduckgo.com +etsy.com +facebook.com +google.com +instagram.com +microsoftonline.com +nytimes.com +pinterest.com +pornhub.com +quora.com +reddit.com +tiktok.com +twitch.tv +twitter.com +wikipedia.org +xhamster.com +xnxx.com +xvideos.com +youtube.com diff --git a/factory/top500.py b/factory/top500.py deleted file mode 100644 index 093e44a..0000000 --- a/factory/top500.py +++ /dev/null @@ -1,160 +0,0 @@ -# -*- coding: utf-8 -*- - -from bs4 import BeautifulSoup -import threading -import time -import sys -import requests -import re - - -urls = ['http://alexa.chinaz.com/Global/index.html'] -for i in range(2,21): - urls.append('http://alexa.chinaz.com/Global/index_%d.html'%i) - -urls_scan_over = False - -domains = [] - -domains_proxy = [] -domains_direct = [] - - -# thread to scan pages in urls -class UrlScaner(threading.Thread): - def __init__(self): - threading.Thread.__init__(self) - - def run(self): - global urls_scan_over, urls - - done_num = 0 - - while len(urls): - html = self.fetchHTML( urls.pop(0) ) - self.praseHTML(html) - - done_num = done_num + 25 - print('top500 List Got: %d/500'%done_num) - - time.sleep(1) - - urls_scan_over = True - print('top500 List Fetched Over.') - - - def fetchHTML(self, url): - success = False - try_times = 0 - r = None - while try_times < 5 and not success: - r = requests.get(url) - if r.status_code != 200: - time.sleep(1) - try_times = try_times + 1 - else: - success = True - break - - if not success: - sys.exit('error in request %s\n\treturn code: %d' % (url, r.status_code) ) - - r.encoding = 'utf-8' - return r.text - - - def praseHTML(self, html): - soup = BeautifulSoup(html, "lxml") - namesDom = soup.select("div.righttxt h3 span") - - for name in namesDom: - domains.append(name.string) - - -requests_header = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36', - 'Cache-Control': 'max-age=0', - 'Accept-Language': 'zh-CN,zh;q=0.8,zh-HK;q=0.6,zh-TW;q=0.4,en;q=0.2', - 'Connection': 'keep-alive' -} - - -# thread to visit websites -class DomainScaner(threading.Thread): - def __init__(self): - threading.Thread.__init__(self) - - def run(self): - while not urls_scan_over or len(domains): - if len(domains) == 0: - time.sleep(2) - continue - - domain = domains.pop(0) - - if domain.endswith('.cn'): - continue - if 'google' in domain: - continue - - is_proxy = False - - try: - requests.get('http://www.' + domain, timeout=10, headers=requests_header) - except BaseException: - try: - requests.get('http://' + domain, timeout=10, headers=requests_header) - except BaseException: - is_proxy = True - - if is_proxy: - domains_proxy.append(domain) - else: - domains_direct.append(domain) - - print('[Doamins Remain: %d]\tProxy %s:%s' % (len(domains), is_proxy, domain) ) - - global scaner_thread_num - scaner_thread_num -= 1 - - -print('top500 Script Starting...\n\n') - -# Start Thread -UrlScaner().start() -scaner_thread_num = 0 -for i in range(3): - DomainScaner().start() - scaner_thread_num += 1 - -# wait thread done -while scaner_thread_num: - pass - -# write files -file_proxy = open('resultant/top500_proxy.list', 'w', encoding='utf-8') -file_direct = open('resultant/top500_direct.list', 'w', encoding='utf-8') - -now_time = time.strftime("%Y-%m-%d %H:%M:%S") -file_proxy.write('# top500 proxy list update time: ' + now_time + '\n') -file_direct.write('# top500 direct list update time: ' + now_time + '\n') - - -# 将苹果IP加入直连 -# 由于本脚本应当运行在内部环境中,可能无法访问Github,故改用staticdn.net提供的CDN节点 -# r = requests.get(url="https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf", headers=requests_header) -r = requests.get(url='https://raw.staticdn.net/felixonmars/dnsmasq-china-list/master/apple.china.conf', headers=requests_header) -for url in r.text.split("\n")[:-1]: - url = re.sub(r'(server=\/)', '', url) # 清除前缀 - url = re.sub(r'(/114.114.114.114)', '', url) # 清除后缀 - domains_direct.append(url) - -domains_direct = list( set(domains_direct) ) -domains_proxy = list( set(domains_proxy) ) -domains_direct.sort() -domains_proxy.sort() - -for domain in domains_direct: - file_direct.write(domain+'\n') -for domain in domains_proxy: - file_proxy.write(domain+'\n')