mirror of
https://github.com/Johnshall/Shadowrocket-ADBlock-Rules-Forever.git
synced 2025-12-19 00:05:12 +08:00
remove top500.py
This commit is contained in:
156
factory/resultant/top50_direct_with_Apple.list.list
Normal file
156
factory/resultant/top50_direct_with_Apple.list.list
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
# top500 direct list update time: 2021-12-05 16:46:12
|
||||||
|
a1.mzstatic.com
|
||||||
|
a2.mzstatic.com
|
||||||
|
a3.mzstatic.com
|
||||||
|
a4.mzstatic.com
|
||||||
|
a5.mzstatic.com
|
||||||
|
accuweather.com
|
||||||
|
adcdownload.apple.com
|
||||||
|
adcdownload.apple.com.akadns.net
|
||||||
|
amazon.com
|
||||||
|
appldnld.apple.com
|
||||||
|
appldnld.g.aaplimg.com
|
||||||
|
appleid.cdn-apple.com
|
||||||
|
apps.apple.com
|
||||||
|
apps.mzstatic.com
|
||||||
|
bing.com
|
||||||
|
cdn-cn1.apple-mapkit.com
|
||||||
|
cdn-cn2.apple-mapkit.com
|
||||||
|
cdn-cn3.apple-mapkit.com
|
||||||
|
cdn-cn4.apple-mapkit.com
|
||||||
|
cdn.apple-mapkit.com
|
||||||
|
cdn1.apple-mapkit.com
|
||||||
|
cdn2.apple-mapkit.com
|
||||||
|
cdn3.apple-mapkit.com
|
||||||
|
cdn4.apple-mapkit.com
|
||||||
|
cds-cdn.v.aaplimg.com
|
||||||
|
cds.apple.com
|
||||||
|
cds.apple.com.akadns.net
|
||||||
|
cl1-cdn.origin-apple.com.akadns.net
|
||||||
|
cl1.apple.com
|
||||||
|
cl2-cn.apple.com
|
||||||
|
cl2.apple.com
|
||||||
|
cl2.apple.com.edgekey.net.globalredir.akadns.net
|
||||||
|
cl3-cdn.origin-apple.com.akadns.net
|
||||||
|
cl3.apple.com
|
||||||
|
cl4-cdn.origin-apple.com.akadns.net
|
||||||
|
cl4-cn.apple.com
|
||||||
|
cl4.apple.com
|
||||||
|
cl5-cdn.origin-apple.com.akadns.net
|
||||||
|
cl5.apple.com
|
||||||
|
clientflow.apple.com
|
||||||
|
clientflow.apple.com.akadns.net
|
||||||
|
cnn.com
|
||||||
|
configuration.apple.com
|
||||||
|
configuration.apple.com.akadns.net
|
||||||
|
craigslist.org
|
||||||
|
cstat.apple.com
|
||||||
|
dd-cdn.origin-apple.com.akadns.net
|
||||||
|
download.developer.apple.com
|
||||||
|
ebay.com
|
||||||
|
espn.com
|
||||||
|
fandom.com
|
||||||
|
foxnews.com
|
||||||
|
gs-loc-cn.apple.com
|
||||||
|
gs-loc.apple.com
|
||||||
|
gsp10-ssl-cn.ls.apple.com
|
||||||
|
gsp11-cn.ls.apple.com
|
||||||
|
gsp12-cn.ls.apple.com
|
||||||
|
gsp13-cn.ls.apple.com
|
||||||
|
gsp4-cn.ls.apple.com
|
||||||
|
gsp4-cn.ls.apple.com.edgekey.net
|
||||||
|
gsp4-cn.ls.apple.com.edgekey.net.globalredir.akadns.net
|
||||||
|
gsp5-cn.ls.apple.com
|
||||||
|
gsp85-cn-ssl.ls.apple.com
|
||||||
|
gspe19-cn-ssl.ls.apple.com
|
||||||
|
gspe19-cn.ls-apple.com.akadns.net
|
||||||
|
gspe19-cn.ls.apple.com
|
||||||
|
gspe21-ssl.ls.apple.com
|
||||||
|
gspe21.ls.apple.com
|
||||||
|
gspe35-ssl.ls.apple.com
|
||||||
|
hulu.com
|
||||||
|
iadsdk.apple.com
|
||||||
|
icloud-cdn.icloud.com.akadns.net
|
||||||
|
icloud.cdn-apple.com
|
||||||
|
images.apple.com
|
||||||
|
images.apple.com.akadns.net
|
||||||
|
images.apple.com.edgekey.net.globalredir.akadns.net
|
||||||
|
imdb.com
|
||||||
|
indeed.com
|
||||||
|
init-p01md-lb.push-apple.com.akadns.net
|
||||||
|
init-p01md.apple.com
|
||||||
|
init-p01st-lb.push-apple.com.akadns.net
|
||||||
|
init-p01st.push.apple.com
|
||||||
|
init-s01st-lb.push-apple.com.akadns.net
|
||||||
|
init-s01st.push.apple.com
|
||||||
|
instructure.com
|
||||||
|
iosapps.itunes.g.aaplimg.com
|
||||||
|
iphone-ld.apple.com
|
||||||
|
is1-ssl.mzstatic.com
|
||||||
|
is1.mzstatic.com
|
||||||
|
is2-ssl.mzstatic.com
|
||||||
|
is2.mzstatic.com
|
||||||
|
is3-ssl.mzstatic.com
|
||||||
|
is3.mzstatic.com
|
||||||
|
is4-ssl.mzstatic.com
|
||||||
|
is4.mzstatic.com
|
||||||
|
is5-ssl.mzstatic.com
|
||||||
|
is5.mzstatic.com
|
||||||
|
itunes-apple.com.akadns.net
|
||||||
|
itunes.apple.com
|
||||||
|
itunesconnect.apple.com
|
||||||
|
linkedin.com
|
||||||
|
live.com
|
||||||
|
mesu-cdn.apple.com.akadns.net
|
||||||
|
mesu-china.apple.com.akadns.net
|
||||||
|
mesu.apple.com
|
||||||
|
microsoft.com
|
||||||
|
msn.com
|
||||||
|
music.apple.com
|
||||||
|
netflix.com
|
||||||
|
ocsp-lb.apple.com.akadns.net
|
||||||
|
ocsp.apple.com
|
||||||
|
office.com
|
||||||
|
oscdn.apple.com
|
||||||
|
oscdn.origin-apple.com.akadns.net
|
||||||
|
pancake.apple.com
|
||||||
|
pancake.cdn-apple.com.akadns.net
|
||||||
|
paypal.com
|
||||||
|
phobos.apple.com
|
||||||
|
prod-support.apple-support.akadns.net
|
||||||
|
reserve-prime.apple.com
|
||||||
|
roblox.com
|
||||||
|
s.mzstatic.com
|
||||||
|
stocks-sparkline-lb.apple.com.akadns.net
|
||||||
|
store.apple.com
|
||||||
|
store.apple.com.edgekey.net
|
||||||
|
store.apple.com.edgekey.net.globalredir.akadns.net
|
||||||
|
store.storeimages.apple.com.akadns.net
|
||||||
|
store.storeimages.cdn-apple.com
|
||||||
|
support-china.apple-support.akadns.net
|
||||||
|
support.apple.com
|
||||||
|
swcatalog-cdn.apple.com.akadns.net
|
||||||
|
swcatalog.apple.com
|
||||||
|
swcdn.apple.com
|
||||||
|
swcdn.g.aaplimg.com
|
||||||
|
swdist.apple.com
|
||||||
|
swdist.apple.com.akadns.net
|
||||||
|
swscan-cdn.apple.com.akadns.net
|
||||||
|
swscan.apple.com
|
||||||
|
t-mobile.com
|
||||||
|
target.com
|
||||||
|
updates-http.cdn-apple.com
|
||||||
|
updates-http.cdn-apple.com.akadns.net
|
||||||
|
updates.cdn-apple.com
|
||||||
|
usps.com
|
||||||
|
valid.apple.com
|
||||||
|
valid.origin-apple.com.akadns.net
|
||||||
|
walmart.com
|
||||||
|
weather.com
|
||||||
|
worldstar.com
|
||||||
|
www.apple.com
|
||||||
|
www.apple.com.edgekey.net
|
||||||
|
www.apple.com.edgekey.net.globalredir.akadns.net
|
||||||
|
yahoo.com
|
||||||
|
zillow.com
|
||||||
|
zoom.us
|
||||||
21
factory/resultant/top50_proxy.list
Normal file
21
factory/resultant/top50_proxy.list
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
# top500 proxy list update time: 2021-12-05 16:46:12
|
||||||
|
discord.com
|
||||||
|
duckduckgo.com
|
||||||
|
etsy.com
|
||||||
|
facebook.com
|
||||||
|
google.com
|
||||||
|
instagram.com
|
||||||
|
microsoftonline.com
|
||||||
|
nytimes.com
|
||||||
|
pinterest.com
|
||||||
|
pornhub.com
|
||||||
|
quora.com
|
||||||
|
reddit.com
|
||||||
|
tiktok.com
|
||||||
|
twitch.tv
|
||||||
|
twitter.com
|
||||||
|
wikipedia.org
|
||||||
|
xhamster.com
|
||||||
|
xnxx.com
|
||||||
|
xvideos.com
|
||||||
|
youtube.com
|
||||||
@@ -1,160 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
import threading
|
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
import requests
|
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
# Ranking pages to scrape: the first page has no numeric suffix, the
# remaining ones follow the index_<n>.html pattern for n = 2..20.
urls = ['http://alexa.chinaz.com/Global/index.html'] + [
    'http://alexa.chinaz.com/Global/index_%d.html' % page
    for page in range(2, 21)
]

# Set to True by UrlScaner.run once every page in `urls` has been handled.
urls_scan_over = False

# Domains scraped from the ranking pages that still have to be probed.
domains = []

# Probe results: domains whose probe failed vs. domains that answered.
domains_proxy = []
domains_direct = []
|
|
||||||
|
|
||||||
|
|
||||||
# thread to scan pages in urls
|
|
||||||
class UrlScaner(threading.Thread):
    """Producer thread: download every page in ``urls`` and queue its domains.

    Pages are taken from the front of the module-level ``urls`` list; the
    domain names found on each page are appended to the module-level
    ``domains`` list. ``urls_scan_over`` is set to True when the thread stops,
    whether it finished normally or aborted on a failed download.
    """

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        """Drain ``urls`` page by page, then signal completion."""
        global urls_scan_over, urls

        done_num = 0

        try:
            while len(urls):
                html = self.fetchHTML(urls.pop(0))
                self.praseHTML(html)

                # NOTE(review): assumes 25 entries per page (20 pages -> 500);
                # the counter is only used for the progress message.
                done_num = done_num + 25
                print('top500 List Got: %d/500' % done_num)

                time.sleep(1)
        except SystemExit as exc:
            # threading silently discards SystemExit raised in a worker
            # thread, so the failure message has to be printed explicitly.
            print(exc, file=sys.stderr)
            raise
        finally:
            # Always raise the flag: the consumer threads loop until it is
            # set, so leaving it False after a failed download would hang
            # the whole script.
            urls_scan_over = True

        print('top500 List Fetched Over.')

    def fetchHTML(self, url):
        """Return the body of *url* decoded as UTF-8.

        The request is attempted up to 5 times, sleeping one second after
        each failed attempt. Both non-200 responses and network errors count
        as failures.

        Raises:
            SystemExit: if all attempts fail.
        """
        last_error = 'no response'

        for _ in range(5):
            try:
                # A timeout keeps one stalled connection from blocking the
                # scan indefinitely.
                r = requests.get(url, timeout=10)
            except requests.RequestException as exc:
                last_error = 'request failed: %s' % exc
            else:
                if r.status_code == 200:
                    r.encoding = 'utf-8'
                    return r.text
                last_error = 'return code: %d' % r.status_code
            time.sleep(1)

        sys.exit('error in request %s\n\t%s' % (url, last_error))

    def praseHTML(self, html):
        """Append the text of every ``div.righttxt h3 span`` node to ``domains``."""
        soup = BeautifulSoup(html, "lxml")

        for name in soup.select("div.righttxt h3 span"):
            # .string is None when the span does not hold exactly one text
            # node; queueing None would crash the consumers later on.
            if name.string is not None:
                domains.append(name.string)
|
|
||||||
|
|
||||||
|
|
||||||
# Browser-like HTTP headers for outgoing requests that pass
# `headers=requests_header`.
requests_header = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.36'
    ),
    'Cache-Control': 'max-age=0',
    'Accept-Language': 'zh-CN,zh;q=0.8,zh-HK;q=0.6,zh-TW;q=0.4,en;q=0.2',
    'Connection': 'keep-alive',
}
|
|
||||||
|
|
||||||
|
|
||||||
# thread to visit websites
|
|
||||||
class DomainScaner(threading.Thread):
    """Consumer thread: probe queued domains and file them as direct or proxy.

    Domains are taken from the front of the module-level ``domains`` list
    until the list is empty and ``urls_scan_over`` is True. A domain that
    answers an HTTP request goes to ``domains_direct``; one for which both
    probe requests fail goes to ``domains_proxy``. When the thread stops it
    decrements the module-level ``scaner_thread_num`` counter.
    """

    # Guards the read-modify-write on scaner_thread_num, which several
    # workers may perform at the same moment when the queue runs dry.
    _count_lock = threading.Lock()

    def __init__(self):
        threading.Thread.__init__(self)

    def run(self):
        """Process the domain queue until the producer is done and it is empty."""
        global scaner_thread_num

        try:
            while not urls_scan_over or len(domains):
                # Pop first and handle the empty case afterwards: checking
                # the length and then popping lets another worker take the
                # last item in between, which raised IndexError.
                try:
                    domain = domains.pop(0)
                except IndexError:
                    time.sleep(2)
                    continue

                # Skip empty/None entries, .cn domains and anything Google.
                if not domain or domain.endswith('.cn') or 'google' in domain:
                    continue

                is_proxy = not self._reachable(domain)

                if is_proxy:
                    domains_proxy.append(domain)
                else:
                    domains_direct.append(domain)

                print('[Doamins Remain: %d]\tProxy %s:%s' % (len(domains), is_proxy, domain))
        finally:
            # Decrement even if the loop died on an unexpected error, so code
            # waiting on the counter is never left hanging.
            with self._count_lock:
                scaner_thread_num -= 1

    def _reachable(self, domain):
        """Return True if ``www.<domain>`` or ``<domain>`` answers over HTTP."""
        for host in ('www.' + domain, domain):
            try:
                requests.get('http://' + host, timeout=10, headers=requests_header)
            except Exception:
                # Any failure counts as "not reachable"; unlike BaseException
                # this lets SystemExit and KeyboardInterrupt through.
                continue
            return True
        return False
|
|
||||||
|
|
||||||
|
|
||||||
print('top500 Script Starting...\n\n')

# Start Thread
url_scaner = UrlScaner()
url_scaner.start()

scaner_thread_num = 0
domain_scaners = []
for i in range(3):
    # Count the worker before starting it, so the decrement it performs on
    # exit can never run ahead of this increment.
    scaner_thread_num += 1
    worker = DomainScaner()
    domain_scaners.append(worker)
    worker.start()

# wait thread done
# join() blocks without using CPU, unlike spinning on scaner_thread_num.
url_scaner.join()
for worker in domain_scaners:
    worker.join()

# Add Apple's domains to the direct list.
# This script is expected to run from inside the network, where GitHub may be
# unreachable, so the list is fetched from the CDN mirror provided by
# staticdn.net instead of the upstream location:
#   https://raw.githubusercontent.com/felixonmars/dnsmasq-china-list/master/apple.china.conf
# The download happens before the output files are opened, so a failed
# request no longer leaves them truncated.
r = requests.get(
    url='https://raw.staticdn.net/felixonmars/dnsmasq-china-list/master/apple.china.conf',
    headers=requests_header,
    timeout=30,
)
# Stop here on an HTTP error rather than writing an error page into the list.
r.raise_for_status()

# Entries look like "server=/<domain>/114.114.114.114"; keep only <domain>.
# NOTE(review): lines that do not match this shape are skipped; the old code
# appended them to the direct list unchanged -- confirm nothing relied on that.
apple_entry = re.compile(r'^server=/([^/]+)/')
for line in r.text.splitlines():
    match = apple_entry.match(line.strip())
    if match:
        domains_direct.append(match.group(1))

# De-duplicate and sort both lists before writing.
domains_direct = sorted(set(domains_direct))
domains_proxy = sorted(set(domains_proxy))

# write files
now_time = time.strftime("%Y-%m-%d %H:%M:%S")

with open('resultant/top500_proxy.list', 'w', encoding='utf-8') as file_proxy:
    file_proxy.write('# top500 proxy list update time: ' + now_time + '\n')
    file_proxy.writelines(domain + '\n' for domain in domains_proxy)

with open('resultant/top500_direct.list', 'w', encoding='utf-8') as file_direct:
    file_direct.write('# top500 direct list update time: ' + now_time + '\n')
    file_direct.writelines(domain + '\n' for domain in domains_direct)
|
|
||||||
Reference in New Issue
Block a user