mirror of
https://github.com/Johnshall/Shadowrocket-ADBlock-Rules-Forever.git
synced 2025-12-17 15:24:43 +08:00
获取最新的top500网站列表
This commit is contained in:
@@ -5,11 +5,27 @@
|
|||||||
该脚本应当在内网环境中运行
|
该脚本应当在内网环境中运行
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
import threading
|
import threading
|
||||||
|
import csv
|
||||||
|
|
||||||
|
url = 'https://moz.com/top-500/download/?table=top500Domains'
|
||||||
|
r = requests.get(url)
|
||||||
|
with open("top500Domains.csv", "wb") as code:
|
||||||
|
code.write(r.content)
|
||||||
|
|
||||||
|
with open('top500Domains_new.csv','r') as csvfile:
|
||||||
|
reader = csv.reader(csvfile)
|
||||||
|
with open("resultant/top500_manual.list", "w") as file_domain_in:
|
||||||
|
for domain_i,rows in enumerate(reader):
|
||||||
|
if domain_i != 1:
|
||||||
|
for domain_n in reader:
|
||||||
|
file_domain_in.write(domain_n[1] + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Read top500
|
|
||||||
domains = []
|
domains = []
|
||||||
with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
|
with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
|
||||||
for domain in f.readlines():
|
for domain in f.readlines():
|
||||||
@@ -17,6 +33,7 @@ with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
|
|||||||
continue
|
continue
|
||||||
domains.append(domain[:-1])
|
domains.append(domain[:-1])
|
||||||
|
|
||||||
|
|
||||||
domains_proxy = []
|
domains_proxy = []
|
||||||
domains_direct = []
|
domains_direct = []
|
||||||
requests_header = {
|
requests_header = {
|
||||||
|
|||||||
Reference in New Issue
Block a user