获取最新的top500网站列表

This commit is contained in:
mskgroup
2022-05-23 12:16:58 +08:00
committed by GitHub
parent 3a1fc5bf78
commit 480b04ed98

View File

@@ -5,11 +5,27 @@
该脚本应当在内网环境中运行
'''
import pandas as pd
import requests
import time
import threading
import csv
# Download the latest Moz top-500 domain table and save it as CSV.
url = 'https://moz.com/top-500/download/?table=top500Domains'
# A timeout keeps the script from hanging indefinitely on a stalled connection.
r = requests.get(url, timeout=30)
# Fail loudly instead of saving an HTTP error page as if it were CSV data.
r.raise_for_status()
with open("top500Domains.csv", "wb") as code:
    code.write(r.content)

# Rebuild the domain list from the file that was just downloaded.
# NOTE(review): this previously read 'top500Domains_new.csv', which nothing in
# the visible script writes, so the fresh download was never used - confirm
# that no external step is expected to produce the "_new" file.
with open("top500Domains.csv", "r", encoding='utf-8', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Written as UTF-8 because the list is read back with encoding='utf-8'.
    with open("resultant/top500_manual.list", "w", encoding='utf-8') as file_domain_in:
        # Skip the first row (presumably the column header). The old nested
        # loops over the same reader dropped exactly this row as a side effect.
        next(reader, None)
        for row in reader:
            # Ignore blank or truncated rows that have no second column.
            if len(row) > 1:
                # Second column is written out as the domain, one per line.
                file_domain_in.write(row[1] + '\n')
# Read top500
domains = []
with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
for domain in f.readlines():
@@ -17,6 +33,7 @@ with open("resultant/top500_manual.list", "r", encoding='utf-8') as f:
continue
domains.append(domain[:-1])
domains_proxy = []
domains_direct = []
requests_header = {