♻️ 更换到正在更新的 Greatfire Analyzer 爬虫

This commit is contained in:
Johnshall
2022-04-07 11:22:41 +08:00
committed by GitHub
parent 6a3d5ac53b
commit eff0e9fefd

View File

@@ -4,7 +4,8 @@
# 下载并解析最新版本的 GFWList # 下载并解析最新版本的 GFWList
# 对于混合性质的网站,尽量走代理(忽略了所有的@@指令) # 对于混合性质的网站,尽量走代理(忽略了所有的@@指令)
# #
# 从 https://github.com/Loyalsoldier/cn-blocked-domain 中获取GFWList的补充 # 从 https://github.com/Johnshall/cn-blocked-domain 中获取GFWList的补充
# 感谢 https://github.com/Loyalsoldier/cn-blocked-domain
# #
@@ -88,7 +89,7 @@ def filtrate_rules(rules):
return ret return ret
# 从 https://github.com/Loyalsoldier/cn-blocked-domain 中获取GFWList的补充 # 从 https://github.com/Johnshall/cn-blocked-domain 中获取GFWList的补充
def getURLs(url): def getURLs(url):
r = requests.get(url) r = requests.get(url)
return r.text.split("\n")[:-1] return r.text.split("\n")[:-1]
@@ -101,8 +102,8 @@ rules = clear_format(rule)
rules = filtrate_rules(rules) rules = filtrate_rules(rules)
rules.extend(getURLs('https://raw.githubusercontent.com/Loyalsoldier/cn-blocked-domain/release/domains.txt')) rules.extend(getURLs('https://raw.githubusercontent.com/Johnshall/cn-blocked-domain/release/domains.txt'))
rules.extend('https://raw.githubusercontent.com/Loyalsoldier/cn-blocked-domain/release/ip.txt') rules.extend('https://raw.githubusercontent.com/Johnshall/cn-blocked-domain/release/ip.txt')
rules = list( set(rules) ) rules = list( set(rules) )
open('resultant/gfw.list', 'w', encoding='utf-8') \ open('resultant/gfw.list', 'w', encoding='utf-8') \