mirror of
https://github.com/Johnshall/Shadowrocket-ADBlock-Rules-Forever.git
synced 2025-12-18 07:44:57 +08:00
更好地解析广告规则
This commit is contained in:
@@ -41,6 +41,7 @@
|
||||
|
||||
脚本,运行所需时间较长。自动爬取生成 `top500_*.list` 文件。
|
||||
|
||||
-----------------------------------
|
||||
|
||||
**resultant/ad.list**
|
||||
|
||||
@@ -48,5 +49,5 @@
|
||||
|
||||
**ad.py**
|
||||
|
||||
脚本,从指定的 Adblock Rule 中提取广告服务器的域名和 IP 至 `ad_*.list` 文件。
|
||||
脚本,从指定的 Adblock Rule 中提取广告服务器的域名和 IP 至 `ad.list` 文件。
|
||||
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
#
|
||||
# 提取广告规则,并且只提取对全域禁止的那种规则
|
||||
#
|
||||
|
||||
import time
|
||||
import sys
|
||||
import requests
|
||||
@@ -40,27 +44,30 @@ for rule_url in rules_url:
|
||||
|
||||
rule = r.text
|
||||
|
||||
# parse html
|
||||
# parse rule
|
||||
rule = rule.split('\n')
|
||||
for row in rule:
|
||||
if not row.startswith('||') and not row.startswith('|http'):
|
||||
row = row.strip()
|
||||
|
||||
# 直接跳过
|
||||
if row.startswith('!') or row.startswith('@@') or "$" in row:
|
||||
continue
|
||||
|
||||
# del prefix
|
||||
row = re.sub(r'^\|(\||https?:\/\/)', '', row)
|
||||
# del suffix
|
||||
row = row.rstrip('/^ ')
|
||||
# 清除前缀
|
||||
row = re.sub(r'^\|?https?:\/\/', '', row)
|
||||
row = re.sub(r'^\|\|', '', row)
|
||||
row = row.lstrip('.*')
|
||||
|
||||
if re.search(r'[\$\^:\*]', row):
|
||||
continue
|
||||
if row.count('/'):
|
||||
# 清除后缀
|
||||
row = row.rstrip('/^*')
|
||||
|
||||
# 不能含有的字符
|
||||
if re.search(r'[\/\^:\*]', row):
|
||||
continue
|
||||
|
||||
if not re.match(r'\w+(\.\w+)+$', row):
|
||||
continue
|
||||
|
||||
# match
|
||||
domains.append(row)
|
||||
# 只匹配域名或 IP
|
||||
if re.match(r'^([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,9}$', row) or re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', row):
|
||||
domains.append(row)
|
||||
|
||||
print('done.')
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +1,4 @@
|
||||
# top500 direct list update time: 2017-11-26 15:20:47
|
||||
# top500 direct list update time: 2018-01-11 17:42:31
|
||||
123movies.to
|
||||
163.com
|
||||
1688.com
|
||||
@@ -19,7 +19,6 @@ adexchangeprediction.com
|
||||
adf.ly
|
||||
adnetworkperformance.com
|
||||
adobe.com
|
||||
airbnb.com
|
||||
alibaba.com
|
||||
aliexpress.com
|
||||
alipay.com
|
||||
@@ -48,7 +47,6 @@ baidu.com
|
||||
baike.com
|
||||
bankofamerica.com
|
||||
battle.net
|
||||
bbc.co.uk
|
||||
bbc.com
|
||||
behance.net
|
||||
bestbuy.com
|
||||
@@ -61,7 +59,6 @@ bitauto.com
|
||||
blackboard.com
|
||||
blastingnews.com
|
||||
blkget.com
|
||||
bongacams.com
|
||||
booking.com
|
||||
box.com
|
||||
bukalapak.com
|
||||
@@ -89,6 +86,7 @@ daum.net
|
||||
dcinside.com
|
||||
dell.com
|
||||
detail.tmall.com
|
||||
detik.com
|
||||
deviantart.com
|
||||
dictionary.com
|
||||
digikala.com
|
||||
@@ -129,7 +127,6 @@ giphy.com
|
||||
github.com
|
||||
github.io
|
||||
gizmodo.com
|
||||
globaloffers.link
|
||||
globo.com
|
||||
gmx.net
|
||||
go.com
|
||||
@@ -137,6 +134,7 @@ godaddy.com
|
||||
goo.ne.jp
|
||||
goodreads.com
|
||||
groupon.com
|
||||
gsmarena.com
|
||||
hao123.com
|
||||
haosou.com
|
||||
hatena.ne.jp
|
||||
@@ -191,6 +189,7 @@ mashable.com
|
||||
mediafire.com
|
||||
mediawhirl.net
|
||||
mega.nz
|
||||
mercadolibre.com.ar
|
||||
mercadolivre.com.br
|
||||
mi.com
|
||||
microsoft.com
|
||||
@@ -222,7 +221,6 @@ oracle.com
|
||||
orange.fr
|
||||
ouo.io
|
||||
outbrain.com
|
||||
ozock.com
|
||||
pandora.com
|
||||
paypal.com
|
||||
paytm.com
|
||||
@@ -273,7 +271,6 @@ spotscenered.info
|
||||
stackexchange.com
|
||||
stackoverflow.com
|
||||
state.gov
|
||||
steamcommunity.com
|
||||
steampowered.com
|
||||
subscene.com
|
||||
taboola.com
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
# top500 proxy list update time: 2017-11-26 15:20:47
|
||||
# top500 proxy list update time: 2018-01-11 17:42:31
|
||||
4shared.com
|
||||
airbnb.com
|
||||
archive.org
|
||||
ask.com
|
||||
bbc.co.uk
|
||||
beeg.com
|
||||
bet365.com
|
||||
blog.jp
|
||||
@@ -11,11 +13,11 @@ blogspot.com.br
|
||||
blogspot.in
|
||||
blogspot.jp
|
||||
bloomberg.com
|
||||
bongacams.com
|
||||
bp.blogspot.com
|
||||
chaturbate.com
|
||||
cloudfront.net
|
||||
dailymotion.com
|
||||
detik.com
|
||||
disqus.com
|
||||
doubleclick.net
|
||||
dropbox.com
|
||||
@@ -28,8 +30,8 @@ fbcdn.net
|
||||
fc2.com
|
||||
files.wordpress.com
|
||||
flipkart.com
|
||||
globaloffers.link
|
||||
goo.gl
|
||||
gsmarena.com
|
||||
hclips.com
|
||||
hootsuite.com
|
||||
hurriyet.com.tr
|
||||
@@ -38,10 +40,10 @@ livedoor.jp
|
||||
ltn.com.tw
|
||||
media.tumblr.com
|
||||
medium.com
|
||||
mercadolibre.com.ar
|
||||
messenger.com
|
||||
nyaa.se
|
||||
nytimes.com
|
||||
ozock.com
|
||||
pinterest.com
|
||||
pixnet.net
|
||||
pornhub.com
|
||||
@@ -53,6 +55,7 @@ scribd.com
|
||||
shutterstock.com
|
||||
slideshare.net
|
||||
spankbang.com
|
||||
steamcommunity.com
|
||||
t.co
|
||||
telegram.org
|
||||
thepiratebay.org
|
||||
|
||||
324
sr_adb.conf
324
sr_adb.conf
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
# Best Shadowrocket Rules (https://github.com/h2y/Shadowrocket-ADBlock-Rules)
|
||||
# by Moshel
|
||||
# build time: 2017-11-26 16:22:26
|
||||
# build time: 2018-01-11 17:42:43
|
||||
|
||||
[General]
|
||||
bypass-system = true
|
||||
@@ -51,10 +51,12 @@ IP-CIDR,34.224.0.0/12,Proxy
|
||||
# 手工定义的 Direct 列表
|
||||
|
||||
|
||||
# top500 proxy list update time: 2017-11-26 15:20:47
|
||||
# top500 proxy list update time: 2018-01-11 17:42:31
|
||||
DOMAIN-SUFFIX,4shared.com,Proxy
|
||||
DOMAIN-SUFFIX,airbnb.com,Proxy
|
||||
DOMAIN-SUFFIX,archive.org,Proxy
|
||||
DOMAIN-SUFFIX,ask.com,Proxy
|
||||
DOMAIN-SUFFIX,bbc.co.uk,Proxy
|
||||
DOMAIN-SUFFIX,beeg.com,Proxy
|
||||
DOMAIN-SUFFIX,bet365.com,Proxy
|
||||
DOMAIN-SUFFIX,blog.jp,Proxy
|
||||
@@ -64,11 +66,11 @@ DOMAIN-SUFFIX,blogspot.com.br,Proxy
|
||||
DOMAIN-SUFFIX,blogspot.in,Proxy
|
||||
DOMAIN-SUFFIX,blogspot.jp,Proxy
|
||||
DOMAIN-SUFFIX,bloomberg.com,Proxy
|
||||
DOMAIN-SUFFIX,bongacams.com,Proxy
|
||||
DOMAIN-SUFFIX,bp.blogspot.com,Proxy
|
||||
DOMAIN-SUFFIX,chaturbate.com,Proxy
|
||||
DOMAIN-SUFFIX,cloudfront.net,Proxy
|
||||
DOMAIN-SUFFIX,dailymotion.com,Proxy
|
||||
DOMAIN-SUFFIX,detik.com,Proxy
|
||||
DOMAIN-SUFFIX,disqus.com,Proxy
|
||||
DOMAIN-SUFFIX,doubleclick.net,Proxy
|
||||
DOMAIN-SUFFIX,dropbox.com,Proxy
|
||||
@@ -81,8 +83,8 @@ DOMAIN-SUFFIX,fbcdn.net,Proxy
|
||||
DOMAIN-SUFFIX,fc2.com,Proxy
|
||||
DOMAIN-SUFFIX,files.wordpress.com,Proxy
|
||||
DOMAIN-SUFFIX,flipkart.com,Proxy
|
||||
DOMAIN-SUFFIX,globaloffers.link,Proxy
|
||||
DOMAIN-SUFFIX,goo.gl,Proxy
|
||||
DOMAIN-SUFFIX,gsmarena.com,Proxy
|
||||
DOMAIN-SUFFIX,hclips.com,Proxy
|
||||
DOMAIN-SUFFIX,hootsuite.com,Proxy
|
||||
DOMAIN-SUFFIX,hurriyet.com.tr,Proxy
|
||||
@@ -91,10 +93,10 @@ DOMAIN-SUFFIX,livedoor.jp,Proxy
|
||||
DOMAIN-SUFFIX,ltn.com.tw,Proxy
|
||||
DOMAIN-SUFFIX,media.tumblr.com,Proxy
|
||||
DOMAIN-SUFFIX,medium.com,Proxy
|
||||
DOMAIN-SUFFIX,mercadolibre.com.ar,Proxy
|
||||
DOMAIN-SUFFIX,messenger.com,Proxy
|
||||
DOMAIN-SUFFIX,nyaa.se,Proxy
|
||||
DOMAIN-SUFFIX,nytimes.com,Proxy
|
||||
DOMAIN-SUFFIX,ozock.com,Proxy
|
||||
DOMAIN-SUFFIX,pinterest.com,Proxy
|
||||
DOMAIN-SUFFIX,pixnet.net,Proxy
|
||||
DOMAIN-SUFFIX,pornhub.com,Proxy
|
||||
@@ -106,6 +108,7 @@ DOMAIN-SUFFIX,scribd.com,Proxy
|
||||
DOMAIN-SUFFIX,shutterstock.com,Proxy
|
||||
DOMAIN-SUFFIX,slideshare.net,Proxy
|
||||
DOMAIN-SUFFIX,spankbang.com,Proxy
|
||||
DOMAIN-SUFFIX,steamcommunity.com,Proxy
|
||||
DOMAIN-SUFFIX,t.co,Proxy
|
||||
DOMAIN-SUFFIX,telegram.org,Proxy
|
||||
DOMAIN-SUFFIX,thepiratebay.org,Proxy
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
# Best Shadowrocket Rules (https://github.com/h2y/Shadowrocket-ADBlock-Rules)
|
||||
# by Moshel
|
||||
# build time: 2017-11-26 16:22:26
|
||||
# build time: 2018-01-11 17:42:43
|
||||
|
||||
[General]
|
||||
bypass-system = true
|
||||
@@ -51,7 +51,7 @@ IP-CIDR,34.224.0.0/12,Proxy
|
||||
# 手工定义的 Direct 列表
|
||||
|
||||
|
||||
# top500 direct list update time: 2017-11-26 15:20:47
|
||||
# top500 direct list update time: 2018-01-11 17:42:31
|
||||
DOMAIN-SUFFIX,123movies.to,Direct
|
||||
DOMAIN-SUFFIX,163.com,Direct
|
||||
DOMAIN-SUFFIX,1688.com,Direct
|
||||
@@ -72,7 +72,6 @@ DOMAIN-SUFFIX,adexchangeprediction.com,Direct
|
||||
DOMAIN-SUFFIX,adf.ly,Direct
|
||||
DOMAIN-SUFFIX,adnetworkperformance.com,Direct
|
||||
DOMAIN-SUFFIX,adobe.com,Direct
|
||||
DOMAIN-SUFFIX,airbnb.com,Direct
|
||||
DOMAIN-SUFFIX,alibaba.com,Direct
|
||||
DOMAIN-SUFFIX,aliexpress.com,Direct
|
||||
DOMAIN-SUFFIX,alipay.com,Direct
|
||||
@@ -101,7 +100,6 @@ DOMAIN-SUFFIX,baidu.com,Direct
|
||||
DOMAIN-SUFFIX,baike.com,Direct
|
||||
DOMAIN-SUFFIX,bankofamerica.com,Direct
|
||||
DOMAIN-SUFFIX,battle.net,Direct
|
||||
DOMAIN-SUFFIX,bbc.co.uk,Direct
|
||||
DOMAIN-SUFFIX,bbc.com,Direct
|
||||
DOMAIN-SUFFIX,behance.net,Direct
|
||||
DOMAIN-SUFFIX,bestbuy.com,Direct
|
||||
@@ -114,7 +112,6 @@ DOMAIN-SUFFIX,bitauto.com,Direct
|
||||
DOMAIN-SUFFIX,blackboard.com,Direct
|
||||
DOMAIN-SUFFIX,blastingnews.com,Direct
|
||||
DOMAIN-SUFFIX,blkget.com,Direct
|
||||
DOMAIN-SUFFIX,bongacams.com,Direct
|
||||
DOMAIN-SUFFIX,booking.com,Direct
|
||||
DOMAIN-SUFFIX,box.com,Direct
|
||||
DOMAIN-SUFFIX,bukalapak.com,Direct
|
||||
@@ -142,6 +139,7 @@ DOMAIN-SUFFIX,daum.net,Direct
|
||||
DOMAIN-SUFFIX,dcinside.com,Direct
|
||||
DOMAIN-SUFFIX,dell.com,Direct
|
||||
DOMAIN-SUFFIX,detail.tmall.com,Direct
|
||||
DOMAIN-SUFFIX,detik.com,Direct
|
||||
DOMAIN-SUFFIX,deviantart.com,Direct
|
||||
DOMAIN-SUFFIX,dictionary.com,Direct
|
||||
DOMAIN-SUFFIX,digikala.com,Direct
|
||||
@@ -182,7 +180,6 @@ DOMAIN-SUFFIX,giphy.com,Direct
|
||||
DOMAIN-SUFFIX,github.com,Direct
|
||||
DOMAIN-SUFFIX,github.io,Direct
|
||||
DOMAIN-SUFFIX,gizmodo.com,Direct
|
||||
DOMAIN-SUFFIX,globaloffers.link,Direct
|
||||
DOMAIN-SUFFIX,globo.com,Direct
|
||||
DOMAIN-SUFFIX,gmx.net,Direct
|
||||
DOMAIN-SUFFIX,go.com,Direct
|
||||
@@ -190,6 +187,7 @@ DOMAIN-SUFFIX,godaddy.com,Direct
|
||||
DOMAIN-SUFFIX,goo.ne.jp,Direct
|
||||
DOMAIN-SUFFIX,goodreads.com,Direct
|
||||
DOMAIN-SUFFIX,groupon.com,Direct
|
||||
DOMAIN-SUFFIX,gsmarena.com,Direct
|
||||
DOMAIN-SUFFIX,hao123.com,Direct
|
||||
DOMAIN-SUFFIX,haosou.com,Direct
|
||||
DOMAIN-SUFFIX,hatena.ne.jp,Direct
|
||||
@@ -244,6 +242,7 @@ DOMAIN-SUFFIX,mashable.com,Direct
|
||||
DOMAIN-SUFFIX,mediafire.com,Direct
|
||||
DOMAIN-SUFFIX,mediawhirl.net,Direct
|
||||
DOMAIN-SUFFIX,mega.nz,Direct
|
||||
DOMAIN-SUFFIX,mercadolibre.com.ar,Direct
|
||||
DOMAIN-SUFFIX,mercadolivre.com.br,Direct
|
||||
DOMAIN-SUFFIX,mi.com,Direct
|
||||
DOMAIN-SUFFIX,microsoft.com,Direct
|
||||
@@ -275,7 +274,6 @@ DOMAIN-SUFFIX,oracle.com,Direct
|
||||
DOMAIN-SUFFIX,orange.fr,Direct
|
||||
DOMAIN-SUFFIX,ouo.io,Direct
|
||||
DOMAIN-SUFFIX,outbrain.com,Direct
|
||||
DOMAIN-SUFFIX,ozock.com,Direct
|
||||
DOMAIN-SUFFIX,pandora.com,Direct
|
||||
DOMAIN-SUFFIX,paypal.com,Direct
|
||||
DOMAIN-SUFFIX,paytm.com,Direct
|
||||
@@ -326,7 +324,6 @@ DOMAIN-SUFFIX,spotscenered.info,Direct
|
||||
DOMAIN-SUFFIX,stackexchange.com,Direct
|
||||
DOMAIN-SUFFIX,stackoverflow.com,Direct
|
||||
DOMAIN-SUFFIX,state.gov,Direct
|
||||
DOMAIN-SUFFIX,steamcommunity.com,Direct
|
||||
DOMAIN-SUFFIX,steampowered.com,Direct
|
||||
DOMAIN-SUFFIX,subscene.com,Direct
|
||||
DOMAIN-SUFFIX,taboola.com,Direct
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user