mirror of
https://github.com/Johnshall/Shadowrocket-ADBlock-Rules-Forever.git
synced 2025-12-17 23:34:48 +08:00
更好地解析广告规则
This commit is contained in:
@@ -1,5 +1,9 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
#
|
||||
# 提取广告规则,并且只提取对全域禁止的那种规则
|
||||
#
|
||||
|
||||
import time
|
||||
import sys
|
||||
import requests
|
||||
@@ -40,27 +44,30 @@ for rule_url in rules_url:
|
||||
|
||||
rule = r.text
|
||||
|
||||
# parse html
|
||||
# parse rule
|
||||
rule = rule.split('\n')
|
||||
for row in rule:
|
||||
if not row.startswith('||') and not row.startswith('|http'):
|
||||
row = row.strip()
|
||||
|
||||
# 直接跳过
|
||||
if row.startswith('!') or row.startswith('@@') or "$" in row:
|
||||
continue
|
||||
|
||||
# del prefix
|
||||
row = re.sub(r'^\|(\||https?:\/\/)', '', row)
|
||||
# del suffix
|
||||
row = row.rstrip('/^ ')
|
||||
# 清除前缀
|
||||
row = re.sub(r'^\|?https?:\/\/', '', row)
|
||||
row = re.sub(r'^\|\|', '', row)
|
||||
row = row.lstrip('.*')
|
||||
|
||||
if re.search(r'[\$\^:\*]', row):
|
||||
continue
|
||||
if row.count('/'):
|
||||
# 清除后缀
|
||||
row = row.rstrip('/^*')
|
||||
|
||||
# 不能含有的字符
|
||||
if re.search(r'[\/\^:\*]', row):
|
||||
continue
|
||||
|
||||
if not re.match(r'\w+(\.\w+)+$', row):
|
||||
continue
|
||||
|
||||
# match
|
||||
domains.append(row)
|
||||
# 只匹配域名或 IP
|
||||
if re.match(r'^([a-zA-Z0-9]([a-zA-Z0-9\-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,9}$', row) or re.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$', row):
|
||||
domains.append(row)
|
||||
|
||||
print('done.')
|
||||
|
||||
|
||||
Reference in New Issue
Block a user