From 0cf4bc45d9a0bd4b810abd110c3ab04bc9aa6ebc Mon Sep 17 00:00:00 2001
From: H2Y
Date: Thu, 13 Dec 2018 11:54:43 +0800
Subject: [PATCH] add chnroutes.py

---
 factory/chnroutes.py | 56 ++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt     |  6 +++---
 2 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 factory/chnroutes.py

diff --git a/factory/chnroutes.py b/factory/chnroutes.py
new file mode 100644
index 0000000..8e23668
--- /dev/null
+++ b/factory/chnroutes.py
@@ -0,0 +1,56 @@
+# -*- coding: utf-8 -*-
+# Crawl and generate China Routes; this script is currently not enabled.
+
+import time
+import re
+import requests
+import sys
+
+
+apnic_ip_url = 'http://ftp.apnic.net/apnic/stats/apnic/delegated-apnic-latest'
+out_file = 'resultant/chnroutes.list'
+
+chnroutes = []
+
+
+def fetchHTML(url):
+    """Download `url` and return the response body as a list of lines.
+
+    The request is attempted up to five times, sleeping one second after
+    each failed attempt. A non-200 status and a network-level error
+    (connection failure, timeout, ...) both count as a failed attempt.
+
+    Exits the process through sys.exit() when every attempt has failed.
+    """
+    print("Downloading... " + url)
+
+    last_error = None
+    for _ in range(5):
+        try:
+            # Without a timeout a stalled connection would block forever.
+            r = requests.get(url, timeout=30)
+        except requests.exceptions.RequestException as e:
+            # Transient network errors are retried like bad status codes.
+            last_error = 'exception: %s' % e
+        else:
+            if r.status_code == 200:
+                # The body is decoded as UTF-8 whatever the headers say.
+                r.encoding = 'utf-8'
+                return r.text.split('\n')
+            last_error = 'return code: %d' % r.status_code
+        time.sleep(1)
+
+    sys.exit('error in request %s\n\t%s' % (url, last_error))
+
+
+# Main
+
+# apnic|CN|ipv4|116.89.240.0|1024|20170616|allocated
+searchRe = r'^apnic\|CN\|ipv4\|(.+)\|(\d+)\|\d+\|\w+$'
+
+for ln in fetchHTML(apnic_ip_url):
+    reRet = re.match(searchRe, ln)
+    if not reRet:
+        continue
+
+    print(reRet.group())
diff --git a/requirements.txt b/requirements.txt
index 226bc12..484e627 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-beautifulsoup4>=4.6.0
-requests>=2.18.1,<3
-lxml>=3.8.0,<4
+beautifulsoup4
+requests
+lxml