使用流快速构建爬虫
In [1]:
import streamz
from requests_html import HTMLSession
# One module-level HTMLSession, created once; get_response() sends every
# request through this same object.
session = HTMLSession()
def get_response(url, timeout=None):
    """Fetch ``url`` through the module-level ``session``.

    Args:
        url: Address to request.
        timeout: Optional timeout forwarded to ``session.get``. The default
            ``None`` is what the call used before this parameter existed,
            so callers passing only ``url`` see no change.

    Returns:
        Whatever ``session.get`` returns for the request.
    """
    # ``session`` is only read here, never rebound, so no ``global``
    # declaration is required.
    return session.get(url, timeout=timeout)
def get_result(response):
    """Return a ``(title_match, url)`` pair for ``response``.

    ``title_match`` is whatever ``response.html.search`` yields for the
    ``<title>{}</title>`` template; ``url`` is ``response.url``.
    """
    title_match = response.html.search('<title>{}</title>')
    page_url = response.url
    return title_match, page_url
def get_links(response):
    """Return the ``absolute_links`` attribute of ``response.html``."""
    page = response.html
    return page.absolute_links
def is_special_url(url):
    """Return whether the marker ``'gndy'`` occurs in ``url``."""
    marker = 'gndy'
    return marker in url
def is_special_response(response):
return 'gndy' in …