mirror of
https://github.com/hpware/news-analyze.git
synced 2025-06-23 21:14:23 +00:00
Clean code.
This commit is contained in:
parent
bf357f1c84
commit
5d58016b1d
39 changed files with 481 additions and 381 deletions
|
@ -1,12 +1,7 @@
|
|||
import scrapy
|
||||
from urllib.request import urlopen
|
||||
|
||||
class BlogSpider(scrapy.Spider):
|
||||
name = 'blogspider'
|
||||
start_urls = ['https://news.google.com/u/4/home?hl=zh-TW&gl=TW&ceid=TW:zh-Hant&pageId=none']
|
||||
url = "https://tw.news.yahoo.com/"
|
||||
|
||||
def parse(self, response):
|
||||
for title in response.css('.oxy-post-title'):
|
||||
yield {'title': title.css('::text').get()}
|
||||
page = urlopen(url)
|
||||
|
||||
for next_page in response.css('a.next'):
|
||||
yield response.follow(next_page, self.parse)
|
||||
page
|
Loading…
Add table
Add a link
Reference in a new issue