爬取豆瓣新书速递

这个爬虫比较简单,一个固定URL,数据获取也比较有规律。最后是把获取到的书名、简介、评分、作者出版社信息写入本地的一个文件中

# coding:utf-8
import requests
from bs4 import BeautifulSoup
import sys

# Python 2 only: force the process-wide default string encoding to UTF-8 so
# that writing the scraped unicode text to a plain file does not raise
# UnicodeEncodeError.  `reload` is a builtin only in Python 2; under
# Python 3 the default encoding is already UTF-8 and this hack is both
# unnecessary and impossible, so it is skipped there.
try:
    reload(sys)
    sys.setdefaultencoding('utf8')
except NameError:  # Python 3: no builtin `reload`, nothing to do
    pass

def get_latest_book():
    """Scrape Douban's "new book express" (新书速递) listing page.

    Returns:
        list[dict]: one dict per book with keys 'bookName', 'bookRate',
        'bookInfo' and 'bookDetail' holding the text scraped from the page.

    Raises:
        requests.RequestException: on network failure, timeout, or a
        non-2xx HTTP response.
    """
    url = 'https://book.douban.com/latest?icn=index-latestbook-all'
    # Time-bound the request so a hung connection cannot stall the script,
    # and fail loudly on an HTTP error instead of parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    names = soup.select('.article .detail-frame a')
    rates = soup.select('.article .color-lightgray')
    infos = soup.select('.article .color-gray')
    details = soup.select('.article .detail')

    # zip() pairs the four column lists and stops at the shortest, so a
    # partially changed page layout cannot raise IndexError (the original
    # indexed all four lists by the length of the first).
    book_list = []
    for name, rate, info, detail in zip(names, rates, infos, details):
        book_list.append({
            'bookName': name.text,
            'bookRate': rate.text,
            'bookInfo': info.text,
            'bookDetail': detail.text,
        })
    return book_list

def write_file():
    """Fetch the latest-book list and save it to a local UTF-8 text file.

    The file '豆瓣新书速递.txt' is (re)created in the working directory;
    each book's name, rating, publisher info and description are written
    in sequence, with a newline separating consecutive books.
    """
    # Function-scope import: io.open accepts `encoding` on both
    # Python 2 and Python 3, unlike Python 2's builtin open().
    import io

    book_list = get_latest_book()
    # 'w' not 'w+' -- we only write.  The explicit encoding removes the
    # need for the sys.setdefaultencoding() hack and avoids mojibake on
    # platforms whose default encoding is not UTF-8.
    with io.open('豆瓣新书速递.txt', 'w', encoding='utf-8') as f:
        for book in book_list:
            f.write(book['bookName'])
            f.write(book['bookRate'])
            f.write(book['bookInfo'])
            f.write(book['bookDetail'])
            f.write(u'\n')  # keep consecutive books visually separated
    # Parenthesized single-argument print works on Python 2 and 3 alike.
    print('数据写入完毕')


if __name__ == '__main__':
    write_file()

推荐阅读更多精彩内容