完整代码:
# Import modules
import datetime
from bs4 import BeautifulSoup
import requests
import pymysql
# Open the MySQL connection and create a cursor; both are module-level
# globals shared by get_temperature() and committed/closed in __main__.
# NOTE(review): 'utf8' in MySQL is the 3-byte subset — 'utf8mb4' is usually
# wanted for full Unicode; confirm against the aqidata table's charset.
conn = pymysql.connect(host='localhost', user='root', passwd='root', db='mysql', port=3306, charset='utf8')
cursor = conn.cursor()
# Fetch the live AQI page with a browser-like User-Agent so the request
# more closely resembles a real browser, then parse the data table.
def get_temperature(url):
    """Scrape the live AQI table from *url* and insert one row per station.

    Parses the pm25.in page layout: a ``div.table`` holding the station
    rows, ``div.live_data_time`` holding the publication timestamp, and
    ``div.city_name`` holding the city label.

    Side effects: prints the parsed date and executes one INSERT per
    station row on the module-level ``cursor``. The caller is responsible
    for committing the shared connection.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }  # request headers to mimic a real browser
    response = requests.get(url, headers=headers).content
    soup = BeautifulSoup(response, "lxml")
    conmid = soup.find('div', class_='table')           # station data table
    condate = soup.find('div', class_='live_data_time') # publication time block
    condate2 = condate.find('p')
    city = soup.find('div', class_='city_name')
    conmid2 = conmid.find_all('tbody')
    # Parameterized statement: scraped text is untrusted input, so it must
    # never be spliced into the SQL string (the original %-formatting broke
    # on quotes and was injectable).
    sql = (
        "INSERT INTO aqidata"
        "(POSITION, DATE, AQI, GRADE, PM25, PM10, SO2, CO, NO2, O3_8h, CITY) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    for info in conmid2:
        for tr in info.find_all('tr'):
            td_list = tr.find_all('td')
            position = td_list[0].text.replace('\n', '')
            # Slice out the timestamp portion of the text.
            # TODO(review): offsets 7:26 assume a fixed prefix — confirm
            # against the live page markup.
            date = condate2.text[7:26]
            print(date)
            aqi = td_list[1].text.replace('\n', '')
            grade = td_list[2].text.replace('\n', '')
            pm25 = td_list[4].text.replace('\n', '')
            pm10 = td_list[5].text.replace('\n', '')
            so2 = td_list[10].text.replace('\n', '')
            co = td_list[6].text.replace('\n', '')
            no2 = td_list[7].text.replace('\n', '')
            o3_8h = td_list[9].text.replace('\n', '')
            cityname = city.text
            cursor.execute(
                sql,
                (position, date, aqi, grade, pm25, pm10, so2, co, no2, o3_8h, cityname),
            )
if __name__ == '__main__':
    urls = ['http://www.pm25.in/jiangyin', 'http://www.pm25.in/suzhou']
    try:
        for url in urls:
            get_temperature(url)
        # Commit once after all cities are scraped: all-or-nothing batch.
        conn.commit()
    finally:
        # Fix: the original never closed the cursor/connection (leak).
        cursor.close()
        conn.close()