发布于2020-02-10 17:32 阅读(790) 评论(0) 点赞(24) 收藏(1)
Python.rar
基于Python爬取新冠统计数据,包含全国统计数据,城市统计数据,新闻数据,数据来源自腾讯新闻数据。
本文主要为帮助科研人员,用于收集并分析新型冠状病毒相关信息使用,如涉及版权等其他问题,请联系作者删除。
本文使用Python语言获取疫情统计数据(来源腾讯新闻),和新闻数据(来源腾讯新闻,丁香园),并写入到SQL Server中,可自行修改写入Excel或者其他文件中
其中获取中国统计数据方式如下
#获取中国每天的汇总统计数据
import requests
import re
import json
import openpyxl
import time
import pymssql
import time
lastUpdateTime=''#last update timestamp reported by the API
data_china = []#per-day national summary rows (tuples for sql_china)
data_chinatimeline=[]#national-totals snapshot rows (tuples for sql_chinaLine)
Get_City_V2=r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"#Tencent news region-data endpoint (v2)
def GetHtmlText(url):
    """Fetch *url* and return the decoded body text, or "Error" on failure.

    Callers check for the sentinel string "Error" instead of catching
    exceptions, so all expected failures are converted to that return value.
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()  # turn HTTP 4xx/5xx responses into exceptions
        res.encoding = res.apparent_encoding  # guess real encoding from the body
        return res.text
    except requests.RequestException:
        # Bug fix: the original bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit; only network/HTTP failures are expected here.
        return "Error"
City_json= GetHtmlText(Get_City_V2)#raw payload text, or the sentinel "Error" on failure
City_Data = json.loads(City_json)
City_Data = City_Data["data"]#the "data" field is itself a JSON-encoded string
def GetCityData(CitysJson):
    """Parse the national summary JSON string into the module-level lists.

    Appends one (date, date, confirm, suspect, dead, heal) tuple per day to
    data_china -- six fields to match the six placeholders of sql_china (the
    leading date feeds the `if not exists` check) -- and one national-totals
    snapshot tuple to data_chinatimeline.  Also refreshes the module-level
    lastUpdateTime.  Returns the number of daily entries parsed.
    """
    global lastUpdateTime  # update timestamp
    global data_china  # national per-day rows
    data = json.loads(CitysJson)
    lastUpdateTime = data.get('lastUpdateTime')  # API-reported update time
    chinaDayList = data.get('chinaDayList')  # per-day national summaries
    chinaTotal = data.get('chinaTotal')  # latest national totals
    for chinadata in chinaDayList:
        # Bug fix: the original also appended a 5-tuple for every day, which
        # does not match sql_china's six placeholders and would make
        # executemany fail; only the matching 6-tuple row is kept.
        data_china.append((chinadata['date'], chinadata['date'],
                           int(chinadata['confirm']), int(chinadata['suspect']),
                           int(chinadata['dead']), int(chinadata['heal'])))
    # One snapshot row: the leading four counts feed sql_chinaLine's
    # `if not exists` check, the trailing five feed the INSERT values.
    data_chinatimeline.append((int(chinaTotal['confirm']), int(chinaTotal['suspect']),
                               int(chinaTotal['dead']), int(chinaTotal['heal']),
                               lastUpdateTime,
                               int(chinaTotal['confirm']), int(chinaTotal['suspect']),
                               int(chinaTotal['dead']), int(chinaTotal['heal'])))
    return len(chinaDayList)
GetCityData(City_Data)
# ---- persist the collected rows to SQL Server ----
server = ""  # server name
user = ""  # user name
password = ""  # password
database = ""  # database name
conn = pymssql.connect(server, user, password, database)
cursor = conn.cursor()
if not cursor:
    # Bug fix: the original `raise(NameError,"...")` raised a tuple, which is
    # illegal in Python 3; raise a proper exception instance instead.
    # NOTE(review): pymssql raises on connection failure, so this check is
    # mostly defensive -- confirm it is still wanted.
    raise NameError("连接数据库失败")
print('OK')
# Insert each daily national row unless a row for that date already exists.
sql_china="if not exists(select * from SARI_ChinaSta where sdate=%s) insert into SARI_ChinaSta ([sdate],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
cursor.executemany(sql_china, data_china)
# Insert the national-totals snapshot unless identical figures already exist.
sql_chinaLine="if not exists(select * from SARI_CTLine where sconfirm=%s and ssuspect=%s and sdead=%s and sheal=%s) insert into SARI_CTLine ([lastUpdateTime],[sconfirm],[ssuspect],[sdead],[sheal]) VALUES (%s,%d,%d,%d,%d)"
cursor.executemany(sql_chinaLine, data_chinatimeline)
# commit() is required unless the connection was opened with autocommit=True.
conn.commit()
# Bug fix: the original printed the `time` MODULE object; print a timestamp.
print(time.strftime('%Y-%m-%d %H:%M:%S'), '写入统计数据成功')
conn.close()  # close the database connection
# Append an execution-time line to the run log ("a" mode keeps history);
# explicit UTF-8 so the Chinese text is written the same on every platform.
f = "log_getchina.txt"
with open(f, "a", encoding="utf-8") as file:
    file.write("执行时间:" + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
获取城市统计数据代码如下
#获取城市统计数据v2.0
import requests
import re
import json
import openpyxl
import time
import pymssql
import time
lastUpdateTime=''#last update timestamp reported by the API
country =[] #country name, one entry per output row
area = []#province name per row
city = []#city name per row
today_dead = []#deaths today
today_confirm = []#confirmed cases today
today_suspect = []#suspected cases today
today_heal = []#recoveries today
total_dead = []#cumulative deaths
total_confirm = []#cumulative confirmed cases
total_suspect = []#cumulative suspected cases
total_heal = []#cumulative recoveries
data_china = []#national summary rows (NOTE(review): appears unused in this script)
Get_City_V2=r"https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&callback"#Tencent news region-data endpoint (v2)
def GetHtmlText(url):
    """Fetch *url* and return the decoded body text, or "Error" on failure.

    Callers check for the sentinel string "Error" instead of catching
    exceptions, so all expected failures are converted to that return value.
    """
    try:
        res = requests.get(url, timeout=30)
        res.raise_for_status()  # turn HTTP 4xx/5xx responses into exceptions
        res.encoding = res.apparent_encoding  # guess real encoding from the body
        return res.text
    except requests.RequestException:
        # Bug fix: the original bare `except:` also swallowed KeyboardInterrupt
        # and SystemExit; only network/HTTP failures are expected here.
        return "Error"
City_json= GetHtmlText(Get_City_V2)#raw payload text, or the sentinel "Error" on failure
City_Data = json.loads(City_json)
City_Data = City_Data["data"]#the "data" field is itself a JSON-encoded string
#City_Data = re.findall(r"{[^}]+}",City_Data)#NewsJson
def _append_region_row(cname, fname, sname, node):
    """Record one region row into the module-level result lists.

    cname/fname/sname are the country / province / city labels for the row;
    *node* is the JSON dict whose 'today' and 'total' sub-dicts carry the
    dead/confirm/suspect/heal counts for that region.
    """
    country.append(cname)
    area.append(fname)
    city.append(sname)
    today = node['today']
    total = node['total']
    today_dead.append(today['dead'])
    today_confirm.append(today['confirm'])
    today_suspect.append(today['suspect'])
    today_heal.append(today['heal'])
    total_dead.append(total['dead'])
    total_confirm.append(total['confirm'])
    total_suspect.append(total['suspect'])
    total_heal.append(total['heal'])

def GetCityData(CitysJson):
    """Parse the Tencent region JSON string and flatten it into row lists.

    Walks the up-to-three-level areaTree (country -> province -> city).
    A node with no 'children' contributes a single row, with its own name
    repeated for the missing lower levels -- same behavior as the original
    copy-pasted branches, now factored through _append_region_row.
    Also refreshes the module-level lastUpdateTime.
    Returns the number of top-level (country) entries.
    """
    global lastUpdateTime  # update timestamp
    data = json.loads(CitysJson)
    areaTree = data.get('areaTree')  # per-region statistics tree
    lastUpdateTime = data.get('lastUpdateTime')  # API-reported update time
    i = len(areaTree)  # number of top-level members
    for citydata in areaTree:
        cname = citydata.get('name')  # country name
        fcity = citydata.get('children')  # province level
        if fcity is None:
            # Country-level node with no breakdown: one row, country name
            # repeated for province and city.
            _append_region_row(cname, cname, cname, citydata)
            continue
        for fcitydata in fcity:
            fname = fcitydata.get('name')  # province name
            scity = fcitydata.get('children')  # city level
            if scity is None:
                # Province with no city breakdown: province name doubles as city.
                _append_region_row(cname, fname, fname, fcitydata)
            else:
                for scitydata in scity:
                    _append_region_row(cname, fname, scitydata.get('name'), scitydata)
    return i
# Parse the downloaded region JSON into the module-level row lists.
GetCityData(City_Data)
length = len(country)
# Build one parameter tuple per region row for the upsert statement: the
# leading (lastUpdateTime, country, area, city) fields feed the
# `if not exists` check, the trailing fields feed the INSERT values.
data_xj = [
    (lastUpdateTime, c, a, s, c, a, s, td, tc, ts, th, gd, gc, gs, gh, lastUpdateTime)
    for c, a, s, td, tc, ts, th, gd, gc, gs, gh in zip(
        country, area, city,
        today_dead, today_confirm, today_suspect, today_heal,
        total_dead, total_confirm, total_suspect, total_heal,
    )
]
# ---- persist the per-region rows to SQL Server ----
server = ""  # server name
user = ""  # user name
password = ""  # password
database = ""  # database name
conn = pymssql.connect(server, user, password, database)
cursor = conn.cursor()
if not cursor:
    # Bug fix: the original `raise(NameError,"...")` raised a tuple, which is
    # illegal in Python 3; raise a proper exception instance instead.
    # NOTE(review): pymssql raises on connection failure, so this check is
    # mostly defensive -- confirm it is still wanted.
    raise NameError("连接数据库失败")
print('OK')
# Insert each region row unless an identical (time, country, province, city)
# row already exists.
sql_xj = "if not exists(select * from SARI_detail where lastUpdateTime=%s and cname=%s and fname=%s and sname=%s) INSERT INTO SARI_detail ([cname],[fName],[sName],[today_dead],[today_confirm],[today_suspect],[today_heal],[total_dead],[total_confirm],[total_suspect],[total_heal],[lastUpdateTime]) VALUES (%s,%s,%s,%d,%d,%d,%d,%d,%d,%d,%d,%s)"
cursor.executemany(sql_xj, data_xj)
# commit() is required unless the connection was opened with autocommit=True.
conn.commit()
# Bug fix: the original printed the `time` MODULE object; print a timestamp.
print(time.strftime('%Y-%m-%d %H:%M:%S'), '写入统计数据成功')
conn.close()  # close the database connection
# Append an execution-time line to the run log ("a" mode keeps history);
# explicit UTF-8 so the Chinese text is written the same on every platform.
f = "log_getcityv2.txt"
with open(f, "a", encoding="utf-8") as file:
    file.write("执行时间:" + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + "\n")
如果要写入Excel文件,可以使用openpyxl库写入。获取新闻数据的代码及本文完整代码,已经上传至csdn。
作者:进击的巨人
链接:https://www.pythonheidong.com/blog/article/231022/0cd966b048718817903f/
来源:python黑洞网
任何形式的转载都请注明出处,如有侵权 一经发现 必将追究其法律责任
昵称:
评论内容:(最多支持255个字符)
---无人问津也好,技不如人也罢,你都要试着安静下来,去做自己该做的事,而不是让内心的烦躁、焦虑,坏掉你本来就不多的热情和定力
Copyright © 2018-2021 python黑洞网 All Rights Reserved 版权所有,并保留所有权利。 京ICP备18063182号-1
投诉与举报,广告合作请联系vgs_info@163.com或QQ3083709327
免责声明:网站文章均由用户上传,仅供读者学习交流使用,禁止用做商业用途。若文章涉及色情,反动,侵权等违法信息,请向我们举报,一经核实我们会立即删除!