|
[ 本帖最后由 aixce1 于 2021-12-1 17:14 编辑 ]
async def getBrief(url, header):
    """Download one page and append its title and manager brief to a file.

    The page at ``url`` is fetched with ``header`` as the request headers,
    decoded with the default codec, and searched for a ``<title>`` ending in
    " _ 基金经理档案 _ 天天基金网" and for the paragraph that follows the
    "基金经理简介:" label.  When a title is found, one JSON object per line
    (keys ``title`` and ``brief``) is appended to ``基金经理简介.txt``.

    Args:
        url: Address of the page to download.
        header: Mapping of HTTP request headers passed to the client session.

    Returns:
        None.  Pages without a matching title are skipped silently.
    """
    async with aiohttp.ClientSession(headers=header) as session:
        async with session.get(url) as response:
            raw = await response.content.read()

    # Parsing needs only the bytes, so the connection is released first.
    text = raw.decode()
    title = re.findall(r'<title>(.*?) _ 基金经理档案 _ 天天基金网</title>', text)
    brief = re.findall(r'<span class="strong">基金经理简介:</span>(.*?)</p>', text, re.S)

    if not title:
        return

    # A page can carry a title but no brief paragraph; indexing brief[0]
    # unconditionally would raise IndexError and lose the record entirely.
    item = {
        'title': title[0],
        'brief': brief[0] if brief else '',
    }
    async with aiofiles.open('基金经理简介.txt', mode='a', encoding='utf-8') as f:
        await f.write(json.dumps(item, ensure_ascii=False) + '\n')
async def getDetailPages(url, header):
    """Fetch one listing page and scrape every detail page it references.

    The response is expected to contain a JavaScript assignment of the form
    ``var returnjson= {data:[...],record:N,pages:N,curpage:N}``.  The ``data``
    array is extracted and parsed, the first element of each row is used as
    the id in ``http://fund.eastmoney.com/manager/<id>.html``, and
    ``getBrief`` is run concurrently for all of those URLs.

    Args:
        url: Address of the listing endpoint page.
        header: Mapping of HTTP request headers, forwarded to ``getBrief``.

    Returns:
        None.  A response that does not contain the expected assignment is
        skipped.

    Raises:
        json.JSONDecodeError: If the extracted payload is not valid JSON once
            the ``data`` key has been quoted.
    """
    async with aiohttp.ClientSession(headers=header) as session:
        async with session.get(url) as response:
            raw = await response.content.read()

    # The record total changes over time, so accept any number rather than
    # the single value that happened to be current when this was written.
    matches = re.findall(
        r'var returnjson= (.*?),record:\d+,pages:.*,curpage:.*}',
        raw.decode(),
    )
    if not matches:
        return

    # The payload is a JS object literal with an unquoted key.  Quote only the
    # first "data" (the key) so the same letters inside a value stay intact.
    payload = (matches[0] + "}").replace('data', '"data"', 1)
    data = json.loads(payload)

    task = [
        getBrief(f"http://fund.eastmoney.com/manager/{item[0]}.html", header)
        for item in data['data']
    ]
    # gather() accepts coroutines directly and tolerates an empty list, unlike
    # asyncio.wait().  return_exceptions=True keeps the earlier best-effort
    # behaviour where one failing detail page does not abort the others.
    await asyncio.gather(*task, return_exceptions=True)
async def getUrl(header, pages=19):
    """Scrape listing pages 1..``pages`` concurrently.

    Builds the listing URL for each page index (50 rows per page, per the
    ``pn=50`` query parameter) and runs ``getDetailPages`` for all of them at
    once.

    Args:
        header: Mapping of HTTP request headers, forwarded to
            ``getDetailPages``.
        pages: Number of listing pages to request, starting from page 1.
            Defaults to 19, the range that was previously hard-coded.

    Returns:
        None.
    """
    task = [
        getDetailPages(
            "http://fund.eastmoney.com/Data/FundDataPortfolio_Interface.aspx?dt=14&mc=returnjson&ft=all&pn=50&pi={page}&sc=abbname&st=asc".format(page=page),
            header,
        )
        for page in range(1, pages + 1)
    ]
    # asyncio.wait() no longer accepts bare coroutines (Python 3.11+) and
    # rejects an empty collection; gather() handles both.  Failures of single
    # pages are collected instead of raised so the remaining pages still run.
    await asyncio.gather(*task, return_exceptions=True)
def main():
    """Run the scraper with a desktop-browser user-agent header."""
    header = {
        # Adjacent literals are joined at compile time.  A backslash
        # continuation inside the string would pull the next line's
        # indentation into the header value.
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36"
        ),
    }
    # asyncio.run() creates and closes its own event loop; calling
    # get_event_loop() without a running loop is deprecated.
    asyncio.run(getUrl(header))


if __name__ == '__main__':
    main()
|
本帖子中包含更多资源
您需要 登录 才可以下载或查看,没有帐号?立即注册
x
|
共 1 个关于本帖的回复 最后回复于 2021-12-1 17:13