Ускорение парсера на Python с asyncio
Не могу понять, как можно ускорить мой парсер.
import asyncio
import aiohttp
from bs4 import BeautifulSoup
import time
import requests
async def get_html(url):
    """Fetch ``url`` and return the response body decoded as text.

    A fresh ``aiohttp.ClientSession`` is opened for this single request and
    closed again before the function returns.
    """
    async with aiohttp.ClientSession() as session, session.get(url) as response:
        body = await response.text()
    return body
async def main():
    """Scrape the avatar listing pages and download every linked image.

    Steps:
      1. Fetch listing pages 1..200 concurrently via ``get_html``.
      2. Parse each page and collect the ``href`` of every
         ``<a class="avatars__link">`` element.
      3. Download all collected links concurrently via ``makePhoto``.

    Prints the elapsed time in seconds, then the number of links processed.
    """
    started = time.monotonic()  # monotonic clock: unaffected by system time changes

    page_urls = [f"https://cspromogame.ru/avatars/?page={i}" for i in range(1, 201)]
    pages = await asyncio.gather(*(get_html(page_url) for page_url in page_urls))

    links = []
    for text in pages:
        soup = BeautifulSoup(text, 'html.parser')
        for a in soup.find_all('a', class_='avatars__link'):
            link = a.get("href")
            # An anchor without href yields None, which makePhoto cannot split.
            if link:
                links.append(link)

    # Awaiting makePhoto one link at a time serialises every download, which
    # is what made the scraper slow. Run them concurrently instead, but cap
    # the number in flight because each makePhoto call opens its own session.
    limiter = asyncio.Semaphore(50)

    async def fetch_photo(link):
        async with limiter:
            await makePhoto(link)

    await asyncio.gather(*(fetch_photo(link) for link in links))

    print(time.monotonic() - started)
    print(len(links))
async def makePhoto(url):
    """Download the image at ``url`` and save it as ``<name>.jpg`` in the current directory.

    ``<name>`` is the last ``/``-separated segment of ``url`` with any
    ``.jpg`` substring removed. The request uses its own
    ``aiohttp.ClientSession`` created with ``raise_for_status=True``.
    """
    # Take the last segment with [-1]. The previous [-1:] produced a
    # one-element list, and str() of that list leaked brackets and quotes
    # into the file name.
    name = url.split("/")[-1].replace(".jpg", "")
    print(url)
    async with aiohttp.ClientSession(raise_for_status=True) as client:
        async with client.get(url) as r:
            # The file is opened in binary mode, so read raw bytes;
            # r.text() returns str and cannot be written to a "wb" file.
            payload = await r.read()
    # Open the file only after the body has been fully read, so a failed
    # request does not leave an empty file behind.
    with open(f"{name}.jpg", "wb") as f:
        f.write(payload)
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and run it to completion.
    scraper = main()
    asyncio.run(scraper)
Ответы (1 шт):
Автор решения: Сергей Ш
→ Ссылка
import asyncio
import time
import aiohttp
from bs4 import BeautifulSoup
async def get_html(url, client):
    """Request ``url`` through the caller-supplied ``client`` and return the body as text.

    The response is used as an async context manager, so it is released
    before the function returns.
    """
    async with client.get(url) as page_response:
        html = await page_response.text()
    return html
async def download(url, client):
    """Fetch ``url`` through ``client`` and write the raw bytes to ``avat/<filename>``.

    ``<filename>`` is the last ``/``-separated segment of ``url``. The
    ``avat`` directory is created if it does not exist yet.

    NOTE(review): the HTTP status is not checked here, so an error page
    would be saved as if it were an image. Confirm whether the session
    should be created with ``raise_for_status=True``.
    """
    import os  # local import so this block does not depend on the file's import list

    *_, filename = url.split("/")
    # Use the response as a context manager so the connection is released
    # even if reading the body fails.
    async with client.get(url) as response:
        payload = await response.read()
    os.makedirs("avat", exist_ok=True)
    # Open the file only after the body has been read, so a failed download
    # does not leave an empty file behind.
    with open(f"avat/{filename}", "wb") as file:
        file.write(payload)
async def main():
    """Scrape listing pages 1..200 and download every avatar over one shared session.

    All page requests are scheduled together and awaited as a group. Then
    one ``download`` task is scheduled per ``<a class="avatars__link">``
    found on any page, and those are awaited as a group. The elapsed time
    in seconds is printed at the end.
    """
    started_at = time.monotonic()
    async with aiohttp.ClientSession() as session:
        # Phase 1: request all listing pages at once.
        page_jobs = [
            asyncio.create_task(
                get_html(f"https://cspromogame.ru/avatars/?page={number}", session)
            )
            for number in range(1, 201)
        ]
        pages = await asyncio.gather(*page_jobs)

        # Phase 2: schedule one download per avatar link, then wait for all.
        download_jobs = []
        for markup in pages:
            anchors = BeautifulSoup(markup, 'html.parser').findAll('a', class_='avatars__link')
            download_jobs.extend(
                asyncio.create_task(download(anchor.get("href"), session))
                for anchor in anchors
            )
        await asyncio.gather(*download_jobs)
    print(time.monotonic() - started_at)
if __name__ == "__main__":
    # Script entry point: build the top-level coroutine and run it to completion.
    scraper = main()
    asyncio.run(scraper)