Ускорение парсера на Python с asyncio

Question

Не могу понять, как можно ускорить мой парсер.

import asyncio
import aiohttp
from bs4 import BeautifulSoup
import time
import requests

async def get_html(url):
    """Fetch *url* with a GET request and return the response body as text.

    A new ``aiohttp.ClientSession`` is opened for this single request and
    closed again before the function returns.
    """
    async with aiohttp.ClientSession() as session:
        request = session.get(url)
        async with request as response:
            body = await response.text()
    return body


async def main():
    """Scrape avatar links from listing pages 1-200 and download each image.

    All listing pages are requested concurrently. Every ``<a>`` element with
    class ``avatars__link`` contributes its ``href`` to the download list,
    and the images are then fetched concurrently through ``makePhoto``
    instead of one after another. Finally the elapsed time in seconds and
    the number of downloaded links are printed.
    """
    # Upper bound on simultaneous downloads: makePhoto opens its own
    # session per call, so an unbounded fan-out could exhaust sockets.
    max_parallel_downloads = 20

    # Monotonic clock: elapsed time is unaffected by wall-clock adjustments.
    t0 = time.monotonic()
    base_url = "https://cspromogame.ru/avatars/?page="
    pages = await asyncio.gather(
        *(get_html(f"{base_url}{i}") for i in range(1, 201))
    )

    links = []
    for text in pages:
        soup = BeautifulSoup(text, 'html.parser')
        for a in soup.find_all('a', class_='avatars__link'):
            link = a.get("href")
            if link:  # an anchor without href has nothing to download
                links.append(link)

    limiter = asyncio.Semaphore(max_parallel_downloads)

    async def fetch(link):
        # Hold a semaphore slot for the duration of one download.
        async with limiter:
            await makePhoto(link)

    await asyncio.gather(*(fetch(link) for link in links))
    print(time.monotonic() - t0)
    print(len(links))

async def makePhoto(url):
    """Download the image at *url* and save it as ``<name>.jpg``.

    ``<name>`` is the last path segment of *url* with a trailing ``.jpg``
    removed; the file is written relative to the current working directory.
    The URL is printed before the request is made.

    Raises:
        aiohttp.ClientResponseError: for HTTP error statuses, because the
            session is created with ``raise_for_status=True``.
    """
    # Take the last path segment as a string (not a one-element list).
    filename = url.rsplit("/", 1)[-1]
    # Strip the extension only when it is a real suffix; it is re-added below.
    name = filename[:-len(".jpg")] if filename.endswith(".jpg") else filename
    print(url)
    async with aiohttp.ClientSession(raise_for_status=True) as client:
        async with client.get(url) as r:
            # Image data is binary: read raw bytes instead of decoded text.
            payload = await r.read()
    # Open the file only after the download succeeded, so a failed request
    # does not leave an empty file behind.
    with open(f"{name}.jpg", "wb") as f:
        f.write(payload)
    

# Start the event loop and run main() only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

Answer 1

import asyncio
import time
import aiohttp
from bs4 import BeautifulSoup

async def get_html(url, client):
    """Issue a GET for *url* through *client* and return the body as text.

    *client* is used as-is and is not closed here; the response context is
    exited before the text is returned.
    """
    request = client.get(url)
    async with request as reply:
        html = await reply.text()
    return html

async def download(url, client):
    """Download *url* through *client* and store it as ``avat/<filename>``.

    ``<filename>`` is the last ``/``-separated segment of *url*. The file is
    opened with mode ``"wb"``, which does not create directories, so the
    ``avat`` directory must already exist.
    """
    *_, filename = url.split("/")
    # The context manager releases the response even if reading fails.
    async with client.get(url) as response:
        payload = await response.read()
    # Open the file only after the body was read, so a failed request does
    # not leave an empty or truncated file behind.
    with open(f"avat/{filename}", "wb") as file:
        file.write(payload)

async def main():
    """Fetch listing pages 1-200, then download every linked avatar.

    One ``aiohttp.ClientSession`` is shared by all requests. The listing
    pages are fetched concurrently; each ``<a>`` element with class
    ``avatars__link`` that carries an ``href`` is then downloaded
    concurrently via ``download``. The elapsed time in seconds is printed
    at the end.
    """
    t0 = time.monotonic()
    async with aiohttp.ClientSession() as session:
        page_tasks = [
            asyncio.create_task(
                get_html(
                    f"https://cspromogame.ru/avatars/?page={page_number}",
                    session,
                )
            )
            for page_number in range(1, 201)
        ]
        pages = await asyncio.gather(*page_tasks)

        download_tasks = []
        for page in pages:
            soup = BeautifulSoup(page, 'html.parser')
            # find_all is the current name; findAll is a deprecated alias.
            for avatar_link in soup.find_all('a', class_='avatars__link'):
                link = avatar_link.get("href")
                if not link:
                    # An anchor without href has nothing to download.
                    continue
                download_tasks.append(
                    asyncio.create_task(download(link, session))
                )

        await asyncio.gather(*download_tasks)
    print(time.monotonic() - t0)

# Start the event loop and run main() only when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

БЛОГ НА HUSL

Ускорение парсера на Python с asyncio

Ответы (1 шт):