Парсинг цен с торговой площадки Steam
Hello comrades/ В коде парсю предметы стим из кс-го со страничек https://steamcommunity.com/market/search?appid=730#p{page_num}_popular_desc. В результате код пробегает только первую страничку, но 2000 раз, кажется это напрямую связано с асинхронностью. В общем помогите найти ошибку пожалуйста
import asyncio
from aiohttp.client_exceptions import ClientConnectionError
import data.orm.async_orm as orm
import aiohttp
from bs4 import BeautifulSoup as bs
from typing import Tuple, List, Coroutine, Any
from threading import Thread
DEFAULT_URL = 'https://steamcommunity.com/market/search?appid=730'
async def pagination_limit()->Tuple[int, int]: #количество страниц с предметами
async with aiohttp.ClientSession(trust_env=True) as session:
async with session.get(DEFAULT_URL+'#p1_popular_desc') as response:
soup = bs(await response.text(), "lxml")
nb_listings = soup.find("span", {"id": "searchResults_total"}).text.split(",")
nb_listings = int("".join(nb_listings))
return nb_listings // 10, nb_listings % 10
async def get_prices(pg: int)->None: #проход одной страницы
while True: #пока успешно не выполнится
try:
async with aiohttp.ClientSession(trust_env=True) as session:
async with session.get(f'https://steamcommunity.com/market/search?appid=730#p{pg}_popular_desc') as response:
tex = await response.text()
print(tex)
soup = bs(tex, "lxml")
listings = soup.find_all("a", {"class": "market_listing_row_link"})
task_list = []
for listing in listings:
ref = listing.get("href")
name = listing.find("div", {"class": "market_listing_row market_recent_listing_row market_listing_searchresult"}).get("data-hash-name")
price = int(listing.find("span", {"class": "market_table_value normal_price"}).find("span", {"class": "normal_price"}).get("data-price"))
imgs = listing.find("img")
img_small = imgs.get("src")
img_big = img_small + "dpx2x"
print(name)
task_list.append(orm.async_insert_listing(name=name, val=price, ref_small_image=img_small, ref_big_image=img_big, ref_gun=ref))
await asyncio.gather(*task_list)
break
except ClientConnectionError as err: #блокирует сервер - ждём одну секунду
print(pg, "Error occured")
await asyncio.sleep(1)
async def table_update(pages) -> None:
list_of_tasks = []
[list_of_tasks.append(get_prices(i)) for i in range(1, pages[0]+1)]
await asyncio.gather(*list_of_tasks)
if __name__ == "__main__":
loop = asyncio.get_event_loop()
pages = loop.run_until_complete(pagination_limit())
loop.run_until_complete(table_update(pages))