Проблема с ассинхронным скриптом python
есть вот такой код
from distutils import errors
from distutils.log import error
import encodings
from fileinput import close
from random import random
from traceback import print_tb
from bs4 import BeautifulSoup
import requests
import json
import asyncio
import aiohttp
import time
from fake_useragent import UserAgent
import random
import unicodedata
import sys
import gc
if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
policy = asyncio.WindowsSelectorEventLoopPolicy()
asyncio.set_event_loop_policy(policy)
# -*- coding:utf-8 -*-
useragent=UserAgent()
CategoryDict={}
All_pages_list=[]
All_items_list=[]
All_items_list_try=[]
semaph=asyncio.Semaphore(15)
async def get_site_content(url):# Список ссылок на товары
async with semaph:
headers={'User-Agent':useragent.random}
connector = aiohttp.TCPConnector(limit=30)
async with aiohttp.ClientSession(headers=headers,connector=connector) as session:
async with session.request(method="GET",url=url) as resp:
text = await resp.text(encoding="utf-8",errors="replace")
SoupText= BeautifulSoup(text,"lxml")
header=SoupText.find('h1').text
current_page=SoupText.find(class_='pgsn').text
itemslist=SoupText.findAll(class_='desc')
await asyncio.sleep(random.randint(1,3))
for el in itemslist:
All_items_list.append("https://069.net.ua"+el.a.get('href'))
print(f"{header} {current_page} done")
return BeautifulSoup(text,"lxml")
async def get_item_content(url): #разбор страницы товара
semaph1=asyncio.Semaphore(1)
connector = aiohttp.TCPConnector(limit=30)
async with semaph1:
headers={'User-Agent':useragent.random}
async with aiohttp.ClientSession(headers=headers,connector=connector) as session:
async with session.request(method="GET",url=url) as resp:
text = await resp.text(encoding="utf-8",errors="replace")
SoupText= BeautifulSoup(text,"lxml")
item_name=SoupText.find('h1').text
item_code=SoupText.find_all(class_='g-r')[-2].text
photo_link="https://069.net.ua"+SoupText.find(id="s-gal-bp").img.get("src")
item_color=SoupText.find_all(class_='g-r')[-2].text
size_list=SoupText.find_all('option')
temp_list=[]
rand=random.randint(1,5)
if rand == 3:
All_items_list.append(asyncio.ensure_future(asyncio.sleep(7)))
for item in size_list:
if item.text!="Виберіть":
temp_list.append(item.text)
size_list=temp_list
item_category=SoupText.find_all(itemprop='name')[1].text
try:
item_price=SoupText.find(class_="prcn").text
except:
item_price="no name"
if len(size_list)!=0:
item_availability="В наявності"
else:
item_availability="Відсутній"
item_brand=SoupText.find_all(itemprop='name')[2].text
await asyncio.sleep(random.randint(1,10))
print()
print("===========================================")
print("назва товару -"+str(item_name))
print("код товару -"+str(item_code))
print("посилання на фото товару -"+str(photo_link))
print("колір - "+str(item_color))
print("розміри- "+str(size_list))
print("категорія -"+str(item_category))
print("ціна - "+str(item_price))
print("наявність - "+str(item_availability))
print("бренд - "+str(item_brand))
print("===========================================")
with open(f"try/{time.time()}.html","w", encoding="utf-8") as file :
file.write(text)
await asyncio.sleep(random.randint(3,5))
return BeautifulSoup(text,"lxml")
url="https://069.net.ua/ua/"
req = requests.get(url)
src=req.text
Categories_soup=BeautifulSoup(src,"lxml").find(id="tabs")
CategoriesList=Categories_soup.find_all(class_='sta')
for category in CategoriesList:
categorySoup=BeautifulSoup(str(category),"lxml")
name=categorySoup.text
href="https://069.net.ua"+category['href']
CategoryDict[name]=href
with open("files/Categories.json","w", encoding="utf-8") as file :
json.dump((CategoryDict),file,indent=4,ensure_ascii=False)
for key in CategoryDict:
url=CategoryDict[key]
req = requests.get(url)
src=req.text
LastLink=BeautifulSoup(src,"lxml").find(id='flt-opt').find_all(class_="pgs")
LastLink=int(LastLink[-2].text)
for a in range(1,LastLink+1):
page_url=url+f"?page={a}/"
rand=random.randint(0,10)
if rand == 3:
All_pages_list.append(asyncio.ensure_future(asyncio.sleep(5)))
All_pages_list.append(asyncio.ensure_future(get_site_content(page_url)))
print(key+" "+"done")
if __name__ == "__main__":
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.gather(*All_pages_list))
print('--------------------------')
print('second loop')
print('--------------------------')
print(len(All_items_list))
gc.collect()
loop= asyncio.get_event_loop()
for el in All_items_list:
All_items_list_try.append(asyncio.ensure_future(get_item_content(el)))
-# loop.run_until_complete(asyncio.gather(*All_items_list_try))
выдает вот такую ошибку -
d:\Fakework\69_parser\try.py:139: DeprecationWarning: There is no current event loop
loop= asyncio.get_event_loop()
d:\Fakework\69_parser\try.py:141: DeprecationWarning: There is no current event loop
All_items_list_try.append(asyncio.ensure_future(get_item_content(el)))
Traceback (most recent call last):
File "d:\Fakework\69_parser\try.py", line 142, in <module>
loop.run_until_complete(asyncio.gather(*All_items_list_try))
File "C:\Users\Chikita\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 633, in run_until_complete
self.run_forever()
File "C:\Users\Chikita\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 600, in run_forever
self._run_once()
File "C:\Users\Chikita\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1860, in _run_once
event_list = self._selector.select(timeout)
File "C:\Users\Chikita\AppData\Local\Programs\Python\Python310\lib\selectors.py", line 324, in select
r, w, _ = self._select(self._readers, self._writers, [], timeout)
File "C:\Users\Chikita\AppData\Local\Programs\Python\Python310\lib\selectors.py", line 315, in _select
r, w, x = select.select(r, w, w, timeout)
ValueError: too many file descriptors in select()
Что делать я уже не знаю,перерыл много источников,надеюсь на совет знатаков Строка вызывающая ошибку помечена "-#"