Как сделать так чтобы, парсер на selenium заработал?
У меня был написал рабочий код на bs4, бот при получении сообщения от пользователя с городом начинал парсить объявления на авито, после чего бот отправлял ссылки на объявления и цену. Но мне нужно этот код сделать рабочим и на selenium. В этом я профан, так что если кто может, то помогите пожалуйста
# aiogram
from aiogram import types
from aiogram.types.message import ParseMode
from bot import dp
from bot import db
from aiogram.utils.markdown import hbold, hlink
import cfscrape
import fake_headers
# python
import re
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium_stealth import stealth
from selenium.webdriver.common.by import By
import time
# default для mvc
city = "belgorod"
city2 = "voronezh"
city3 = "kursk"
radius = 300
allowed_data = ['часов', 'часа','час']
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options)
stealth(
driver,
languages=["en-US", "en"],
vendor="Google Inc.",
platform="Win32",
webgl_vendor="Intel Inc.",
renderer="Intel Iris OpenGL Engine",
fix_hairline=True,
)
@dp.message_handler(lambda message: message.text == "Воронеж")
async def avito_list(message: types.Message):
try:
# Проверим есть ли вообще подписки
if db.follows_exists(message.from_user.id):
# Выводим все объявления по подпискам
follows = db.show_subs(message.from_user.id)
for follow in follows:
line = follow[1][0][0]
with db.connection:
marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
marka = marka[0][0]
min_price = follow[2][0][0]
if follow[3][0][0] == None:
max_price = follow[2][0][0]*1000
else:
max_price = follow[3][0][0]
#model = "2114_samara"
#url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
url = f"https://www.avito.ru/valuyki/avtomobili/{marka}-ASgBAgICAUTgtg3GmSg?cd=1&radius=200"
driver.get(url)
time.sleep(5)
print(url)
main_container = driver.find_elements(By.CSS_SELECTOR,".iva-item-content")
for index, content in enumerate(main_container):
contaier_of_content = content.find_element(By.CSS_SELECTOR, ".iva-item-body")
ad_post = contaier_of_content.find_element(By.CLASS_NAME, "item-line")
#fresh_car = contaier_of_content.find_element(By.CLASS_NAME,{"data-marker":"item-date"}).text.split()[1]
price = content.find_element(By.CSS_SELECTOR, ".price-price").find_element(By.CLASS_NAME, "price").get_attribute("content")
if int(price) <= max_price and int(price) >= min_price: # проверка на сегодняшний день и на подхождение по цене
title_info = contaier_of_content.find_element(By.CLASS_NAME,'.iva-item-title').get_attribute("title").split(',')
town_info = contaier_of_content.find_element(By.CLASS_NAME,'.geo-root').find_element(By.TAG_NAME,"span").text
datePost_info = contaier_of_content.find_element(By.CLASS_NAME,'.iva-item-dateInfo').find_element(By.TAG_NAME,"div").text
#link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
currency = content.find_element(By.CSS_SELECTOR,'.price-price').find_element(By.TAG_NAME,'meta').get_attribute("content")
car_info = title_info[0]
was_created = title_info[1]
city_on_sale = town_info
card = f'{hlink(car_info+" - "+was_created,"https://avito.ru"+link)}\n' \
f'{hbold("Город: ", city_on_sale)}\n' \
f'{hbold("Цена: ", price, currency)}'
await message.answer(card)
except Exception as ex:
print(ex)
finally:
driver.close()
driver.quit()
В консоль он выводит лишь это:
INFO:aiogram:Bot: BETTINGPRO [@betingpro_bot]
WARNING:aiogram:Updates were skipped successfully.
INFO:aiogram.dispatcher.dispatcher:Start polling.
https://www.avito.ru/valuyki/avtomobili/volkswagen-ASgBAgICAUTgtg3GmSg?cd=1&radius=200
https://www.avito.ru/valuyki/avtomobili/vaz_lada-ASgBAgICAUTgtg3GmSg?cd=1&radius=200
Вот рабочий код на bs4
# aiogram
from aiogram import types
from aiogram.types.message import ParseMode
from bot import dp
from bot import db
from aiogram.utils.markdown import hbold, hlink
import cfscrape
import fake_headers
# python
import re
import requests
from bs4 import BeautifulSoup
# default для mvc
city = "belgorod"
city2 = "voronezh"
city3 = "kursk"
radius = 300
allowed_data = ['часов', 'часа','час']
proxies = {
'http': 'http://84.42.62.212:80'
}
user_agent = ("Mozila/5.0 (X11; Ubuntu; Linux x86_64; rv:84.0) Gecko/20100101 Firefox/84.0")
def get_session(url):
session = requests.Session()
session.headers = {'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:50.0) '
'Gecko/20100101 Firefox/50.0'
}
return cfscrape.create_scraper(sess=session)
@dp.message_handler(lambda message: message.text == "Белгород")
async def avito_list(message: types.Message):
# Проверим есть ли вообще подписки
if db.follows_exists(message.from_user.id):
# Выводим все объявления по подпискам
follows = db.show_subs(message.from_user.id)
for follow in follows:
line = follow[1][0][0]
with db.connection:
marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
marka = marka[0][0]
min_price = follow[2][0][0]
if follow[3][0][0] == None:
max_price = follow[2][0][0]*1000
else:
max_price = follow[3][0][0]
#model = "2114_samara" valuyki
#url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
url = f"https://www.avito.ru/valuyki/avtomobili/{marka}/?radius=200"
s = get_session(url)
print(url)
response = s.get(url)
print(response.status_code)
soup = BeautifulSoup(response.text,'lxml')
main_container = soup.find_all('div',class_= re.compile('iva-item-content*'))
for index, content in enumerate(main_container):
contaier_of_content = content.find("div",class_=re.compile("iva-item-body*"))
ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
fresh_car = contaier_of_content.find("div",{"data-marker":"item-date"}).text.split()[1]
price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")[
'content']
if int(price) <= max_price and int(price) >= min_price: # проверка на сегодняшний день и на подхождение по цене
title_info = contaier_of_content.find('a',class_=re.compile('iva-item-title*'))['title'].split(',')
town_info = contaier_of_content.find('div',class_=re.compile('geo-root*')).find('span').text
datePost_info = contaier_of_content.find('div',class_=re.compile('iva-item-dateInfo*')).find('div').text
link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
car_info = title_info[0]
was_created = title_info[1]
city_on_sale = town_info
card = f'{hlink(car_info+" - "+was_created,"https://avito.ru"+link)}\n' \
f'{hbold("Город: ", city_on_sale)}\n' \
f'{hbold("Цена: ", price, currency)}'
await message.answer(card)
@dp.message_handler(lambda message: message.text == "Воронеж")
async def avito_list(message: types.Message):
# Проверим есть ли вообще подписки
if db.follows_exists(message.from_user.id):
# Выводим все объявления по подпискам
follows = db.show_subs(message.from_user.id)
for follow in follows:
line = follow[1][0][0]
with db.connection:
marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
marka = marka[0][0]
min_price = follow[2][0][0]
if follow[3][0][0] == None:
max_price = follow[2][0][0]*1000
else:
max_price = follow[3][0][0]
#model = "2114_samara"
#url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
url = f"https://www.avito.ru/valuyki/avtomobili/{marka}-ASgBAgICAUTgtg3GmSg?cd=1&radius=200"
s = get_session(url)
print(url)
response = s.get(url)
print(response.status_code)
soup = BeautifulSoup(response.text,'lxml')
main_container = soup.find_all('div',class_= re.compile('iva-item-content*'))
for index, content in enumerate(main_container):
contaier_of_content = content.find("div",class_=re.compile("iva-item-body*"))
ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
fresh_car = contaier_of_content.find("div",{"data-marker":"item-date"}).text.split()[1]
price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")[
'content']
if int(price) <= max_price and int(price) >= min_price: # проверка на сегодняшний день и на подхождение по цене
title_info = contaier_of_content.find('a',class_=re.compile('iva-item-title*'))['title'].split(',')
town_info = contaier_of_content.find('div',class_=re.compile('geo-root*')).find('span').text
datePost_info = contaier_of_content.find('div',class_=re.compile('iva-item-dateInfo*')).find('div').text
link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
car_info = title_info[0]
was_created = title_info[1]
city_on_sale = town_info
card = f'{hlink(car_info+" - "+was_created,"https://avito.ru"+link)}\n' \
f'{hbold("Город: ", city_on_sale)}\n' \
f'{hbold("Цена: ", price, currency)}'
await message.answer(card)
@dp.message_handler(lambda message: message.text == "Курск")
async def avito_list(message: types.Message):
# Проверим есть ли вообще подписки
if db.follows_exists(message.from_user.id):
# Выводим все объявления по подпискам
follows = db.show_subs(message.from_user.id)
for follow in follows:
line = follow[1][0][0]
with db.connection:
marka = db.cursor.execute(f"SELECT `avito_mark_name` FROM `marks` WHERE `name` = ?", (line,)).fetchall()
marka = marka[0][0]
min_price = follow[2][0][0]
if follow[3][0][0] == None:
max_price = follow[2][0][0]*1000
else:
max_price = follow[3][0][0]
#model = "2114_samara"
#url = f"https://www.avito.ru/{city}/avtomobili/{marka}/{model}?radius={radius}"
url = f"https://www.avito.ru/kursk/avtomobili/{marka}/?radius=200"
s = get_session(url)
print(url)
response = s.get(url)
print(response.status_code)
soup = BeautifulSoup(response.text,'lxml')
main_container = soup.find_all('div',class_= re.compile('iva-item-content*'))
for index, content in enumerate(main_container):
contaier_of_content = content.find("div",class_=re.compile("iva-item-body*"))
ad_post = contaier_of_content.find("div", {"data-marker": "item-line"})
fresh_car = contaier_of_content.find("div",{"data-marker":"item-date"}).text.split()[1]
price = content.find('span', class_=re.compile('price-price-*')).find('meta', itemprop="price")[
'content']
if int(price) <= max_price and int(price) >= min_price: # проверка на сегодняшний день и на подхождение по цене
title_info = contaier_of_content.find('a',class_=re.compile('iva-item-title*'))['title'].split(',')
town_info = contaier_of_content.find('div',class_=re.compile('geo-root*')).find('span').text
datePost_info = contaier_of_content.find('div',class_=re.compile('iva-item-dateInfo*')).find('div').text
link = content.find('a', class_=re.compile('iva-item-sliderLink*'))['href']
currency = content.find('span', class_=re.compile('price-price-*')).find('meta')['content']
car_info = title_info[0]
was_created = title_info[1]
city_on_sale = town_info
card = f'{hlink(car_info+" - "+was_created,"https://avito.ru"+link)}\n' \
f'{hbold("Город: ", city_on_sale)}\n' \
f'{hbold("Цена: ", price, currency)}'
await message.answer(card)
#Проверка на то, что есть ли какие то автомобили по нужной цене
if not check_empty:
await message.answer("Нету автомобилей с нужной вам ценой")
else:
await message.answer("У вас нет ни одной подписки")