BS4 парсит только 10–11 товаров из примерно 30, содержащихся в HTML-коде
Пишу парсер для Мегамаркета. BS4 парсит только 10–11 товаров, хотя во всём HTML-коде страницы их около 30.
def get_source_html(url):
    """Open *url* in Chrome and dump the rendered page to ``source-page.html``.

    The page source is written once an element with the ``catalog-item``
    class is present in the DOM (waiting up to 60 seconds). Any exception
    raised while loading, waiting or writing is printed rather than
    propagated. The browser is closed and the driver shut down in every case.
    """
    chrome_service = Service(ChromeDriverManager().install())
    browser = webdriver.Chrome(service=chrome_service)
    browser.maximize_window()
    try:
        browser.get(url=url)
        # Block until at least one product card has been added to the DOM.
        card_locator = (By.CLASS_NAME, "catalog-item")
        card_is_present = ec.presence_of_element_located(card_locator)
        WebDriverWait(browser, 60).until(card_is_present)
        with open('source-page.html', 'w', encoding='utf-8') as dump:
            dump.write(browser.page_source)
    except Exception as error:
        print(error)
    finally:
        browser.close()
        browser.quit()
def _find_chain(node, *steps):
    """Follow successive ``find(tag, class_=css_class)`` lookups from *node*.

    Returns the final tag, or ``None`` as soon as any step finds nothing
    (or if *node* itself is ``None``).
    """
    for tag, css_class in steps:
        if node is None:
            return None
        node = node.find(tag, class_=css_class)
    return node


def _digits_to_int(text):
    """Return the decimal digits of *text* as an int, or ``None`` if it has none.

    Everything that is not a decimal digit is dropped, so currency signs and
    any kind of space used as a thousands separator are ignored.
    """
    digits = ''.join(filter(str.isdecimal, text))
    return int(digits) if digits else None


def _parse_card(card):
    """Extract link, price and bonus data from one ``catalog-item`` tag.

    Returns a dict describing the card, or ``None`` when the card has no
    link or no parsable price.
    """
    image = card.find('div', class_='item-image')
    anchor = image.find('a') if image is not None else None
    href = anchor.get('href') if anchor is not None else None
    if not href:
        return None

    money = _find_chain(
        card,
        ('div', 'item-block'),
        ('div', 'item-info'),
        ('div', 'inner catalog-item__prices-container'),
        ('div', 'item-money'),
    )
    price_box = _find_chain(money, ('div', 'item-price'))
    price_span = price_box.find('span') if price_box is not None else None
    if price_span is None:
        return None
    price_text = price_span.get_text()
    price = _digits_to_int(price_text)
    if price is None:
        return None

    # A card without a bonus block is still a valid product: empty strings
    # and a zero bonus stand in for the missing values.
    loyalty = _find_chain(
        money,
        ('div', 'item-bonus'),
        ('div', 'money-bonus sm money-bonus_loyalty'),
    )
    percent_span = _find_chain(loyalty, ('span', 'bonus-percent'))
    amount_span = _find_chain(loyalty, ('span', 'bonus-amount'))
    bonus_percent = percent_span.get_text() if percent_span is not None else ''
    bonus_amount = amount_span.get_text() if amount_span is not None else ''
    bonus = _digits_to_int(bonus_amount) or 0

    return {
        # Drops the last two characters of the price text
        # (presumably a separator and the currency sign - confirm).
        'price': price_text[0:-2],
        'bonus amount': bonus_amount,
        'bonus percent': bonus_percent,
        'link': baseURL + href,
        'ratio': round(price / bonus, 3) if bonus else 0,
    }


def get_items(file_path):
    """Parse product cards from a saved HTML page.

    Args:
        file_path: Path to a UTF-8 encoded HTML file.

    Returns:
        A dict ordered by ascending price/bonus ratio that maps each
        product link to a dict with the keys ``'price'``,
        ``'bonus amount'``, ``'bonus percent'``, ``'link'`` and ``'ratio'``.
        ``'ratio'`` is ``price / bonus`` rounded to 3 places, or ``0`` when
        the card has no bonus. Cards without a link or a parsable price are
        skipped.

    Note:
        The result is keyed by link rather than by ratio. Keying by ratio
        made every card with an equal ratio overwrite the previous one; in
        particular all cards without a bonus share ratio ``0``, so only one
        of them survived.
    """
    with open(file_path, encoding='utf-8') as file:
        soup = BeautifulSoup(file.read(), 'lxml')

    parsed = []
    for card in soup.find_all('div', class_='catalog-item'):
        info = _parse_card(card)
        if info is not None:
            parsed.append(info)

    # list.sort is stable, so cards with equal ratios keep their page order.
    parsed.sort(key=lambda info: info['ratio'])
    return {info['link']: info for info in parsed}