как спарсить все значения с сайта

Question

Задумка в том что бы забрать все ценники с сайта , положить в лист и в последствии запихнуть в json

from email.header import Header
from bs4 import BeautifulSoup
import requests
import json
Headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
    }

def get_data(url):
    all_links = []
    price_game = []
    
    s = requests.Session()
    response = s.get(url=url, headers=Headers)
    soup = BeautifulSoup(response.text, 'lxml')
    pagination = int(soup.find('div', class_="pagination hg-block").find('span', class_="last").find('a').text)

    for page in range(1 , 2):
    # for page in range(1 , pagination + 1):
        url = f"https://hot-game.info/platforms=pc,windows,linux,mac;only_available=1/{page}"
        response = s.get(url=url, headers=Headers)
        soup = BeautifulSoup(response.text, 'lxml')
        all_games = soup.find('section',class_="yui3-cssreset result-block content-table").find_all('div', class_="game-preview hg-block")
        
        for gg in all_games:
            link = f"https://hot-game.info{gg.find('a').get('href')}"
            all_links.append(link)

    with open('links.text' , 'w') as file:
       for url in all_links:
            file.write(f'{url}\n')

    with open('links.text') as file:
        urls_list =  [line.strip() for line in file.readlines()]
        void = []
        s = requests.Session()
        for url in urls_list:
            respons = s.get(url=url, headers=Headers)
            soup = BeautifulSoup(respons.text, 'lxml')
            name = soup.find('div', class_="hg-block short-game-description").find(itemprop="name").text
            all_price = soup.find_all('span', class_="price-value")
            for item in all_price:
                
                price = item.text 
            
            print(url)
            print(name)
            print(price)
            print('#'* 40)
            

#             for link in link:
#                 respone = s.get(url= link , headers=Headers)
#                 soup = BeautifulSoup(response.text, 'lxml')
#                 print(soup)
def main():
    get_data(url='https://hot-game.info/platforms=pc,windows,linux,mac;only_available=1')


if __name__ == '__main__':
    main()

ссылка на сайт

что бы на выходе было как то так

Answer 1

Вашу задачу можно решить также если использовать selenium.

# скачиваем selenium webdriver и прописываем к нему пути через \\
driver=webdriver.Chrome(executable_path="C:\\chromedriver.exe")
driver.maximize_window()
driver.get("https://hot-game.info/game/stellaris") # открываем вашу страничку
time.sleep(5)
# находим имя бренда
name = driver.execute_script("""
         let name = document.querySelector('.game-price-title h1').innerHTML;
         console.log('name:',name);
         return name;
            """)

index = name.find(' ') # получаем Купить Stellaris и находим пробел
name = name[index:] # забираем Stellaris
name = name.lower().strip() # делаем слово маленькими буквами
print(name)

stellaris

# получаем лист с ценами
prices =driver.execute_script("""
       let array = document.querySelectorAll('.price-value');
       console.log('prices:',array);
       let prices = [];
       for (let i =0; array.length > i; i++){ 
            prices.push(array[i].innerText);
                }
                return prices;
            """)

# конвертируем лист в строку
prices = ','.join([str(i) for i in prices])

# cоздаем словарь
my_dict = {"name":name,"price":prices }

print(my_dict)

{'name': 'stellaris','price':'3,4,26,30,36,156,159,166,169,176,179,184,208,643}

БЛОГ НА HUSL

как спарсить все значения с сайта

Ответы (1 шт):