import requests
from bs4 import BeautifulSoup
import data_client
import pandas
class Parser:
links_to_parse = [
'https://www.kufar.by/l/mebel',
'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6Mn0%3D',
'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6M30%3D',
'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6NH0%3D'
]
data_client_imp = data_client.CsvClient()
@staticmethod
def get_mebel_by_link(link):
response = requests.get(link)
mebel_data = response.text
mebel_items = []
to_parse = BeautifulSoup(mebel_data, 'html.parser')
for elem in to_parse.find_all('a', class_='styles_wrapper__yaLfq'):
try:
price, description = elem.text.split('р.')
mebel_items.append((
elem['href'],
int(price.replace(' ', '')),
description
))
except:
print(f'Цена не была указана. {elem.text}')
return mebel_items
def save_to_postgres(self, mebel_items):
connection = self.data_client_imp.get_connection()
self.data_client_imp.create_mebel_table(connection)
for item in mebel_items:
self. data_client_imp.insert(connection, item[0], item[1], item[2])
def run(self):
mebel_items = []
for link in Parser.links_to_parse:
mebel_items.extend(self.get_mebel_by_link(link))
self.save_to_postgres(mebel_items)
Parser().run()
class CsvClient(DataClient):
FILENAME = 'mebel.csv'
def get_connection(self):
return None
def create_mebel_table(self, conn):
pass
def get_items(self, conn, price_from=0, price_to=100000):
items = []
with open(self.FILENAME, 'r', newline='') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
price = int(row['price'])
if price >= price_from and price <= price_to:
items.append(row)
return items
def insert(self, conn, link, price, description):
with open(self.FILENAME, 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow([link, price, description])
# data_client = PostgresClient()
# data_client = Sqlite3Client()
data_client = CsvClient()
data_client.run_test()