Как сохранить данные парсинга в CSV-файл, чтобы его можно было и читать, и дополнять?
Столкнулся с проблемой: у меня есть код, который должен сохранять данные, полученные от парсера, в CSV-файл, а также уметь читать этот файл и записывать в него, но у меня не получается положить результаты парсинга в data. Код парсера parser.py:
import requests
from bs4 import BeautifulSoup
import pandas
import data_client
class Parser:
    """Scrape listings from the kufar.by 'mebel' section and store them.

    Items are collected from every URL in ``links_to_parse`` and written
    through the storage client held in ``data_client_imp``.
    """

    # Listing page URLs fetched by run().
    links_to_parse = [
        'https://www.kufar.by/l/mebel',
        'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6MiwicGl0IjoiMjg0NjI5MDcifQ%3D%3D',
        'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6MywicGl0IjoiMjg0NjI5MDcifQ%3D%3D',
        'https://www.kufar.by/l/mebel?cursor=eyJ0IjoiYWJzIiwiZiI6dHJ1ZSwicCI6NiwicGl0IjoiMjg0NjI5MDcifQ%3D%3D'
    ]
    # Storage backend; created once at class-definition time and shared by
    # every Parser instance.
    data_client_imp = data_client.CsvClient()

    @staticmethod
    def get_mebel_by_link(link):
        """Download one listing page and extract its items.

        Args:
            link: URL of the listing page to fetch.

        Returns:
            A list of ``(href, price, description)`` tuples, one for each
            matching ``<a>`` element whose text splits into exactly a price
            and a description around ``'р.'``. Elements that do not fit
            that shape are reported on stdout and skipped.
        """
        response = requests.get(link)
        to_parse = BeautifulSoup(response.text, 'html.parser')
        mebel_items = []
        for elem in to_parse.find_all('a', class_='styles_wrapper__5FoK7'):
            try:
                # ValueError: the text does not split into exactly two parts
                # or the price part is not an integer.
                # KeyError: the element has no href attribute.
                price, decription = elem.text.split('р.')
                item = (elem['href'], int(price.replace(' ', '')), decription)
            except (ValueError, KeyError):
                print(f'Цена не была указана. {elem.text}')
            else:
                mebel_items.append(item)
        return mebel_items

    def save_to_postgres(self, mebel_items):
        """Store the parsed items through ``data_client_imp``.

        Despite the name, the target is whichever client ``data_client_imp``
        holds, not necessarily PostgreSQL.

        Args:
            mebel_items: Iterable of ``(href, price, description)`` tuples.
        """
        connection = self.data_client_imp.get_connection()
        self.data_client_imp.create_mebel_table(connection)
        for item in mebel_items:
            self.data_client_imp.insert(connection, item[0], item[1], item[2])

    def run(self):
        """Parse every URL in ``links_to_parse`` and store all found items."""
        mebel_items = []
        for link in Parser.links_to_parse:
            mebel_items.extend(self.get_mebel_by_link(link))
        self.save_to_postgres(mebel_items)
if __name__ == "__main__":
    # Scrape only when parser.py is executed as a script; importing the
    # module must not start network requests or write any data.
    Parser().run()
Код data_client.py:
import csv
import sqlite3
from sqlite3 import Error
# pip install psycopg2
import psycopg2
from abc import ABC, abstractmethod
import pandas as pd
import os
from parser import Parser
class DataClient(ABC):
    """Abstract storage backend for parsed items.

    Subclasses provide connection handling, table creation, querying and
    insertion; ``run_test`` drives those four operations end to end.
    """

    @abstractmethod
    def get_connection(self):
        """Return the backend-specific connection passed to the other methods."""

    @abstractmethod
    def create_mebel_table(self, conn):
        """Prepare the storage for items (for example, create the table)."""

    @abstractmethod
    def get_items(self, conn, price_from=0, price_to=100000):
        """Return stored items whose price is within [price_from, price_to]."""

    @abstractmethod
    def insert(self, conn, link, price, description):
        """Store a single item."""

    def run_test(self, links=None):
        """Parse listings, store them and print the ones priced from 10 to 30.

        Args:
            links: Iterable of listing page URLs to parse. When omitted,
                ``Parser.links_to_parse`` is used.

        NOTE(review): parser.py imports this module while this module imports
        ``Parser`` from parser.py at module level. That circular import should
        be broken (for example by importing ``Parser`` inside this method)
        for either file to be importable.
        """
        if links is None:
            links = Parser.links_to_parse
        conn = self.get_connection()
        self.create_mebel_table(conn)
        for link in links:
            # get_mebel_by_link requires the page URL and yields
            # (href, price, description) tuples.
            for href, price, description in Parser.get_mebel_by_link(link):
                self.insert(conn, href, price, description)
        for item in self.get_items(conn, price_from=10, price_to=30):
            print(item)
class PostgresClient(DataClient):
    """DataClient that stores items in PostgreSQL through psycopg2."""

    USER = "postgres"
    PASSWORD = "postgres"
    HOST = "localhost"
    PORT = "5432"

    def get_connection(self):
        """Open a connection using the class-level credentials.

        Raises:
            psycopg2.Error: If the connection cannot be established. The
                error is printed and then re-raised, because a missing
                connection would only fail later in a less obvious place.
        """
        try:
            return psycopg2.connect(
                user=self.USER,
                password=self.PASSWORD,
                host=self.HOST,
                port=self.PORT
            )
        except psycopg2.Error as exc:
            # The handler must name psycopg2's own exception class; the
            # module-level ``Error`` is sqlite3's and never matches here.
            print(exc)
            raise

    def create_mebel_table(self, conn):
        """Create the ``mebel`` table if it does not exist yet."""
        with conn.cursor() as cursor:
            cursor.execute(
                """
                CREATE TABLE IF NOT EXISTS mebel
                (
                    id serial PRIMARY KEY,
                    link text,
                    price integer,
                    description text
                )
                """
            )
        conn.commit()

    def get_items(self, conn, price_from=0, price_to=100000):
        """Return all rows whose price lies within [price_from, price_to]."""
        with conn.cursor() as cursor:
            cursor.execute(
                'SELECT * FROM mebel WHERE price >= %s AND price <= %s',
                (price_from, price_to),
            )
            return cursor.fetchall()

    def insert(self, conn, link, price, description):
        """Insert one item and commit.

        Values are passed as query parameters rather than formatted into the
        SQL text, so quotes inside scraped descriptions cannot break or alter
        the statement.
        """
        with conn.cursor() as cursor:
            cursor.execute(
                'INSERT INTO mebel (link, price, description) VALUES (%s, %s, %s)',
                (link, price, description),
            )
        conn.commit()
class Sqlite3Client(DataClient):
    """DataClient that stores items in a local SQLite database file."""

    DB_NAME = "kufar.db"

    def get_connection(self):
        """Open a connection to the ``DB_NAME`` database file.

        Raises:
            sqlite3.Error: If the database cannot be opened. The error is
                printed and then re-raised instead of returning None, which
                would only surface later as an AttributeError.
        """
        try:
            return sqlite3.connect(self.DB_NAME)
        except Error as exc:
            print(exc)
            raise

    def create_mebel_table(self, conn):
        """Create the ``mebel`` table if it does not exist yet."""
        cursor_object = conn.cursor()
        cursor_object.execute(
            """
            CREATE TABLE IF NOT EXISTS mebel
            (
                id integer PRIMARY KEY autoincrement,
                link text,
                price integer,
                description text
            )
            """
        )
        conn.commit()

    def get_items(self, conn, price_from=0, price_to=100000):
        """Return all rows whose price lies within [price_from, price_to]."""
        cursor = conn.cursor()
        cursor.execute(
            'SELECT * FROM mebel WHERE price >= ? AND price <= ?',
            (price_from, price_to),
        )
        return cursor.fetchall()

    def insert(self, conn, link, price, description):
        """Insert one item and commit.

        Values are bound through ``?`` placeholders rather than formatted
        into the SQL text, so quotes inside scraped descriptions cannot break
        or alter the statement.
        """
        cursor = conn.cursor()
        cursor.execute(
            'INSERT INTO mebel (link, price, description) VALUES (?, ?, ?)',
            (link, price, description),
        )
        conn.commit()
class CsvClient(DataClient):
    """DataClient that stores items in a CSV file with a header row.

    The header (``link,price,description``) is what lets ``get_items`` read
    rows back by column name with ``csv.DictReader``.
    """

    FILENAME = 'mebel.csv'
    # Column order used for both the header row and every data row.
    FIELDNAMES = ('link', 'price', 'description')

    def get_connection(self):
        """Return None: a CSV file needs no connection object."""
        return None

    def create_mebel_table(self, conn):
        """Create the CSV file with its header row if it is missing or empty.

        An existing non-empty file is left untouched so stored rows are kept.
        """
        if os.path.exists(self.FILENAME) and os.path.getsize(self.FILENAME) > 0:
            return
        with open(self.FILENAME, 'w', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow(self.FIELDNAMES)

    def get_items(self, conn, price_from=0, price_to=100000):
        """Return rows whose price lies within [price_from, price_to].

        Returns:
            A list of dicts keyed by the header names; every value is a
            string, as read from the file. Empty when the file does not
            exist yet.

        Raises:
            KeyError: If the file has no ``price`` column (for example a
                file written without a header row).
            ValueError: If a ``price`` value is not an integer.
        """
        if not os.path.exists(self.FILENAME):
            return []
        with open(self.FILENAME, 'r', newline='', encoding='utf-8') as file:
            return [
                row
                for row in csv.DictReader(file)
                if price_from <= int(row['price']) <= price_to
            ]

    def insert(self, conn, link, price, description):
        """Append one item as a CSV row, writing the header first if needed."""
        # Guarantees the header exists even when create_mebel_table was not
        # called beforehand; without it DictReader would treat the first
        # data row as the column names.
        self.create_mebel_table(conn)
        # UTF-8 is set explicitly so Cyrillic text round-trips regardless of
        # the platform's default encoding.
        with open(self.FILENAME, 'a', newline='', encoding='utf-8') as file:
            csv.writer(file).writerow([link, price, description])
if __name__ == "__main__":
    # Run the CSV round-trip check only when this file is executed as a
    # script; parser.py imports this module and must not trigger it.
    data_client = CsvClient()
    data_client.run_test()
Пробовал импортировать parser.py и передать в data результат функции get_mebel_by_link(), но выдаётся ошибка, и данные парсинга не попадают в data.