Ввод url для парсинга
from PyQt5.Qt import *
from pickle import TRUE
import requests
from bs4 import BeautifulSoup
import json
class ThreadM(QThread):
    """Worker thread that downloads a Mechta catalog and collects products.

    Signals:
        stepChanged(int, int): emitted before each page is fetched, with the
            current page number and the total number of pages.
        finished(list): emitted once at the end with the list of product
            dicts (keys: 'product_ids', 'title', 'price', 'old price').
        error(str): emitted with a message when a request or parsing fails.
    """

    stepChanged = pyqtSignal(int, int)
    # NOTE(review): QThread already has a built-in `finished` signal; this
    # class attribute presumably replaces it for this class — confirm intended.
    finished = pyqtSignal(list)
    error = pyqtSignal(str)

    def __init__(self, url, file, HEADERS):
        """Store the catalog URL, the output file name and the HTTP headers."""
        super().__init__()
        self.url = url
        self.file = file
        self.HEADERS = HEADERS

    def run(self):
        """Thread entry point: run the whole parsing routine."""
        self.parseM()

    def parseM(self):
        """Fetch every catalog page and emit the collected products.

        Emits `error` and stops early if a request fails, if the first
        response is not HTTP 200, or if a page cannot be parsed. Emits
        `finished` with the accumulated list otherwise.
        """
        html = self.get_html()
        if html is False:
            # get_html() has already reported the failure through `error`.
            return
        if html.status_code != 200:
            self.error.emit(f'Error: status_code={html.status_code}')
            return

        products = []
        pages_count = self.get_pages_count(html.text)
        for page in range(1, pages_count + 1):
            self.stepChanged.emit(page, pages_count)
            html = self.get_html(params={'page': page})
            if html is False:
                # A failed page request used to crash on `html.text`;
                # the error signal was already emitted by get_html().
                return
            try:
                products.extend(self.get_content(html.text))
            except (requests.RequestException, ValueError,
                    LookupError, TypeError) as exc:
                # Keep exceptions from escaping run(): report them instead.
                self.error.emit(f'Error: {exc}')
                return
            self.msleep(50)  # short pause between page requests
        self.finished.emit(products)

    def get_html(self, params=None):
        """GET `self.url` with the stored headers.

        Args:
            params: optional query parameters passed to `requests.get`.

        Returns:
            The `requests.Response`, or `False` if the request raised
            (in which case `error` has been emitted).
        """
        try:
            return requests.get(self.url, headers=self.HEADERS, params=params)
        except requests.RequestException:
            self.error.emit('Error: Что-то пошло не так.')
            return False

    def get_pages_count(self, html):
        """Return the page count read from the last `span.block` element.

        Falls back to 1 when the markup contains no such element.
        """
        soup = BeautifulSoup(html, 'html.parser')
        pagination = soup.select('span.block')
        if pagination:
            return int(pagination[-1].get_text().replace('\n', ''))
        return 1

    def get_content(self, html):
        """Return product dicts with title and prices.

        NOTE(review): `html` is not used. Every call re-downloads `self.url`
        without the page parameter or headers, so each page iteration sees
        the same response — confirm against the intended API usage.

        Returns:
            A list of dicts with keys 'product_ids', 'title', 'price' and
            'old price'.
        """
        rl = requests.get(self.url)
        data = sorted(
            (
                {'product_ids': j['id'], 'title': j['title']}
                for j in rl.json()['data']['items']
            ),
            key=lambda x: x['product_ids'],
        )

        # Ask the price endpoint about all collected ids in one request.
        res = {'product_ids': ','.join(str(i['product_ids']) for i in data)}
        rs = requests.post(
            'https://www.mechta.kz/api/new/mindbox/actions/catalog',
            data=res,
        ).json()['data']

        data2 = []
        for k in rs.values():
            price = k['prices']['discounted_price']
            old_price = k['prices']['base_price']
            if old_price == price:
                old_price = 'Скидки нет'
            data2.append({
                'price': price,
                'old price': old_price
            })
        # NOTE(review): products and prices are paired purely by position;
        # this assumes the price response is ordered like the sorted ids.
        return [{**x, **y} for x, y in zip(data, data2)]
Имеется такой код. Нужно, чтобы при вводе такого url ( https://www.mechta.kz/section/smartfony/ ) он парсил эту страницу ( https://www.mechta.kz/api/new/catalog?properties=&page=2&section=smartfony ) и проходился по всем имеющимся в ней страницам. Как это можно реализовать?
При таком коде, когда я ввожу первый url, он выдаёт мне весь json.
import sys
import requests
from selenium import webdriver
import csv
import tkinter as tk
from tkinter import ttk
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.Qt import *
from Сайты.Sulpak import ThreadS
from Сайты.Mechta import ThreadM
from pickle import TRUE
class Ui_Form(object):
    """Widget layout for the parser form.

    Creates a push button, two line edits, a two-item combo box and a text
    edit on the given form, all positioned with fixed geometry.
    """

    def setupUi(self, Form):
        """Create, place and style all child widgets of `Form`."""
        # --- Form itself: fixed 700x700 size, grey background ---
        Form.setObjectName("Form")
        Form.resize(700, 700)
        font = QtGui.QFont()
        font.setPointSize(1)
        Form.setFont(font)
        Form.setLayoutDirection(QtCore.Qt.LeftToRight)
        Form.setStyleSheet("background: rgb(112, 112, 112);")

        # --- Start button ---
        self.pushButton = QtWidgets.QPushButton(Form)
        self.pushButton.setGeometry(QtCore.QRect(200, 400, 300, 61))
        # NOTE(review): point size 1 is set here while the stylesheet below
        # specifies font-size: 20px — presumably the stylesheet wins; confirm.
        font = QtGui.QFont()
        font.setPointSize(1)
        font.setBold(True)
        font.setWeight(75)
        self.pushButton.setFont(font)
        self.pushButton.setStyleSheet(" QPushButton{\n"
                                      "\n"
                                      "background: rgb(61,181,233);\n"
                                      " height: 50px;\n"
                                      " border-radius: 10px;\n"
                                      " text-align: center;\n"
                                      " font-size: 20px;\n"
                                      " font-weight: bold;\n"
                                      "}\n"
                                      "\n"
                                      "QPushButton:hover{\n"
                                      "background: rgb(52, 148, 189)\n"
                                      "}")
        self.pushButton.setObjectName("pushButton")

        # --- First line edit (placeholder text is set in retranslateUi) ---
        self.lineEdit = QtWidgets.QLineEdit(Form)
        self.lineEdit.setGeometry(QtCore.QRect(180, 320, 340, 50))
        self.lineEdit.setStyleSheet(" QLineEdit{\n"
                                    " border-radius: 10px;\n"
                                    " font-size: 20px;\n"
                                    " font-weight: bold;\n"
                                    " background: white;\n"
                                    "}\n"
                                    "\n"
                                    " QLineEdit:hover {\n"
                                    " border: 3px solid rgb(61,181,233);\n"
                                    " }")
        self.lineEdit.setInputMask("")
        self.lineEdit.setAlignment(QtCore.Qt.AlignCenter)
        self.lineEdit.setObjectName("lineEdit")

        # --- Second line edit, same styling, placed above the first ---
        self.lineEdit_2 = QtWidgets.QLineEdit(Form)
        self.lineEdit_2.setGeometry(QtCore.QRect(180, 240, 340, 50))
        self.lineEdit_2.setStyleSheet(" QLineEdit{\n"
                                      " border-radius: 10px;\n"
                                      " font-size: 20px;\n"
                                      " font-weight: bold;\n"
                                      " background: white;\n"
                                      "}\n"
                                      "\n"
                                      " QLineEdit:hover {\n"
                                      " border: 3px solid rgb(61,181,233);\n"
                                      " }")
        self.lineEdit_2.setInputMask("")
        self.lineEdit_2.setAlignment(QtCore.Qt.AlignCenter)
        self.lineEdit_2.setObjectName("lineEdit_2")

        # --- Combo box with two items (texts are set in retranslateUi) ---
        self.comboBox = QtWidgets.QComboBox(Form)
        self.comboBox.setGeometry(QtCore.QRect(175, 130, 350, 60))
        font = QtGui.QFont()
        font.setPointSize(1)
        font.setBold(True)
        font.setWeight(75)
        self.comboBox.setFont(font)
        self.comboBox.setMouseTracking(False)
        self.comboBox.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.comboBox.setAutoFillBackground(False)
        self.comboBox.setStyleSheet("QComboBox{\n"
                                    " border-radius: 30px;\n"
                                    " padding-left: 140px;\n"
                                    " background:rgb(56, 56, 56);\n"
                                    " border-bottom: 5px solid rgb(89, 133, 255);\n"
                                    " font-size: 20px;\n"
                                    " color: #fff;\n"
                                    " font-weight: bold;\n"
                                    "}\n"
                                    "\n"
                                    "QComboBox QAbstractItemView {\n"
                                    " text-align: center;\n"
                                    " border-radius: 20px;\n"
                                    " background-color:rgb(56, 56, 56);\n"
                                    " color: white;\n"
                                    "}\n"
                                    "\n"
                                    "QComboBox::drop-down {\n"
                                    " width: 25px;\n"
                                    " height: 25px;\n"
                                    " top: 15px;\n"
                                    " right: 15px;\n"
                                    "}")
        self.comboBox.setInputMethodHints(QtCore.Qt.ImhNone)
        self.comboBox.setObjectName("comboBox")
        # Two empty placeholders; retranslateUi fills in their texts.
        self.comboBox.addItem("")
        self.comboBox.addItem("")

        # --- Text edit stored under the attribute name `widget` ---
        self.widget = QtWidgets.QTextEdit(Form)
        self.widget.setGeometry(QtCore.QRect(150, 499, 400, 151))
        font = QtGui.QFont()
        font.setPointSize(12)
        self.widget.setFont(font)
        self.widget.setStyleSheet("background: #fff;\n"
                                  "color: black;")
        self.widget.setObjectName("widget")

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set every user-visible text via QCoreApplication.translate."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.pushButton.setText(_translate("Form", "Начать"))
        self.lineEdit.setPlaceholderText(_translate("Form", "Введите URL каталога"))
        self.lineEdit_2.setPlaceholderText(_translate("Form", "Введите название файла"))
        self.comboBox.setCurrentText(_translate("Form", "Sulpak"))
        self.comboBox.setItemText(0, _translate("Form", "Sulpak"))
        self.comboBox.setItemText(1, _translate("Form", "Мечта"))
class MainWindow(QtWidgets.QWidget, Ui_Form):
    """Main window: starts a parser thread and saves its result as CSV."""

    # HTTP headers handed to the parser threads.
    HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
        'accept' : '*/*'
    }

    def __init__(self):
        """Build the UI, connect widget signals and set initial state."""
        super().__init__()
        self.setupUi(self)
        self.pushButton.clicked.connect(self.btn)
        self.comboBox.currentTextChanged.connect(self.on_combobox_changed)
        self.lineEdit_2.setText('.csv')
        self.threads = []       # every started worker is kept referenced here
        self.combo_status = 0   # index of the selected combo box item

    def on_combobox_changed(self, index):
        """Remember the selected combo box index (0 or 1).

        The argument delivered by `currentTextChanged` is ignored; the
        index is read from the combo box directly.
        """
        index = self.comboBox.currentIndex()
        if index in (0, 1):
            self.combo_status = index

    def btn(self):
        """Validate the inputs and start the worker for the selected site."""
        if not self.lineEdit.text() or not self.lineEdit_2.text():
            QMessageBox.information(self, 'Внимание', 'Заполните поля ввода.')
            return
        # Disabled until the worker reports success or an error.
        self.pushButton.setEnabled(False)
        self.r = self.lineEdit.text()
        self.file = self.lineEdit_2.text()
        if self.combo_status == 0:
            self.thread = ThreadS(self.r, self.file, self.HEADERS)
        elif self.combo_status == 1:
            self.thread = ThreadM(self.r, self.file, self.HEADERS)
        self.threads.append(self.thread)
        self.thread.stepChanged.connect(self.onStepChanged)
        self.thread.finished.connect(self.save_file)
        self.thread.error.connect(self.error)
        self.thread.start()

    def error(self, error):
        """Log the error text, show it in a dialog and re-enable the button."""
        self.widget.append(error)
        QMessageBox.information(self, 'Error', error)
        self.pushButton.setEnabled(True)

    def onStepChanged(self, page, pages_count):
        """Append a progress line for the page being parsed."""
        self.widget.append(f'Парсинг страницы {page} из {pages_count}...')

    def save_file(self, items):
        """Write the parsed products to a ';'-delimited CSV file.

        The file is placed in a per-site directory chosen from
        `self.combo_status`; the directory is created when missing.

        Args:
            items: list of dicts with keys 'title', 'price', 'old price'.
        """
        import os  # local import: only needed to create the output folder

        if self.combo_status == 0:
            self.combobox_name = "Спаршенные данные/Sulpak"
        elif self.combo_status == 1:
            self.combobox_name = "Спаршенные данные/Mechta"
        file_s = self.combobox_name + '/' + self.file
        try:
            os.makedirs(self.combobox_name, exist_ok=True)
            # Explicit encoding: the rows contain Cyrillic text, and the
            # locale default may be unable to encode it. The BOM written by
            # 'utf-8-sig' lets spreadsheet tools detect UTF-8.
            with open(file_s, 'w', newline='', encoding='utf-8-sig') as file:
                writer = csv.writer(file, delimiter=';')
                writer.writerow(['Модель', 'Цена', 'Цена без скидки'])
                for item in items:
                    writer.writerow([item['title'], item['price'], item['old price']])
        except OSError as exc:
            # Report the failure and re-enable the button instead of
            # leaving the window stuck after an unhandled exception.
            self.error(f'Error: {exc}')
            return
        self.widget.append(f'Получено {len(items)} товаров')
        self.pushButton.setEnabled(True)
# Script entry point: create the Qt application, show the main window and
# hand the event loop's return value to the interpreter as the exit status.
if __name__ == "__main__":
    application = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)
Ответы (1 шт):
Автор решения: Сергей Шашко
→ Ссылка
class ThreadM(QThread):
    # .... code omitted ....
    def parseM(self):
        # .... code omitted ....
        products = []
        # The section name is the 5th '/'-separated piece of the entered URL,
        # e.g. 'https://www.mechta.kz/section/smartfony/' -> 'smartfony'.
        section = self.url.split('/')[4]
        # .... code omitted ....
        #html = self.get_html(params={'page': page})
        products.extend(self.get_content(page, section))
        # .... code omitted ....

    def get_content(self, page, section):
        """Request one page of the catalog API for the given section."""
        #rl = requests.get(self.url)
        # '&section=' restored: the page showed it as '§ion' because '&sect'
        # was rendered as an HTML entity.
        url = f'https://www.mechta.kz/api/new/catalog?properties=&page={page}&section={section}'
        rl = requests.get(url)
        # .... code omitted ....
