Программ в рандомный момент может остановится без ошибки на вечно
import requests
from bs4 import BeautifulSoup
import lxml
import time
import json
import sqlite3
from fake_useragent import UserAgent
ua = UserAgent(browsers=['chrome'])
def download_img(url,data_id):
try:
pyt_do_file = f"C:/Users/ACER/Desktop/diplom/IMG_BASE/filee_IMG_{data_id}.jpg"
response = requests.get(url=url)
with open(pyt_do_file, 'wb') as file:
file.write(response.content)
return pyt_do_file
except Exception as _ex:
print(_ex)
return 'the file is not installed .'
def download_file(data_id):
#'https://mcbuild.org/schematics/14495:amphitheatre-arena',
try:
cookies = {
'_ga': 'GA1.1.181025297.1713956099',
'_pk_id.78.90ce': 'eff7a471c03e4887.1713956100.',
'PHPSESSID': 'd04933f9eabea1b76bfd1247fb31f270',
'_pk_ses.78.90ce': '1',
'_ga_PT57S4LEXC': 'GS1.1.1714573040.18.1.1714573303.0.0.0',
'FCNEC': '%5B%5B%22AKsRol-5DouioWBi4EHE9XuU6m0GRGrn5Fp7cpBM23AMwbaFWwhvpE1hd7PFOtzKhW8mZ190hL75xunqYeTtZJsubx9tCxP2ej_Q-LpH5V9hFLggtgLVDq3560la15Iy3rFbxA8Au2MyEoj08ok7UbpjpxAy8dXoGA%3D%3D%22%5D%5D',
'__gads': 'ID=2c17f14da703eb96:T=1713956101:RT=1714573351:S=ALNI_MYWB4XIXp5dIiU4sWQigy7hWVbIWg',
'__gpi': 'UID=00000dfdb98c2625:T=1713956101:RT=1714573351:S=ALNI_Mb0ZgMl2axV8rOhTX1js0iV5JdCVg',
'__eoi': 'ID=2aa11ed4288f8e9b:T=1713956101:RT=1714573351:S=AA-AfjbfMART8WjkvaR2nKyUDjbu',
}
headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
'cache-control': 'max-age=0',
# 'cookie': '_ga=GA1.1.181025297.1713956099; _pk_id.78.90ce=eff7a471c03e4887.1713956100.; PHPSESSID=d04933f9eabea1b76bfd1247fb31f270; _pk_ses.78.90ce=1; _ga_PT57S4LEXC=GS1.1.1714573040.18.1.1714573303.0.0.0; FCNEC=%5B%5B%22AKsRol-5DouioWBi4EHE9XuU6m0GRGrn5Fp7cpBM23AMwbaFWwhvpE1hd7PFOtzKhW8mZ190hL75xunqYeTtZJsubx9tCxP2ej_Q-LpH5V9hFLggtgLVDq3560la15Iy3rFbxA8Au2MyEoj08ok7UbpjpxAy8dXoGA%3D%3D%22%5D%5D; __gads=ID=2c17f14da703eb96:T=1713956101:RT=1714573351:S=ALNI_MYWB4XIXp5dIiU4sWQigy7hWVbIWg; __gpi=UID=00000dfdb98c2625:T=1713956101:RT=1714573351:S=ALNI_Mb0ZgMl2axV8rOhTX1js0iV5JdCVg; __eoi=ID=2aa11ed4288f8e9b:T=1713956101:RT=1714573351:S=AA-AfjbfMART8WjkvaR2nKyUDjbu',
'priority': 'u=0, i',
'referer': 'https://mcbuild.org/schematics/',
'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': f'{ua.random}',
}
response = requests.get(f'https://mcbuild.org/download/schematic={data_id}',timeout=30, cookies=cookies, headers=headers)
soup = BeautifulSoup (response.text,'lxml')
file_url = str(soup.find_all('script')[3]).split("'")[1]
response = requests.get(f"https://mcbuild.org{file_url})",timeout=30, headers={'Referer': f'https://mcbuild.org/download/schematic={data_id}'})
pyt_do_file = f'C:/Users/ACER/Desktop/diplom/shema_base/filee_shem_{data_id}.schematic'
with open(pyt_do_file, 'wb') as file:
file.write(response.content)
return pyt_do_file
except Exception as _ex:
print(_ex)
return 'the file is not installed .'
def Sqlite_base_loading(name,soup,pytt_file_imgg,pytt_file_schem,PAGE,connection):
cursor = connection.cursor()
obgect = soup.find('div',class_='col-md-4 col-6 mb-4 pb-2')
tip = obgect.find('div',class_='w-100 mt-auto').find('p',class_='fs-7 card-text text-white').text
tip = tip.replace('Flying','')
tip = tip.replace(' ','')
if PAGE == 0 :
cursor.execute(f'''
CREATE TABLE IF NOT EXISTS {tip} (
id INTEGER PRIMARY KEY,
name TEXT NOT NULL,
pytt_imgg TEXT NOT NULL,
pytt_schem TEXT NOT NULL
)
''')
connection.commit()
cursor.execute(f'INSERT INTO {tip} (name,pytt_imgg,pytt_schem) VALUES (?, ?, ?)', (name,pytt_file_imgg,pytt_file_schem))
connection.commit()
def click_more() :
#response = requests.get('https://mcbuild.org/schematics/')
#soup = BeautifulSoup (response.text,'lxml')
#kol_tipov = len(soup.find('ul',class_='nav nav-pills mb-4 justify-content-start fs-7').find_all('li',class_='nav-item me-2 mb-2'))
kol_tipov = 20
Id = 1
PAGE = 0
n = 0
liust = []
connection = sqlite3.connect('C:/Users/ACER/Desktop/diplom/postrouki_data_base_pars.db')
while True :
if Id == 13:
Id += 1
cookies = {
'_ga': 'GA1.1.181025297.1713956099',
'_pk_id.78.90ce': 'eff7a471c03e4887.1713956100.',
'PHPSESSID': 'd04933f9eabea1b76bfd1247fb31f270',
'_pk_ses.78.90ce': '1',
'__gads': 'ID=2c17f14da703eb96:T=1713956101:RT=1714238239:S=ALNI_MYWB4XIXp5dIiU4sWQigy7hWVbIWg',
'__gpi': 'UID=00000dfdb98c2625:T=1713956101:RT=1714238239:S=ALNI_Mb0ZgMl2axV8rOhTX1js0iV5JdCVg',
'__eoi': 'ID=2aa11ed4288f8e9b:T=1713956101:RT=1714238239:S=AA-AfjbfMART8WjkvaR2nKyUDjbu',
'_ga_PT57S4LEXC': 'GS1.1.1714238238.9.1.1714238300.0.0.0',
'FCNEC': '%5B%5B%22AKsRol_Mi2U-YgkZs3vyrMk5UsRTTEeNRx1v6LGOqAzWJRRi-4EoINMNxh6WnFCg6h6J3UNqDfvKqonh5l4delYN0gjP8g1Tk3PyYZ70UczjfkeoOyR9gvIj0whYCDNBGxtzX4p6z-1SuAjm7QkmD0eJBNjRV9vCVw%3D%3D%22%5D%5D',
}
headers = {
'accept': '*/*',
'accept-language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
# 'cookie': '_ga=GA1.1.181025297.1713956099; _pk_id.78.90ce=eff7a471c03e4887.1713956100.; PHPSESSID=d04933f9eabea1b76bfd1247fb31f270; _pk_ses.78.90ce=1; __gads=ID=2c17f14da703eb96:T=1713956101:RT=1714238239:S=ALNI_MYWB4XIXp5dIiU4sWQigy7hWVbIWg; __gpi=UID=00000dfdb98c2625:T=1713956101:RT=1714238239:S=ALNI_Mb0ZgMl2axV8rOhTX1js0iV5JdCVg; __eoi=ID=2aa11ed4288f8e9b:T=1713956101:RT=1714238239:S=AA-AfjbfMART8WjkvaR2nKyUDjbu; _ga_PT57S4LEXC=GS1.1.1714238238.9.1.1714238300.0.0.0; FCNEC=%5B%5B%22AKsRol_Mi2U-YgkZs3vyrMk5UsRTTEeNRx1v6LGOqAzWJRRi-4EoINMNxh6WnFCg6h6J3UNqDfvKqonh5l4delYN0gjP8g1Tk3PyYZ70UczjfkeoOyR9gvIj0whYCDNBGxtzX4p6z-1SuAjm7QkmD0eJBNjRV9vCVw%3D%3D%22%5D%5D',
'origin': 'https://mcbuild.org',
'priority': 'u=1, i',
'referer': 'https://mcbuild.org/schematics/',
'sec-ch-ua': '"Chromium";v="124", "Google Chrome";v="124", "Not-A.Brand";v="99"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-platform': '"Windows"',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'cors',
'sec-fetch-site': 'same-origin',
'user-agent': f'{ua.random}',
'x-requested-with': 'XMLHttpRequest',
}
data = {
'id': f'{Id}',
'page': f'{PAGE}',
}
response = requests.post('https://mcbuild.org/items/', cookies=cookies, headers=headers, data=data)
soup = BeautifulSoup (response.text,'lxml')
obgect_list = soup.find_all('div',class_='col-md-4 col-6 mb-4 pb-2')
if kol_tipov + 1 == Id :
connection.close()
break
if not obgect_list :
Id += 1
PAGE = 0
#print(Id)
continue
print(Id)
for i in obgect_list :
img = i.find('img',class_='card-img').get('src')
name = i.find('div',class_='w-100 mt-auto').find('h5',class_='fs-6 card-title text-white').text
url_img = f'https://mcbuild.org{img}'
data_id = i.find('div').get('data-id')
pytt_file_imgg = download_img(url_img,data_id)
pytt_file_schem = download_file(data_id)
print(f'{n} {name} {pytt_file_imgg} {pytt_file_schem}')
Sqlite_base_loading(name,soup,pytt_file_imgg,pytt_file_schem,PAGE,connection)
n += 1
PAGE +=1
click_more()
Просто парсер парсит названия, тип построек, ссылки изоброжений и файлов со схемами и с помощью функций выше скачивает фото и схемы а потом функции возвращают путь до этих файлов и это все записывается в бд , но не могу понять почему программа в рандомный момент может просто остановится на вечно без ошибок .