Traceback (most recent call last):
File "C:\Users\Zahar\PycharmProjects\pythonProject1\Proekt.py", line 51, in <module>
req = requests.get(url= categories_href, headers= headerss)
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\api.py", line 73, in get
return request("get", url, params=params, **kwargs)
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\sessions.py", line 573, in request
prep = self.prepare_request(req)
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\sessions.py", line 484, in prepare_request
p.prepare(
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\models.py", line 368, in prepare
self.prepare_url(url, params)
File "C:\Users\Zahar\PycharmProjects\pythonProject1\venv\lib\site-packages\requests\models.py", line 439, in prepare_url
raise MissingSchema(
requests.exceptions.MissingSchema: Invalid URL '//move.ru/objects/moskva_utochkina_d_8k1_6908574342/': No scheme supplied. Perhaps you meant http:////move.ru/objects/moskva_utochkina_d_8k1_6908574342/?
Process finished with exit code 1
import json
from pathlib import Path
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Listing page that is scraped for links to individual rental offers.
BASE_URL = "https://move.ru/arenda_kvartir/"

# Column captions; not referenced by the code visible in this script.
TaBL = [" Объявление"," Метраж"," Адресс"," Ссылка"]

# Headers sent with every request (the original only sent them for listing pages).
HEADERS = {
    "Accept": "*/*",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.2271 YaBrowser/23.9.0.2271 Yowser/2.5 Safari/537.36",
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Number of listing pages to download. The original `if count == 0` guard
# appears to restrict the run to the first listing only (indentation was lost
# in the paste, TODO confirm); raise this value to fetch more.
LISTING_LIMIT = 1

# Directory that receives the downloaded listing pages.
DATA_DIR = "data"

# Words removed from / unified in a listing title before it becomes a file name.
RENT_WORDS = ["Сдам","Сдается","Аренда","Сдаю","Сдаем"]
ROOM_FORMS = ["комнатную","комнатной"]
FLAT_FORMS = ["студии","квартиры","квартиру","студию"]

# Characters that cannot appear in a file name (Windows rules plus line breaks).
_FORBIDDEN_IN_FILENAME = str.maketrans({ch: "_" for ch in '\\/:*?"<>|\n\r\t'})


def absolute_url(base, href):
    """Return *href* resolved against *base* as a full URL.

    The listing page produced the protocol-relative link
    ``//move.ru/objects/...``, which ``requests`` rejects with
    ``MissingSchema``. ``urljoin`` fills in the scheme (and host, for
    path-only links) from *base* and leaves absolute URLs untouched.
    """
    return urljoin(base, href)


def normalize_title(title):
    """Return *title* with rental verbs blanked and word forms unified.

    Replacements are applied in the same order as the original script:
    rental words become a single space, room adjectives become
    "комнатная", and flat/studio forms become "квартира".
    """
    for word in RENT_WORDS:
        title = title.replace(word, " ")
    for word in ROOM_FORMS:
        title = title.replace(word, "комнатная")
    for word in FLAT_FORMS:
        title = title.replace(word, "квартира")
    return title


def listing_filename(index, title):
    """Return the ``<index>_<title>.html`` file name for a listing page.

    Characters that are illegal in file names are replaced with ``_`` so a
    title containing e.g. ``/`` or ``:`` cannot break the output path.
    """
    return f"{index}_{title.translate(_FORBIDDEN_IN_FILENAME)}.html"


def fetch_html(page_url):
    """Download *page_url* and return the response body as text.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    response = requests.get(url=page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


def collect_listing_links(html):
    """Return a ``{title: absolute_url}`` dict for the listing links in *html*.

    Each pair is also printed as ``title:url``. Anchors without an ``href``
    are skipped because they cannot be downloaded later.
    """
    soup = BeautifulSoup(html, "lxml")
    links = {}
    for anchor in soup.find_all(class_="search-item__title-link search-item__item-link"):
        href = anchor.get("href")
        if not href:
            continue
        title = anchor.text
        full_href = absolute_url(BASE_URL, href)
        print(f'{title}:{full_href}')
        links[title] = full_href
    return links


def main():
    """Scrape the listing index and save the first listing page(s) to disk.

    Side effects: writes ``index.html``, ``all_categories_dict.json`` and up
    to ``LISTING_LIMIT`` files under ``DATA_DIR``.
    """
    index_html = fetch_html(BASE_URL)
    # Explicit UTF-8: the locale default (e.g. cp1251 on Windows) cannot
    # encode every character the page may contain.
    Path("index.html").write_text(index_html, encoding="utf-8")

    all_categories = collect_listing_links(index_html)
    with open('all_categories_dict.json', "w", encoding='utf-8') as file:
        json.dump(all_categories, file, indent=4, ensure_ascii=False)

    data_dir = Path(DATA_DIR)
    data_dir.mkdir(parents=True, exist_ok=True)  # open() does not create directories

    for count, (categories_name, categories_href) in enumerate(all_categories.items()):
        if count >= LISTING_LIMIT:
            break
        page_html = fetch_html(categories_href)
        target = data_dir / listing_filename(count, normalize_title(categories_name))
        target.write_text(page_html, encoding="utf-8")


if __name__ == "__main__":
    main()