Нет класса у блока span
Из-за того, что у некоторых отелей нет оценки, выдаётся ошибка, что у блока нет атрибута text. Как можно это обойти?
код:
import requests
import lxml
from bs4 import BeautifulSoup
import csv
def get_data(url):
    """Fetch a hotel listing page, print every hotel's rating and return the records.

    Each ``<li class="item">`` card is parsed in a single pass. Elements that
    are absent from a card (for example the rating of a hotel nobody has
    rated yet) yield an empty string instead of raising ``AttributeError``.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of dicts with the keys ``'url'``, ``'title'``, ``'price'`` and
        ``'rating'``, one per card that contains a link.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
    }
    # The headers have to be passed explicitly, and without a timeout
    # requests may block forever on an unresponsive server.
    req = requests.get(url=url, headers=headers, timeout=30)
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'lxml')

    def text_or_empty(node):
        # find() returns None when nothing matches, so .text must be guarded.
        return node.text.strip() if node is not None else ''

    hotels = []
    for card in soup.find_all('li', class_='item'):
        link = card.find('a')
        if link is None:
            # Without a link there is neither a title nor a URL to report.
            continue
        price = text_or_empty(card.find('span', class_='price-highlight'))
        currency = text_or_empty(card.find('span', class_='currency'))
        rating = text_or_empty(card.find('span', {"itemprop": "ratingValue"}))
        print(rating)
        hotels.append({
            'url': 'https://101hotels.com' + link.get('href', ''),
            'title': link.text,
            # strip() keeps the value empty when both parts are missing.
            'price': f'{price} {currency}'.strip(),
            'rating': rating,
        })
    return hotels
def main():
    """Run the scraper against the Yuzhno-Sakhalinsk listing page."""
    city_page = 'https://101hotels.com/main/cities/yuzhno-sakhalinsk'
    get_data(city_page)


# Only start scraping when the file is executed as a script.
if __name__ == '__main__':
    main()
Ответы (1 шт):
Автор решения: arnold
→ Ссылка
Можно исправить следующим образом:
import requests
import lxml
from bs4 import BeautifulSoup
import csv
def get_data(url):
    """Download a hotel listing page and extract one record per hotel card.

    Every ``<li class="item">`` element is treated as a card. Missing price,
    currency or rating elements produce empty strings; cards that contain no
    link at all are skipped, because neither a title nor a URL can be read
    from them.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of dicts with the keys ``'url'``, ``'title'``, ``'price'`` and
        ``'rating'``.

    Raises:
        requests.RequestException: If the request fails, times out or the
            server answers with an HTTP error status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'
    }
    # Without a timeout requests may block forever on an unresponsive server.
    req = requests.get(url=url, headers=headers, timeout=30)
    # Fail loudly instead of silently parsing an error page into an empty list.
    req.raise_for_status()
    soup = BeautifulSoup(req.text, 'lxml')

    def text_or_empty(node):
        # find() returns None when nothing matches, so .text must be guarded.
        return node.text.strip() if node is not None else ''

    hotels = []
    for hotel_card in soup.find_all('li', class_='item'):
        # Look the link up once; it provides both the URL and the title.
        link = hotel_card.find('a')
        if link is None:
            continue
        hotel_price = text_or_empty(hotel_card.find('span', class_='price-highlight'))
        hotel_price_currency = text_or_empty(hotel_card.find('span', class_='currency'))
        hotels.append({
            # A missing href falls back to '' rather than raising TypeError.
            'url': 'https://101hotels.com' + link.get('href', ''),
            'title': link.text,
            # strip() avoids a lone space when price and currency are absent.
            'price': f'{hotel_price} {hotel_price_currency}'.strip(),
            'rating': text_or_empty(hotel_card.find('span', {'itemprop': 'ratingValue'})),
        })
    return hotels
def main():
    """Scrape the Yuzhno-Sakhalinsk listing and save the result to hotels.csv.

    The file starts with a header row and is followed by one row per hotel
    record returned by ``get_data``.
    """
    records = get_data('https://101hotels.com/main/cities/yuzhno-sakhalinsk')
    with open('hotels.csv', 'w', encoding='utf8', newline='') as out:
        table = csv.writer(out)
        table.writerow(['Title', 'URL', 'Price', 'Rating'])
        # The generator emits rows lazily, in the same order as the records.
        table.writerows(
            [entry['title'], entry['url'], entry['price'], entry['rating']]
            for entry in records
        )


# Only start scraping when the file is executed as a script.
if __name__ == '__main__':
    main()