Как поймать данные которые добавляются в файл?
имеется парсер при первом запуске он собирает полный список информации и сохраняет её в json файл. В дальнейшем ставлю на повтор выполнения цикла для обновления информации и добавления разницы в словар.После того как он добавляет информацию отправляю уведомление в телеграм. Но бот отправляет всю информацию из файла, отсюда вытекает вопрос как сделать что бы он отправлял только ту информацию которой ещё не было, а не все блоки? Заранее спасибо за помощь. Код парсера:
soup = bs(page, 'lxml')
applications_cards = soup.find_all(class_=re.compile("OrderSnippetContainerStyles__Container"))
with open ("new_applications.json") as file:
new_applications = json.load(file)
fresh_applications = {}
for application in applications_cards:
try:
application_title = application.find('h3', class_=re.compile("SubjectAndPriceStyles")).text.strip()
except Exception:
application_title = "Нет заголовка"
try:
application_snippet = application.find('p', class_=re.compile("SnippetBodyStyles__MainInfo")).text.strip().replace('\n', ' ')
except Exception:
application_snippet = "Нет описания"
try:
application_location = application.find('span', class_=re.compile("LocationAndScheduleStyles__TextContainer")).text.strip()
except Exception:
application_location = "Адрес не указан"
try:
application_shedule = application.find('span', class_=re.compile("LocationAndScheduleStyles__ScheduleText")).text.strip().replace('\n', ' ')
except Exception:
application_shedule= "Дата не указана"
application_name_client = application.find('span', class_=re.compile("StatusAndClientInfoStyles__Name")).text.strip()
application_link_req = application.find('a', class_=re.compile("SnippetBodyStyles__Container"))
application_link = application_link_req.get('href')
application_id = application_link_req.get('id')
new_applications [application_id] = {
"application_title":application_title,
"desc":application_snippet,
"location":application_location,
"time_application":application_shedule,
"name":application_name_client,
"url":config.URL + application_link,
"application_id":application_id
}
with open ("new_applications.json", "w") as file:
json.dump(new_applications, file, indent =4, ensure_ascii=False)
Код обновления и добавления информации:
page = driver.page_source
soup = bs(page, 'lxml')
applications_cards = soup.find_all(class_=re.compile("OrderSnippetContainerStyles__Container"))
for application in applications_cards:
application_link_req = application.find('a', class_=re.compile("SnippetBodyStyles__Container"))
application_id = application_link_req.get('id')
try:
application_title = application.find('h3', class_=re.compile("SubjectAndPriceStyles")).text.strip()
except Exception:
application_title = "Нет заголовка"
try:
application_snippet = application.find('p', class_=re.compile("SnippetBodyStyles__MainInfo")).text.strip().replace('\n', ' ')
except Exception:
application_snippet = "Нет описания"
try:
application_location = application.find('span', class_=re.compile("LocationAndScheduleStyles__TextContainer")).text.strip()
except Exception:
application_location = "Адрес не указан"
try:
application_shedule = application.find('span', class_=re.compile("LocationAndScheduleStyles__ScheduleText")).text.strip().replace('\n', ' ')
except Exception:
application_shedule= "Дата не указана"
application_name_client = application.find('span', class_=re.compile("StatusAndClientInfoStyles__Name")).text.strip()
application_link = application_link_req.get('href')
task_application = (application_id, application_title, application_snippet,application_location,application_shedule,application_name_client)
if task_application not in new_applications:
new_applications [application_id] = {
"application_title":application_title,
"desc":application_snippet,
"location":application_location,
"time_application":application_shedule,
"name":application_name_client,
"url":config.URL + application_link,
"application_id":application_id
}
with open ("new_applications.json", "w") as file:
json.dump(new_applications, file, indent =4, ensure_ascii=False)
Код отправки уведомления телеграм боту
for k, v in fresh_applications.items():
applications = f"{v['application_title']}\n{v['desc']}\n"
bot.send_message(message.chat.id, applications)