При парсинге сайта для Telegram-бота в чат выводятся странные символы вместо кириллицы

Скриншот чата Telegram-бота

import json
import requests
from bs4 import BeautifulSoup as b
import telebot
def get_first_news():
    """Scrape the anime listing page and save every entry to ``news_dict.json``.

    Each ``div`` with class ``animes-list-item media`` on
    ``https://animego.org/anime`` contributes one entry. The entry is keyed
    by the last hyphen-separated piece of the card's first link and stores
    the card title and that link.

    The JSON file is overwritten on every call.
    """
    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"}
    url = "https://animego.org/anime"
    # Without a timeout requests may wait on an unresponsive server forever.
    r = requests.get(url=url, headers=headers, timeout=10)
    soup = b(r.text, "lxml")
    articles_cards = soup.find_all("div", class_="animes-list-item media")

    news_dict = {}
    for article in articles_cards:
        article_title = article.find("div", class_="h5 font-weight-normal mb-1").text.strip()
        article_url = article.select_one("a")["href"]
        # The identifier is whatever follows the last hyphen in the link.
        article_id = article_url.split("-")[-1]

        news_dict[article_id] = {
            "article_title": article_title,
            "article_url": article_url,
        }

    # ensure_ascii=False writes Cyrillic characters as-is, so the file must be
    # opened with an explicit UTF-8 encoding; otherwise the platform default
    # encoding is used and the text turns into garbage when read elsewhere.
    with open("news_dict.json", "w", encoding="utf-8") as file:
        json.dump(news_dict, file, indent=4, ensure_ascii=False)

def check_news_update():
    """Find listing entries that are not yet stored in ``news_dict.json``.

    Loads the saved entries, scrapes ``https://animego.org/anime`` again and
    collects every card whose identifier (the last hyphen-separated piece of
    its first link) is not among the saved keys. The new entries are added
    to the saved ones and the file is rewritten.

    Returns:
        dict: the newly found entries, keyed by identifier, each with
        ``article_title`` and ``article_url``. Empty when nothing is new.

    Raises:
        FileNotFoundError: if ``news_dict.json`` does not exist yet.
    """
    # Read with the same explicit encoding the file is written with; relying
    # on the platform default breaks Cyrillic titles.
    with open("news_dict.json", encoding="utf-8") as file:
        news_dict = json.load(file)

    headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"}
    url = "https://animego.org/anime"
    # Without a timeout requests may wait on an unresponsive server forever.
    r = requests.get(url=url, headers=headers, timeout=10)
    soup = b(r.text, "lxml")
    articles_cards = soup.find_all("div", class_="animes-list-item media")

    fresh_news = {}
    for article in articles_cards:
        article_url = article.select_one("a")["href"]
        article_id = article_url.split("-")[-1]

        # Already known: skip before doing the title lookup.
        if article_id in news_dict:
            continue

        article_title = article.find("div", class_="h5 font-weight-normal mb-1").text.strip()
        entry = {
            "article_title": article_title,
            "article_url": article_url,
        }
        news_dict[article_id] = entry
        # Separate copy so the returned dict shares no objects with news_dict.
        fresh_news[article_id] = dict(entry)

    # ensure_ascii=False emits raw Cyrillic, so write the file as UTF-8.
    with open("news_dict.json", "w", encoding="utf-8") as file:
        json.dump(news_dict, file, indent=4, ensure_ascii=False)

    return fresh_news

def main():
    """Run one update check and print the entries it reports as new."""
    # get_first_news() is the function that writes news_dict.json, which
    # check_news_update() reads; it is intentionally not called here.
    fresh_news = check_news_update()
    print(fresh_news)


if __name__ == "__main__":
    main()

json файл


Ответы (1 шт):

Автор решения: Dmitry Nekrasov

При работе с JSON-файлами, которые содержат кириллицу, старайтесь везде явно указывать кодировку UTF-8 (параметр encoding='utf-8' в open()) — и при записи, и при чтении. Например, в вашем случае:

# Open the file with an explicit UTF-8 encoding: ensure_ascii=False makes
# json.dump write non-ASCII characters as-is instead of \uXXXX escapes.
with open("news_dict.json","w", encoding='utf-8') as file:
        json.dump(news_dict, file, indent=4, ensure_ascii=False)
→ Ссылка