Ревью кода для парсинга статистики

Question

На днях начал изучать bs4. Первой небольшой программой стал парсинг статистики по ковиду. Дайте, пожалуйста, рекомендации или замечания по моему коду — сам я не могу судить о качестве парсинга. Сначала я хотел сделать это через регулярные выражения, но потом понял, что лучше не стоит. Вот мой код:

from bs4 import BeautifulSoup
from requests import get
from prettytable import PrettyTable
import tkinter as tk

# Address of the page that is downloaded and parsed for the counters.
SOURSE = r'https://стопкоронавирус.рф/'

def get_code(sourse, timeout=10):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Args:
        sourse: URL of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed with the built-in ``'html.parser'``.

    Raises:
        requests.RequestException: On a connection failure, a timeout or
            an HTTP error status.
    """
    # Without a timeout the call can block forever on an unresponsive host.
    response = get(sourse, timeout=timeout)
    # Fail loudly here instead of silently parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

def get_container(contents):
    """Look up the ``div`` with class ``cv-countdown`` inside *contents*.

    The result of ``contents.find`` is handed back unchanged.
    """
    container = contents.find('div', class_="cv-countdown")
    return container

def get_items(container):
    """Collect every ``div`` with class ``cv-countdown__item`` in *container*.

    The result of ``container.find_all`` is handed back unchanged.
    """
    items = container.find_all('div', class_="cv-countdown__item")
    return items

def remove_br(code):
    """Replace every HTML line-break tag in *code* with a single space.

    Besides the exact spellings ``<br>``, ``<br/>`` and ``</br>``, the
    pattern also accepts whitespace before the closing slash (``<br />``)
    and any letter case (``<BR>``).

    Args:
        code: HTML markup as a string.

    Returns:
        The markup with each line-break tag turned into ``' '``.
    """
    # Local import so this function does not depend on the module's
    # top-level import block.
    import re

    return re.sub(r'</?br\s*/?>', ' ', code, flags=re.IGNORECASE)

def to_dict(items):
    """Turn counter elements into a ``{label: value}`` dictionary.

    For each element the label text comes from its
    ``cv-countdown__item-label`` div and the value from the ``span`` inside
    its ``cv-countdown__item-value`` div.

    Note: nested divs inside the label are removed from the parsed tree
    itself, so the elements passed in are modified.
    """
    stats = {}
    for item in items:
        label_node = item.find('div', class_='cv-countdown__item-label')
        # Drop nested <div>s so that only the label's own text is left.
        for nested in label_node.select('div'):
            nested.decompose()
        label = label_node.get_text().strip()

        value_node = item.find('div', class_='cv-countdown__item-value')
        stats[label] = value_node.span.get_text()
    return stats

def get_stats():
    """Download the page at SOURSE and return its counters as a dict."""
    page = get_code(SOURSE)
    container = get_container(page)
    items = get_items(container)
    return to_dict(items)

def convert_to_table(dict_):
    """Render a mapping as a two-column PrettyTable.

    Args:
        dict_: Mapping of parameter name to its value.

    Returns:
        A PrettyTable with one row per entry, in the mapping's own order.
    """
    table = PrettyTable()
    table.field_names = ['Параметр', 'Значение (чел.)']
    # items() already yields the (key, value) pairs, so zipping keys() with
    # values() is redundant. Rows are added one at a time as plain lists,
    # which does not depend on add_rows() accepting a one-shot iterator.
    for name, value in dict_.items():
        table.add_row([name, value])
    return table

def show_in_window(data, title='Stats', font_size=15):
    """Show *data* as text in a new Tk window and run its main loop.

    Args:
        data: Object to display; its ``str()`` form is shown.
        title: Window title.
        font_size: Size of the Consolas font used for the text.

    The call returns only after ``root.mainloop()`` has finished.
    """
    root = tk.Tk()
    root.title(title)
    # Parent the label explicitly: without a master tkinter attaches it to
    # the implicit default root, which is not necessarily this window.
    # str() makes the text conversion explicit instead of leaving it to Tk.
    label = tk.Label(root, text=str(data), font=f'Consolas {font_size}')
    label.pack()
    root.mainloop()

# Fetch the statistics and render them as a table.
stats = get_stats()
table = convert_to_table(stats)
# show_in_window() runs the Tk main loop, so the console output below is
# produced only after that call has returned.
show_in_window(table)
print(table)

UPDATE: обновил код. Теперь элементы ищутся не цепочкой вызовов методов, а по вложенным CSS-селекторам.

from bs4 import BeautifulSoup as bs
from requests import get
from prettytable import PrettyTable
import tkinter as tk

# Address of the page that is downloaded and parsed for the counters.
SOURSE = r'https://стопкоронавирус.рф/'
# NOTE: the download and the parsing below are module-level statements, so
# they run as soon as this module is loaded.
HTML = get(SOURSE).text
SOUP = bs(HTML, 'html.parser')
# CSS selector used by get_items() to pick the counter elements from SOUP.
HTML_PATH = 'html body .cv-countdown .cv-countdown__item'


def get_items():
    """Return the elements of SOUP matched by the HTML_PATH selector."""
    return SOUP.select(HTML_PATH)


def to_dict(items):
    """Turn counter elements into a ``{label: value}`` dictionary.

    The label is the stripped text of the ``cv-countdown__item-label`` div
    once its nested divs have been removed; the value is the text of the
    ``span`` inside the ``cv-countdown__item-value`` div.

    Note: removing the nested divs modifies the parsed tree that *items*
    belong to.
    """

    def read_label(item):
        node = item.find('div', class_='cv-countdown__item-label')
        # Nested <div>s are dropped so only the label's own text remains.
        for inner in node.select('div'):
            inner.decompose()
        return node.get_text().strip()

    def read_value(item):
        holder = item.find('div', class_='cv-countdown__item-value')
        return holder.span.get_text()

    result = {}
    for item in items:
        name = read_label(item)
        result[name] = read_value(item)
    return result


def get_stats():
    """Return the counters found by get_items() as a ``{label: value}`` dict."""
    items = get_items()
    return to_dict(items)


def convert_to_table(dict_):
    """Render a mapping as a two-column PrettyTable.

    Args:
        dict_: Mapping of parameter name to its value.

    Returns:
        A PrettyTable with one row per entry, in the mapping's own order.
    """
    table = PrettyTable()
    table.field_names = ['Параметр', 'Значение (чел.)']
    # dict.items() yields the same (key, value) pairs that
    # zip(keys(), values()) rebuilt by hand. Adding rows one by one as lists
    # avoids handing add_rows() a one-shot iterator.
    for name, value in dict_.items():
        table.add_row([name, value])
    return table


def show_in_window(data, title='Stats', font_size=15):
    """Show *data* as text in a new Tk window and run its main loop.

    Args:
        data: Object to display; its ``str()`` form is shown.
        title: Window title.
        font_size: Size of the Consolas font used for the text.

    The call returns only after ``root.mainloop()`` has finished.
    """
    root = tk.Tk()
    root.title(title)
    # Give the label an explicit master so it always lands in this window
    # rather than in tkinter's implicit default root. str() makes the text
    # conversion explicit.
    label = tk.Label(root, text=str(data), font=f'Consolas {font_size}')
    label.pack()
    root.mainloop()

# Build the table from the counters parsed out of SOUP.
stats = get_stats()
table = convert_to_table(stats)
# show_in_window() runs the Tk main loop, so the console output below is
# produced only after that call has returned.
show_in_window(table)
print(table)

Answer 1

from bs4 import BeautifulSoup
import requests
from prettytable import PrettyTable
import tkinter as tk

def get_html(url, timeout=10):
    """Download *url* and return the page parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed with the built-in ``'html.parser'``.

    Raises:
        requests.RequestException: On a connection failure, a timeout or
            an HTTP error status.
    """
    # A timeout keeps the script from hanging on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Surface HTTP errors here rather than parsing an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'html.parser')

def get_stats(html):
    """Extract the counters from a parsed page as a ``{label: value}`` dict.

    Every ``div`` with class ``cv-countdown__item`` contributes one entry:
    the key is the first line of the stripped label text, and the value is
    the text of the value div exactly as found (not stripped).
    """

    def label_of(item):
        raw = item.find('div', class_='cv-countdown__item-label').text
        # Only the first line of the label is used as the key.
        return raw.strip().split('\n')[0]

    def value_of(item):
        return item.find('div', class_='cv-countdown__item-value').text

    items = html.find_all('div', class_='cv-countdown__item')
    return {label_of(item): value_of(item) for item in items}

def convert_to_table(dict_):
    """Render a mapping as a two-column PrettyTable.

    Args:
        dict_: Mapping of parameter name to its value.

    Returns:
        A PrettyTable with one row per entry, in the mapping's own order.
    """
    table = PrettyTable()
    table.field_names = ['Параметр', 'Значение (чел.)']
    # Iterate the (key, value) pairs directly instead of zipping keys() with
    # values(). Each row is added as a plain list, so no one-shot iterator
    # is passed to add_rows().
    for name, value in dict_.items():
        table.add_row([name, value])
    return table

def show_in_window(data, title='Stats', font_size=15):
    """Show *data* as text in a new Tk window and run its main loop.

    Args:
        data: Object to display; its ``str()`` form is shown.
        title: Window title.
        font_size: Size of the Consolas font used for the text.

    The call returns only after ``root.mainloop()`` has finished.
    """
    root = tk.Tk()
    root.title(title)
    # The label is bound to this window explicitly. Omitting the master
    # would attach it to tkinter's implicit default root instead.
    label = tk.Label(root, text=str(data), font=f'Consolas {font_size}')
    label.pack()
    root.mainloop()

# Address of the page that holds the statistics counters.
url = 'https://стопкоронавирус.рф/'
html = get_html(url)
stats = get_stats(html)
table = convert_to_table(stats)
# The table is printed before the window is opened, because
# show_in_window() runs the Tk main loop before it returns.
print(table)
show_in_window(table)

БЛОГ НА HUSL

Ревью кода для парсинга статистики

Ответы (1 шт):