При записи данных в файл TXT в Python данные не записываются. Не подскажете почему?

Question

import requests
from bs4 import BeautifulSoup


# Download one page of the customer catalog and collect the rows of its table body.
url = 'https://etp-ets.ru/organization/catalog/customer?page=2&limit=25'
data = requests.get(url).text
block = BeautifulSoup(data, 'lxml')
heads = block.find('tbody').find_all('tr')
# print(len(heads))
for i in heads:
    # Take the href of the next <a> found from this row and make it absolute.
    get_url = i.find_next('a').get('href')
    # print('https://etp-ets.ru'+get_url)
    my_url = ('https://etp-ets.ru' + get_url)
    # Fetch the linked page and gather every <div class="form-group"> on it.
    gook = requests.get(my_url).text
    hdgef = BeautifulSoup(gook, 'lxml')
    stock = hdgef.find_all('div', class_='form-group')
    # print(len(stock))
    for w in stock:
        print(w.text.strip())
        # NOTE: sumn is rebound on every pass, so when this loop ends it holds
        # only the stripped text of the last 'form-group' div.
        sumn = (w.text.strip())
    print('\n')

    # The write runs once per row, after the inner loop has finished, so only
    # that last value is appended, and no separator is written between records.
    # NOTE(review): if `stock` is empty on the first row, `sumn` is not yet
    # defined at this point.
    with open('seoll.txt', 'a+', encoding='utf-8') as f:
        f.write(sumn)

Answer 1

Я бы на вашем месте перенёс запись в файл внутрь цикла, в конец каждой итерации, и добавил пустую строку после каждой записи, чтобы результат было удобнее читать. Возможно, вы добиваетесь именно этого результата.

import requests
from bs4 import BeautifulSoup


# Download one page of the customer catalog, follow the link found in each
# table row, and append the text of every 'form-group' div to seoll.txt.
url = 'https://etp-ets.ru/organization/catalog/customer?page=2&limit=25'
data = requests.get(url).text
block = BeautifulSoup(data, 'lxml')
heads = block.find('tbody').find_all('tr')
for i in heads:
    # Take the href of the next <a> found from this row and make it absolute.
    get_url = i.find_next('a').get('href')
    my_url = 'https://etp-ets.ru' + get_url
    gook = requests.get(my_url).text
    hdgef = BeautifulSoup(gook, 'lxml')
    stock = hdgef.find_all('div', class_='form-group')

    # Open the file in append mode through a context manager so the handle is
    # closed even if printing or writing raises part-way through the row.
    with open('seoll.txt', 'a+', encoding='utf-8') as f:
        for w in stock:
            sumn = w.text.strip()
            print(sumn)
            # Write inside the loop, one record per div, followed by a blank
            # line so the records stay readable.
            f.write(sumn + '\n\n')
    print('\n')

Answer 2

Хочу порекомендовать немного другую концепцию.

Не нужно записывать каждую строку в файл по отдельности. Сначала нужно собрать все строки и уже потом записать их в файл разом. Ниже — полностью готовый парсер для одной страницы с записью данных в корректный CSV, открывающийся в Excel:

import csv

from bs4 import BeautifulSoup as Soup
from fake_useragent import UserAgent
from requests import Session
from tqdm.auto import tqdm
from unicodedata import normalize

# Shared module-level state used by the rest of the script.
base_url = 'https://etp-ets.ru'
# Column names for the CSV, filled in by info_parser in first-seen order.
tab_headers = []

# One HTTP session for every request, sending a User-Agent taken from
# fake_useragent's `random` attribute.
ua = UserAgent()
s = Session()
s.headers['User-Agent'] = ua.random

# Fetch the catalog listing page and parse it.
catalog_params = {'page': 2, 'limit': 25}
response = s.get(base_url + '/organization/catalog/customer', params=catalog_params)
soup = Soup(response.content, 'html.parser')


def info_parser(url_part):
    """Yield ``(label, value)`` pairs scraped from one customer detail page.

    Requests ``base_url + url_part`` through the shared session ``s``, finds
    the ``<fieldset id="CustomerInfo-element">`` and walks its
    ``<div class="form-group">`` children. For every div that contains a
    ``<label class="control-label">``, the label text and the remaining text
    of the div are yielded, both NFKC-normalized. Each label seen for the
    first time is also appended to the module-level ``tab_headers`` list.

    Args:
        url_part: Path of the detail page, appended to ``base_url``.

    Yields:
        Tuples ``(label, value)`` of normalized strings. Nothing is yielded
        when the page has no ``CustomerInfo-element`` fieldset.
    """
    page_response = s.get(base_url + url_part)
    page_soup = Soup(page_response.content, 'html.parser')
    customer_info = page_soup.find('fieldset', {'id': 'CustomerInfo-element'})
    if customer_info is None:
        # No such fieldset on this page: finish without yielding instead of
        # failing with AttributeError on None.find_all.
        return

    for elem in customer_info.find_all('div', {'class': 'form-group'}):
        label_tag = elem.find('label', {'class': 'control-label'})
        if label_tag is None:
            # Divs without a label carry no named field; skip them.
            continue

        # Normalize BEFORE registering the header, so the names stored in
        # tab_headers are exactly the keys yielded below. Otherwise
        # csv.DictWriter rejects rows whose keys are not in its fieldnames.
        label = normalize('NFKC', label_tag.get_text(strip=True))
        if label not in tab_headers:
            tab_headers.append(label)

        # Remove the label from the div so that only the value text is left.
        label_tag.replace_with('')
        yield label, normalize('NFKC', elem.get_text(strip=True))


# Collect one dict per catalog row (field label -> value), then write them
# all to result.csv in a single pass.
lines = []
catalog_rows = soup.find_all('tr', class_=['odd', 'even'])

for item in tqdm(catalog_rows):
    anchor = item.find('a')
    if not anchor:
        continue
    link = anchor.get('href')
    if not link:
        # Rows whose link has no usable href are skipped.
        continue
    lines.append(dict(info_parser(link)))

# 'utf-8-sig' puts a BOM at the start of the file; newline='' leaves line
# endings to the csv module.
with open('result.csv', 'w', encoding='utf-8-sig', newline='') as file:
    writer = csv.DictWriter(
        file,
        fieldnames=tab_headers,
        dialect=csv.unix_dialect,
    )
    writer.writeheader()
    writer.writerows(lines)

БЛОГ НА HUSL

При записи данных в файл TXT в Python данные не записываются. Не подскажете почему?

Ответы (2 шт):

Хочу порекомендовать немного другую концепцию.