есть скрипт который отправляет запрос на сайт, получает ответ, если добавились новые элементы на сайте, то начинает их искать перебором в цикле

Смотрите, сайт обновляется в рандомное время. Скрипт постоянно ищет новые элементы и выводит их в консоль, но при по индексном сравнении что-то сбивается и выводит уже старые элементы. Сверять именно так нужно для производительности. https://www.nl.go.kr/seoji/

-*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import StaleElementReferenceException
from googletrans import Translator
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from seleniumrequests import Firefox
from seleniumrequests.request import RequestMixin
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.ui import Select
import sys
from datetime import datetime
import gc
#import datetime
import time
import numpy
import pytz
from pytz import all_timezones
from pytz import country_timezones
import urllib.request
import os
import numpy as np
import pickle
import traceback
import urllib.request
import urllib
import re
import requests
import lxml
import cchardet
import requests
import aiohttp
import asyncio


titles = []
titles2 = []


cookies = {
    'PCID': '77a75160-93ed-e108-b921-7852f64f2511-1634605438412',
    '_ga': 'GA1.3.1244550291.1634605439',
    '_INSIGHT_CK_1101': 'e0e2860fab3ab16ca978df95f6a3dc4c_14698|a8749426e3d8f0df71f0df95f6a3dc4c_14698:1644217287000',
    'WMONID': 'NJ1rbZbMcQG',
    'JSESSIONID': '"ZqCHwLVq080CsBs7lW8kMFTNnlpwLeAZyon8yo3E.NLSEOJIWAS1:nl_seoji_1"',
    'JSESSIONID_NL_USER': '"EK6AyZukS0G1MuJ4T2T3W9PUOCI_X8OLrFRjub2r.NLUWAS1:nl_main_1"',
}

headers = {
    'Connection': 'keep-alive',
    'sec-ch-ua': '";Not A Brand";v="99", "Chromium";v="94"',
    'Accept': '*/*',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'X-Requested-With': 'XMLHttpRequest',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36',
    'sec-ch-ua-platform': '"Linux"',
    'Origin': 'https://nl.go.kr',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://nl.go.kr/seoji/contents/S80100000000.do',
    'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
    # Requests sorts cookies= alphabetically
    # 'Cookie': 'PCID=77a75160-93ed-e108-b921-7852f64f2511-1634605438412; _ga=GA1.3.1244550291.1634605439; _INSIGHT_CK_1101=e0e2860fab3ab16ca978df95f6a3dc4c_14698|a8749426e3d8f0df71f0df95f6a3dc4c_14698:1644217287000; WMONID=NJ1rbZbMcQG; JSESSIONID="ZqCHwLVq080CsBs7lW8kMFTNnlpwLeAZyon8yo3E.NLSEOJIWAS1:nl_seoji_1"; JSESSIONID_NL_USER="EK6AyZukS0G1MuJ4T2T3W9PUOCI_X8OLrFRjub2r.NLUWAS1:nl_main_1"',
}

data = {
    'searchUrl': 'search?select=cip_id,rec_key,cip_key,form,set_expression,subject,series_no,ea_isbn,ea_add_code,ebook_yn,bib_yn,set_isbn,set_add_code,title,vol,author,publisher,series_title,edition_stmt,pre_price,publish_year,publish_predate,input_date,update_date,book_size,page,deposit_yn,real_publish_date,real_price,publisher_key,import_date,changed_date,title_url,kdc,ddc,publisher_url,book_introduction_url,book_summary_url,book_tb_cnt_url,control_no,cip_yn,index_series_title,index_title,index_author,index_publisher,related_isbn,form_detail,form_detail_version,kolis_control_no,kolis_img_path,book_introduction,book_tb_cnt,book_summary&from=cip.cip&where=text_idx%3D%22%EC%97%B0%EC%9E%AC%22%20allword%20and%20ebook_yn%3D%22Y%22%20and%20subject%3D%226%22%20order%20by%20publish_predate%20desc&offset=0&limit=100',
}

def Get_Start_Info():
    SERVER = int(input("Номер сервера: "))

    START_TIME_H = int(input("Через сколько часов: "))
    START_TIME_H = START_TIME_H * 3600

    START_TIME_M = int(input("Через сколько минут: "))
    START_TIME_M = START_TIME_M * 60

    START_SERVICES = int(input("Сеоджи(1)"))

    START_TIME_END = START_TIME_H + START_TIME_M

    return SERVER, START_TIME_END, START_SERVICES




bull = 0
h1 = 0
pop = 0
ooo = 0
response1 = requests.post('https://www.nl.go.kr/seoji/module/S80100000000_intgr_select_search_engine_data.ajax', headers=headers, cookies=cookies, data=data)
paux1 = np.array(response1.json()['result']['rows'])
sum_titles = response1.json()['result']['total_count']
deleter = 0
indexes = []

SIM = 0


async def Checker_num(srv, all_checker, checker):
    global inf_tab_1
    global inf_tab_2
    global inf_tab_3
    global cur_tab
    global ooo
    global h1
    global SIM
    global paux1
    global sum_titles
    global pop
    global bull
    global summa_title
    global titles
    global response1
    global deleter

    if checker == sum_titles: # Одинаковое ли кол-во элементов
        deleter = deleter + 1
        del all_checker
        del checker

        if deleter == 100: # чтобы память не засоряло
            gc.collect()
            deleter = 0
        pass

    else: # ТУТ ОШИБКА
        paux2 = np.array(all_checker['result']['rows']) # перевод в np список для скорости
        bull = 0 
        
        for i in range(99):           
            try: #  (주)에브리웨이
                if paux1[i - bull]['fields']['title'] != paux2[i]['fields']['title']:
                    bull = bull + 1
                    print(paux2[i]['fields']['title'], i)
                
            except:
                gc.collect()
                pass

        h1 = 0
        paux1 = paux2
        bull = 0
        summa_title = 0
        sum_titles = requests.post('https://www.nl.go.kr/seoji/module/S80100000000_intgr_select_search_engine_data.ajax', headers=headers, cookies=cookies, data=data)
        sum_titles = sum_titles.json()['result']['total_count'] # сохраняет новое кол-во элементов
        gc.collect()

        #print("Скорее Всего Анонсов - ", l)
        #print("Всего новых - ", summa_title)

        
        


async def get_pokemon(srv, session, url, headers, cookies, data): # отправляет кол-во элементов на проверку в Checker_num
    async with session.post(url, headers=headers, cookies=cookies, data=data) as resp:
        pokemon = await resp.json()
        asyncio.ensure_future(Checker_num(srv ,pokemon ,pokemon['result']['total_count']))
        return pokemon['result']['total_count']





async def main(srv): # делает запрос каждые 0.2 секуды и передаёт в get_pokemon
    global sum_titles   
    async with aiohttp.ClientSession() as session:
        tasks = []
        while True:
            url = 'https://www.nl.go.kr/seoji/module/S80100000000_intgr_select_search_engine_data.ajax'
            tasks.append(asyncio.ensure_future(get_pokemon(srv, session, url, headers=headers, cookies=cookies, data=data)))
            await asyncio.sleep(0.2)

        original_pokemon = await asyncio.gather(*tasks)



try:
    GLOBAL_INFO = Get_Start_Info()
    SERVER_GLOBAL = GLOBAL_INFO[0]
    TIME_START = GLOBAL_INFO[1]
    #print("Сервер -",SERVER_GLOBAL, "| Через", TIME_START, "секунд")
    asyncio.run(main(SERVER_GLOBAL))



except:
    print('Ошибка:\n', traceback.format_exc())
finally:
    driver.quit()

Ответы (0 шт):