Как собрать данные с сайтов за самый короткий промежуток времени
В интернете я толком не нашёл способа быстро собрать информацию с сайтов. У меня есть 20000 URL, и нужно за самый короткий промежуток времени собрать цены с сайта. Сейчас на 100 страниц уходит 240 секунд через requests и 190 секунд через grequests. Можно ли как-то сократить это время примерно в 10 раз? Пример значений: counts=176321160,176288467,175966708,176042493...
import time
import grequests
import json
import re
import openpyxl
import datetime
import random
WORKBOOK_PATH = 'list8.0.xlsx'
# The pasted script had "¤cy=5" here: "&curren" had been decoded as the HTML
# entity for the currency sign, which silently dropped the currency parameter.
HISTOGRAM_URL = (
    "https://steamcommunity.com/market/itemordershistogram"
    "?country=RU&language=russian&currency=5&item_nameid={item_id}&two_factor=0"
)
FIRST_ROW = 2        # first spreadsheet row that holds an item id in column G
LAST_ROW = 2001      # last row processed (20 batches of 100 rows)
BATCH_SIZE = 100     # number of URLs requested concurrently per batch
# Multiplier applied to the sell price before comparing it with the buy price.
# NOTE(review): presumably the seller's share after marketplace fees - confirm.
PAYOUT_FACTOR = 0.87


def parse_histogram(js):
    """Extract the price figures from one decoded histogram reply.

    Args:
        js: the decoded JSON body; anything that is not a dict is treated as
            "no data".

    Returns:
        A tuple ``(order_price, normal_price, quantily, margin, difference)``.
        ``order_price`` is the first value of the first ``buy_order_graph``
        entry, ``normal_price`` the first value of the first
        ``sell_order_graph`` entry (as float), ``quantily`` the first integer
        found in ``sell_order_summary``. ``margin`` and ``difference`` are
        strings rounded to two decimals. Every figure that cannot be derived
        is ``None``.
    """
    order_price = normal_price = quantily = margin = difference = None
    if not isinstance(js, dict):
        return order_price, normal_price, quantily, margin, difference

    # A missing key and an empty graph are both "no price", not an IndexError.
    buy_graph = js.get('buy_order_graph')
    if buy_graph:
        order_price = buy_graph[0][0]

    summary = js.get('sell_order_summary')
    if isinstance(summary, str):
        first_number = re.search(r"\d+", summary)
        if first_number:
            quantily = int(first_number.group())

    sell_graph = js.get('sell_order_graph')
    if sell_graph:
        normal_price = float(sell_graph[0][0])

    # Both prices are required (the original used `or`, which raised a
    # TypeError as soon as only one of them was present).
    if order_price is not None and normal_price is not None:
        net_price = normal_price * PAYOUT_FACTOR
        difference = str(round(net_price - order_price, 2))
        if order_price:  # a zero buy price would divide by zero
            margin = str(round((net_price / order_price - 1) * 100, 2))
    return order_price, normal_price, quantily, margin, difference


def collect_prices_batched():
    """Fetch histograms in concurrent batches and write figures to the workbook.

    Item ids are read from column G; results go to columns C, D, E, F, H and I
    of the same row. The workbook is saved once per batch and once more on
    exit, instead of after every row: rewriting the whole file for each of
    thousands of rows costs far more time than the parsing itself.
    """
    xlsx = openpyxl.reader.excel.load_workbook(filename=WORKBOOK_PATH, data_only=True)
    sheet = xlsx.active
    i = 1
    try:
        for batch_start in range(FIRST_ROW, LAST_ROW + 1, BATCH_SIZE):
            t1me = time.time()
            rows = range(batch_start, batch_start + BATCH_SIZE)
            urls = [HISTOGRAM_URL.format(item_id=sheet[f'G{row}'].value) for row in rows]
            maps = grequests.map([grequests.get(u) for u in urls])
            print(round(time.time() - t1me, 2), maps)

            # grequests.map keeps request order, so each response is paired
            # with the row its URL came from.
            for row, count in zip(rows, maps):
                t0 = time.time()
                i += 1
                # A failed request comes back as None; a bad body fails to decode.
                try:
                    js = json.loads(count.text) if count is not None else None
                except ValueError:
                    js = None
                if not isinstance(js, dict):
                    print(f'{i - 1}->{i} -- no data for row {row}')
                    continue

                order_price, normal_price, quantily, margin, difference = parse_histogram(js)
                sheet['C' + str(row)] = str(normal_price)
                sheet['D' + str(row)] = str(order_price)
                sheet['E' + str(row)] = margin
                sheet['F' + str(row)] = difference
                sheet['H' + str(row)] = quantily
                sheet['I' + str(row)] = datetime.datetime.today().strftime("%d/%m/%Y")
                print(f'{i - 1}->{i} -- {order_price} -- {normal_price} -- {quantily} -- {margin} -- {difference} -- {round(time.time() - t0, 2)}')

            xlsx.save(WORKBOOK_PATH)
    except KeyboardInterrupt:
        print('Keyboardinterrupt')
    finally:
        # Runs on normal completion, Ctrl-C and errors alike, so progress is
        # persisted exactly once here.
        xlsx.save(WORKBOOK_PATH)
        xlsx.close()


if __name__ == "__main__":
    collect_prices_batched()
import openpyxl
import requests
import json
import re
import time
import datetime
XLSX_PATH = 'list8.0.xlsx'
# The pasted script had "¤cy=5" here: "&curren" had been decoded as the HTML
# entity for the currency sign, which silently dropped the currency parameter.
ORDER_BOOK_URL = (
    'https://steamcommunity.com/market/itemordershistogram'
    '?country=RU&language=russian&currency=5&item_nameid={item_id}&two_factor=1'
)
REQUEST_TIMEOUT = 10  # seconds; without a timeout one stalled request hangs the run
# Multiplier applied to the sell price before comparing it with the buy price.
# NOTE(review): presumably the seller's share after marketplace fees - confirm.
SELL_PAYOUT_FACTOR = 0.87


def parse_order_book(js):
    """Extract the price figures from one decoded histogram reply.

    Args:
        js: the decoded JSON body; anything that is not a dict is treated as
            "no data".

    Returns:
        A tuple ``(order_price, normal_price, quantily, margin, difference)``.
        ``order_price`` is the first value of the first ``buy_order_graph``
        entry, ``normal_price`` the first value of the first
        ``sell_order_graph`` entry (as float), ``quantily`` the first integer
        found in ``sell_order_summary``. ``margin`` is a string ending in
        ``%`` and ``difference`` a string ending in ``₽``, both rounded to two
        decimals. Every figure that cannot be derived is ``None``.
    """
    order_price = normal_price = quantily = margin = difference = None
    if not isinstance(js, dict):
        return order_price, normal_price, quantily, margin, difference

    # A missing key and an empty graph are both "no price", not an IndexError.
    buy_graph = js.get('buy_order_graph')
    if buy_graph:
        order_price = buy_graph[0][0]

    summary = js.get('sell_order_summary')
    if isinstance(summary, str):
        first_number = re.search(r"\d+", summary)
        if first_number:
            quantily = int(first_number.group())

    sell_graph = js.get('sell_order_graph')
    if sell_graph:
        normal_price = float(sell_graph[0][0])

    # Both prices are required (the original used `or`, which raised a
    # TypeError as soon as only one of them was present).
    if order_price is not None and normal_price is not None:
        net_price = normal_price * SELL_PAYOUT_FACTOR
        difference = str(round(net_price - order_price, 2)) + '₽'
        if order_price:  # a zero buy price would divide by zero
            margin = str(round((net_price / order_price - 1) * 100, 2)) + '%'
    return order_price, normal_price, quantily, margin, difference


def collect_prices_sequential():
    """Fetch histograms one URL at a time and write figures to the workbook.

    Item ids are read from column G of rows 2-101; results go to columns C, D,
    E, F, H and I of the same row. One ``requests.Session`` is reused for all
    requests so the connection to the host is kept alive between them, and the
    workbook is saved once on exit rather than after every row.
    """
    xlsx = openpyxl.reader.excel.load_workbook(filename=XLSX_PATH, data_only=True)
    sheet = xlsx.active
    i = 1
    try:
        with requests.Session() as session:
            for Item in range(2, 102):
                t0 = time.time()
                url = ORDER_BOOK_URL.format(item_id=sheet["G" + str(Item)].value)

                js = None
                try:
                    r = session.get(url, timeout=REQUEST_TIMEOUT)
                    if r.status_code == 200:
                        js = json.loads(r.text)
                except (requests.RequestException, ValueError):
                    # Network failure or undecodable body: handled as an error below.
                    js = None

                if not isinstance(js, dict):
                    # The original also called xlsx.save('') here, which raises
                    # because the path is empty; nothing changed for this row,
                    # so no save is needed.
                    print('!!!ERROR!!!')
                    time.sleep(3)
                    continue

                order_price, normal_price, quantily, margin, difference = parse_order_book(js)
                i += 1
                sheet['C' + str(Item)] = str(normal_price) + str('₽')
                sheet['D' + str(Item)] = str(order_price) + str('₽')
                sheet['E' + str(Item)] = margin
                sheet['F' + str(Item)] = difference
                sheet['H' + str(Item)] = quantily
                sheet['I' + str(Item)] = datetime.datetime.today().strftime("%m/%d/%Y")
                print(f'{i - 1}->{i}={round(time.time() - t0,2)} -- {order_price} -- {normal_price} -- {quantily} -- {margin} -- {difference}')
    except KeyboardInterrupt:
        # Ctrl-C just stops the loop; the finally clause persists the progress.
        pass
    finally:
        xlsx.save(XLSX_PATH)
        xlsx.close()


if __name__ == "__main__":
    collect_prices_sequential()