Вычислить рабочее время между 2-мя датами
Имеется датафрейм с 4 колонками: Идентификатор задачи, Начальная дата, Конечная дата, часовой пояс.
EJ3232 28.02.2019 15:49 29.02.2019 08:49:38 Etc/GMT-3
EJ3233 01.03.2019 10:49 01.03.2019 15:49 Etc/GMT-4
EJ3234 02.03.2019 7:49 02.03.2019 18:49 Etc/GMT-5
И список выходных\праздничных дней.
'2024': {'1': [1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 20, 21, 27, 28],
'2': [3, 4, 10, 11, 17, 18, 23, 24, 25],
'3': [2, 3, 8, 9, 10, 16, 17, 23, 24, 30, 31],
'4': [6, 7, 13, 14, 20, 21, 28, 29, 30],
'5': [1, 2, 3, 7, 8, 9, 10, 11, 12, 18, 19, 25, 26],
'6': [1, 2, 8, 9, 12, 15, 16, 22, 23, 29, 30],
'7': [6, 7, 13, 14, 20, 21, 27, 28],
'8': [3, 4, 10, 11, 17, 18, 24, 25, 31],
'9': [1, 7, 8, 14, 15, 21, 22, 28, 29],
'10': [5, 6, 12, 13, 19, 20, 26, 27],
'11': [3, 4, 9, 10, 16, 17, 23, 24],
'12': [1, 7, 8, 14, 15, 21, 22, 29, 30, 31]}
Необходима функция которая вычисляла бы рабочее время с учетом выходных\праздничных дней и часового пояса.
Вот функция которую я пытаюсь адаптировать
#функция подсчета продолжительности
def WorkHours(row, name_start, name_end):
start_date = row[name_start]
end_date = row[name_end]
#start_date = row['StartDate']
#start_date = dt.datetime(start_date.year,start_date.month,start_date.day,start_date.hour,start_date.minute,start_date.second)
#end_date = row['EndDate']
#end_date = dt.datetime(end_date.year,end_date.month,end_date.day,end_date.hour,end_date.minute,end_date.second)
#tb = row['TB_CM']
tz = row['CM_TimeZone']
end_hour = row['end_hour']
start_hour = row['start_hour']
DWD = 0
if tz == 'Etc/GMT-10' :
tstart = dt.time(3,0,0,0)
tend = dt.time(11,0,0,0)
elif tz == 'Etc/GMT-9' :
tstart = dt.time(4,0,0,0)
tend = dt.time(12,0,0,0)
elif tz == 'Etc/GMT-8' :
tstart = dt.time(5,0,0,0)
tend = dt.time(13,0,0,0)
elif tz == 'Etc/GMT-7' :
tstart = dt.time(6,0,0,0)
tend = dt.time(14,0,0,0)
elif tz == 'Etc/GMT-6' :
tstart = dt.time(7,0,0,0)
tend = dt.time(15,0,0,0)
elif tz == 'Etc/GMT-5' :
tstart = dt.time(8,0,0,0)
tend = dt.time(16,0,0,0)
elif tz == 'Etc/GMT-4' :
tstart = dt.time(9,0,0,0)
tend = dt.time(17,0,0,0)
elif tz == 'Etc/GMT-3' :
tstart = dt.time(10,0,0,0)
tend = dt.time(18,0,0,0)
elif tz == 'Etc/GMT-2' :
tstart = dt.time(11,0,0,0)
tend = dt.time(19,0,0,0)
elif tz == 'Etc/GMT-11' :
tstart = dt.time(2,0,0,0)
tend = dt.time(10,0,0,0)
elif tz == 'Etc/GMT-12' :
tstart = dt.time(1,0,0,0)
tend = dt.time(9,0,0,0)
else :
tstart = dt.time(10,0,0,0)
tend = dt.time(18,0,0,0)
DWD = (pd.to_timedelta(end_hour) - pd.to_timedelta(start_hour))
WH = pd.to_timedelta('0:00:00')
#начало и конец периода к границам рабочего дня
if start_date.time()<tstart :
start_date = dt.datetime(start_date.year,start_date.month,
start_date.day,tstart.hour,0,0)
if start_date.time()>tend :
start_date=start_date+dt.timedelta(days=1)
start_date = dt.datetime(start_date.year,start_date.month,start_date.day,tstart.hour,0,0)
if end_date.time()<tstart :
end_date=end_date-dt.timedelta(days=1)
end_date = dt.datetime(end_date.year,end_date.month,end_date.day,tend.hour,0,0)
if end_date.time()>tend :
end_date = dt.datetime(end_date.year,end_date.month,end_date.day,tend.hour,0,0)
if start_date>end_date:
WH = pd.to_timedelta('0:00:00')
else:
WH = pd.to_timedelta('0:00:00')
startdelta = pd.to_timedelta(start_hour)
enddelta = pd.to_timedelta(end_hour)
#период укладывается в одни календарные сутки
if start_date.date() == end_date.date():
if len(cal[cal['Дата']==start_date.date()])==0:
if start_date.date().isoweekday() in range(1,6):
WH = WH + DWD - pd.to_timedelta(start_date.strftime('%H:%M:%S') - startdelta) - pd.to_timedelta(enddelta - end_date.strftime('%H:%M:%S'))
elif start_date.date().isoweekday() in range(6,8):
WH = WH
else:
t = cal[cal['Дата']==start_date.date()]['РВ'].iloc[0]
if t ==0:
WH = WH
else:
WH = WH + pd.to_timedelta(t, unit = 'h') - pd.to_timedelta(start_date.strftime('%H:%M:%S')- startdelta) - pd.to_timedelta(enddelta-pd.to_timedelta(1,unit = 'h') - end_date.strftime('%H:%M:%S'))
#период не укладывается в одни календарные сутки
else:
#первый день
tabdate = cal[cal['Дата']==start_date.date()]
if len(tabdate)==0:
if start_date.date().isoweekday() in range(1,6):
WH = WH + DWD - pd.to_timedelta(start_date.strftime('%H:%M:%S')- startdelta)
elif start_date.date().isoweekday() in range(6,8):
WH = WH
else:
t = cal[cal['Дата']==start_date.date()]['РВ'].iloc[0]
WH = WH + max(pd.to_timedelta(t, unit = 'h')
- pd.to_timedelta(start_date.strftime('%H:%M:%S')- startdelta),
pd.to_timedelta(0, unit = 'h'))
#последний день
tabdate = cal[cal['Дата']==end_date.date()]
if len(tabdate)==0:
if end_date.date().isoweekday() in range(1,6):
WH = WH + DWD - pd.to_timedelta(enddelta - end_date.strftime('%H:%M:%S'))
elif start_date.date().isoweekday() in range(6,8):
WH = WH
else:
t = cal[cal['Дата']==end_date.date()]['РВ'].iloc[0]
WH = WH + max(pd.to_timedelta(t, unit = 'h') - pd.to_timedelta(enddelta - end_date.strftime('%H:%M:%S')),pd.to_timedelta(0, unit = 'h'))
period = pd.Series(pd.date_range(start_date.date()+pd.to_timedelta(1, unit = 'd'),
end_date.date()-pd.to_timedelta(1, unit = 'd')))
period = pd.to_datetime(period).dt.strftime("%Y-%m-%d")
cshort = period.loc[period.isin(calShort['Дата'])==True].count()
indexes = period.loc[period.isin(calShort['Дата'])==True].index
period = period.drop(indexes, axis = 0)
indexes1 = period.loc[period.isin(calNull['Дата'])==True].index
period = period.drop(indexes1, axis = 0)
per1 = pd.DataFrame(period)
per1['Day']=per1[0].apply(lambda x: pd.to_datetime(x).date().isoweekday())
WH = WH + pd.to_timedelta(per1.loc[(per1['Day']!=6) & (per1['Day']!=7)][0].count()*8,unit = 'h')+pd.to_timedelta(cshort*7,unit = 'h')
#
return WH
Ответы (1 шт):
В общем, алгоритм такой. при исходных данных:
import pandas as pd
import numpy as np
holidays = {'2019': {'1': [1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 20, 21, 27, 28],
'2': [3, 4, 10, 11, 17, 18, 23, 24, 25],
'3': [2, 3, 8, 9, 10, 16, 17, 23, 24, 30, 31],
'4': [6, 7, 13, 14, 20, 21, 28, 29, 30],
'5': [1, 2, 3, 7, 8, 9, 10, 11, 12, 18, 19, 25, 26],
'6': [1, 2, 8, 9, 12, 15, 16, 22, 23, 29, 30],
'7': [6, 7, 13, 14, 20, 21, 27, 28],
'8': [3, 4, 10, 11, 17, 18, 24, 25, 31],
'9': [1, 7, 8, 14, 15, 21, 22, 28, 29],
'10': [5, 6, 12, 13, 19, 20, 26, 27],
'11': [3, 4, 9, 10, 16, 17, 23, 24],
'12': [1, 7, 8, 14, 15, 21, 22, 29, 30, 31]}}
df = pd.DataFrame({"task":["EJ3232", "EJ3233", "EJ3234"],
"start":["27.02.2019 15:49", "01.03.2019 10:49", "02.03.2019 7:49"],
"end":["28.02.2019 08:49:38", "01.03.2019 15:49", "02.03.2019 18:49"],
"tz": ["Etc/GMT-3", "Etc/GMT-4", "Etc/GMT-5"]})
сначала приводим календарь выходных в удобный вид:
hc = []
for year in holidays.keys():
for month in holidays[year]:
for day in holidays[year][month]:
hc.append(f"{year}-{month.zfill(2)}-{day:02d}")
hc = [pd.Timestamp(x) for x in hc]
затем временные промежутки с учетом начала, конца и временной зоны, и переводим их в серии:
df["ranges"] = df.apply(lambda x: pd.date_range(start=pd.to_datetime(x["start"], dayfirst=True, format='mixed').tz_localize(x["tz"]),
end = pd.to_datetime(x["end"], dayfirst=True, format='mixed').tz_localize(x["tz"]), freq="h"), axis=1)
dtranges = df.explode("ranges").drop(columns=["start", "end", "tz"])
dtranges["ranges"] = pd.to_datetime(dtranges["ranges"], utc=True)
затем выбираем только те дни, которые не являются выходными, группируем по датам и номерам задач (можно только по задачам) и вычисляем продолжительность
bh_df = dtranges[~pd.to_datetime(dtranges["ranges"].dt.date).isin(hc)]
res = bh_df.groupby(["task", bh_df["ranges"].dt.date])["ranges"].apply(lambda x: x.max()-x.min())
получаем res, где в третьей колонке как раз продолжительность задачи за каждый день:
task ranges
EJ3232 2019-02-27 0 days 11:00:00
2019-02-28 0 days 05:00:00
EJ3233 2019-03-01 0 days 05:00:00
Name: ranges, dtype: timedelta64[ns]
или, при
res = bh_df.groupby(["task"]).apply(lambda x: x.max()-x.min())
получим общее время выполнения задачи:
ranges
task
EJ3232 0 days 17:00:00
EJ3233 0 days 05:00:00