holiday.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514
  1. from datetime import datetime, timedelta
  2. import warnings
  3. from dateutil.relativedelta import FR, MO, SA, SU, TH, TU, WE # noqa
  4. import numpy as np
  5. from pandas.compat import add_metaclass
  6. from pandas.errors import PerformanceWarning
  7. from pandas import DateOffset, Series, Timestamp, date_range
  8. from pandas.tseries.offsets import Day, Easter
  9. def next_monday(dt):
  10. """
  11. If holiday falls on Saturday, use following Monday instead;
  12. if holiday falls on Sunday, use Monday instead
  13. """
  14. if dt.weekday() == 5:
  15. return dt + timedelta(2)
  16. elif dt.weekday() == 6:
  17. return dt + timedelta(1)
  18. return dt
  19. def next_monday_or_tuesday(dt):
  20. """
  21. For second holiday of two adjacent ones!
  22. If holiday falls on Saturday, use following Monday instead;
  23. if holiday falls on Sunday or Monday, use following Tuesday instead
  24. (because Monday is already taken by adjacent holiday on the day before)
  25. """
  26. dow = dt.weekday()
  27. if dow == 5 or dow == 6:
  28. return dt + timedelta(2)
  29. elif dow == 0:
  30. return dt + timedelta(1)
  31. return dt
  32. def previous_friday(dt):
  33. """
  34. If holiday falls on Saturday or Sunday, use previous Friday instead.
  35. """
  36. if dt.weekday() == 5:
  37. return dt - timedelta(1)
  38. elif dt.weekday() == 6:
  39. return dt - timedelta(2)
  40. return dt
  41. def sunday_to_monday(dt):
  42. """
  43. If holiday falls on Sunday, use day thereafter (Monday) instead.
  44. """
  45. if dt.weekday() == 6:
  46. return dt + timedelta(1)
  47. return dt
  48. def weekend_to_monday(dt):
  49. """
  50. If holiday falls on Sunday or Saturday,
  51. use day thereafter (Monday) instead.
  52. Needed for holidays such as Christmas observation in Europe
  53. """
  54. if dt.weekday() == 6:
  55. return dt + timedelta(1)
  56. elif dt.weekday() == 5:
  57. return dt + timedelta(2)
  58. return dt
  59. def nearest_workday(dt):
  60. """
  61. If holiday falls on Saturday, use day before (Friday) instead;
  62. if holiday falls on Sunday, use day thereafter (Monday) instead.
  63. """
  64. if dt.weekday() == 5:
  65. return dt - timedelta(1)
  66. elif dt.weekday() == 6:
  67. return dt + timedelta(1)
  68. return dt
  69. def next_workday(dt):
  70. """
  71. returns next weekday used for observances
  72. """
  73. dt += timedelta(days=1)
  74. while dt.weekday() > 4:
  75. # Mon-Fri are 0-4
  76. dt += timedelta(days=1)
  77. return dt
  78. def previous_workday(dt):
  79. """
  80. returns previous weekday used for observances
  81. """
  82. dt -= timedelta(days=1)
  83. while dt.weekday() > 4:
  84. # Mon-Fri are 0-4
  85. dt -= timedelta(days=1)
  86. return dt
  87. def before_nearest_workday(dt):
  88. """
  89. returns previous workday after nearest workday
  90. """
  91. return previous_workday(nearest_workday(dt))
  92. def after_nearest_workday(dt):
  93. """
  94. returns next workday after nearest workday
  95. needed for Boxing day or multiple holidays in a series
  96. """
  97. return next_workday(nearest_workday(dt))
  98. class Holiday(object):
  99. """
  100. Class that defines a holiday with start/end dates and rules
  101. for observance.
  102. """
  103. def __init__(self, name, year=None, month=None, day=None, offset=None,
  104. observance=None, start_date=None, end_date=None,
  105. days_of_week=None):
  106. """
  107. Parameters
  108. ----------
  109. name : str
  110. Name of the holiday , defaults to class name
  111. offset : array of pandas.tseries.offsets or
  112. class from pandas.tseries.offsets
  113. computes offset from date
  114. observance: function
  115. computes when holiday is given a pandas Timestamp
  116. days_of_week:
  117. provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
  118. Monday=0,..,Sunday=6
  119. Examples
  120. --------
  121. >>> from pandas.tseries.holiday import Holiday, nearest_workday
  122. >>> from dateutil.relativedelta import MO
  123. >>> USMemorialDay = Holiday('MemorialDay', month=5, day=24,
  124. offset=pd.DateOffset(weekday=MO(1)))
  125. >>> USLaborDay = Holiday('Labor Day', month=9, day=1,
  126. offset=pd.DateOffset(weekday=MO(1)))
  127. >>> July3rd = Holiday('July 3rd', month=7, day=3,)
  128. >>> NewYears = Holiday('New Years Day', month=1, day=1,
  129. observance=nearest_workday),
  130. >>> July3rd = Holiday('July 3rd', month=7, day=3,
  131. days_of_week=(0, 1, 2, 3))
  132. """
  133. if offset is not None and observance is not None:
  134. raise NotImplementedError("Cannot use both offset and observance.")
  135. self.name = name
  136. self.year = year
  137. self.month = month
  138. self.day = day
  139. self.offset = offset
  140. self.start_date = Timestamp(
  141. start_date) if start_date is not None else start_date
  142. self.end_date = Timestamp(
  143. end_date) if end_date is not None else end_date
  144. self.observance = observance
  145. assert (days_of_week is None or type(days_of_week) == tuple)
  146. self.days_of_week = days_of_week
  147. def __repr__(self):
  148. info = ''
  149. if self.year is not None:
  150. info += 'year={year}, '.format(year=self.year)
  151. info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day)
  152. if self.offset is not None:
  153. info += 'offset={offset}'.format(offset=self.offset)
  154. if self.observance is not None:
  155. info += 'observance={obs}'.format(obs=self.observance)
  156. repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info)
  157. return repr
  158. def dates(self, start_date, end_date, return_name=False):
  159. """
  160. Calculate holidays observed between start date and end date
  161. Parameters
  162. ----------
  163. start_date : starting date, datetime-like, optional
  164. end_date : ending date, datetime-like, optional
  165. return_name : bool, optional, default=False
  166. If True, return a series that has dates and holiday names.
  167. False will only return dates.
  168. """
  169. start_date = Timestamp(start_date)
  170. end_date = Timestamp(end_date)
  171. filter_start_date = start_date
  172. filter_end_date = end_date
  173. if self.year is not None:
  174. dt = Timestamp(datetime(self.year, self.month, self.day))
  175. if return_name:
  176. return Series(self.name, index=[dt])
  177. else:
  178. return [dt]
  179. dates = self._reference_dates(start_date, end_date)
  180. holiday_dates = self._apply_rule(dates)
  181. if self.days_of_week is not None:
  182. holiday_dates = holiday_dates[np.in1d(holiday_dates.dayofweek,
  183. self.days_of_week)]
  184. if self.start_date is not None:
  185. filter_start_date = max(self.start_date.tz_localize(
  186. filter_start_date.tz), filter_start_date)
  187. if self.end_date is not None:
  188. filter_end_date = min(self.end_date.tz_localize(
  189. filter_end_date.tz), filter_end_date)
  190. holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) &
  191. (holiday_dates <= filter_end_date)]
  192. if return_name:
  193. return Series(self.name, index=holiday_dates)
  194. return holiday_dates
  195. def _reference_dates(self, start_date, end_date):
  196. """
  197. Get reference dates for the holiday.
  198. Return reference dates for the holiday also returning the year
  199. prior to the start_date and year following the end_date. This ensures
  200. that any offsets to be applied will yield the holidays within
  201. the passed in dates.
  202. """
  203. if self.start_date is not None:
  204. start_date = self.start_date.tz_localize(start_date.tz)
  205. if self.end_date is not None:
  206. end_date = self.end_date.tz_localize(start_date.tz)
  207. year_offset = DateOffset(years=1)
  208. reference_start_date = Timestamp(
  209. datetime(start_date.year - 1, self.month, self.day))
  210. reference_end_date = Timestamp(
  211. datetime(end_date.year + 1, self.month, self.day))
  212. # Don't process unnecessary holidays
  213. dates = date_range(start=reference_start_date,
  214. end=reference_end_date,
  215. freq=year_offset, tz=start_date.tz)
  216. return dates
  217. def _apply_rule(self, dates):
  218. """
  219. Apply the given offset/observance to a DatetimeIndex of dates.
  220. Parameters
  221. ----------
  222. dates : DatetimeIndex
  223. Dates to apply the given offset/observance rule
  224. Returns
  225. -------
  226. Dates with rules applied
  227. """
  228. if self.observance is not None:
  229. return dates.map(lambda d: self.observance(d))
  230. if self.offset is not None:
  231. if not isinstance(self.offset, list):
  232. offsets = [self.offset]
  233. else:
  234. offsets = self.offset
  235. for offset in offsets:
  236. # if we are adding a non-vectorized value
  237. # ignore the PerformanceWarnings:
  238. with warnings.catch_warnings():
  239. warnings.simplefilter("ignore", PerformanceWarning)
  240. dates += offset
  241. return dates
  242. holiday_calendars = {}
  243. def register(cls):
  244. try:
  245. name = cls.name
  246. except AttributeError:
  247. name = cls.__name__
  248. holiday_calendars[name] = cls
  249. def get_calendar(name):
  250. """
  251. Return an instance of a calendar based on its name.
  252. Parameters
  253. ----------
  254. name : str
  255. Calendar name to return an instance of
  256. """
  257. return holiday_calendars[name]()
  258. class HolidayCalendarMetaClass(type):
  259. def __new__(cls, clsname, bases, attrs):
  260. calendar_class = super(HolidayCalendarMetaClass, cls).__new__(
  261. cls, clsname, bases, attrs)
  262. register(calendar_class)
  263. return calendar_class
  264. @add_metaclass(HolidayCalendarMetaClass)
  265. class AbstractHolidayCalendar(object):
  266. """
  267. Abstract interface to create holidays following certain rules.
  268. """
  269. __metaclass__ = HolidayCalendarMetaClass
  270. rules = []
  271. start_date = Timestamp(datetime(1970, 1, 1))
  272. end_date = Timestamp(datetime(2030, 12, 31))
  273. _cache = None
  274. def __init__(self, name=None, rules=None):
  275. """
  276. Initializes holiday object with a given set a rules. Normally
  277. classes just have the rules defined within them.
  278. Parameters
  279. ----------
  280. name : str
  281. Name of the holiday calendar, defaults to class name
  282. rules : array of Holiday objects
  283. A set of rules used to create the holidays.
  284. """
  285. super(AbstractHolidayCalendar, self).__init__()
  286. if name is None:
  287. name = self.__class__.__name__
  288. self.name = name
  289. if rules is not None:
  290. self.rules = rules
  291. def rule_from_name(self, name):
  292. for rule in self.rules:
  293. if rule.name == name:
  294. return rule
  295. return None
  296. def holidays(self, start=None, end=None, return_name=False):
  297. """
  298. Returns a curve with holidays between start_date and end_date
  299. Parameters
  300. ----------
  301. start : starting date, datetime-like, optional
  302. end : ending date, datetime-like, optional
  303. return_name : bool, optional
  304. If True, return a series that has dates and holiday names.
  305. False will only return a DatetimeIndex of dates.
  306. Returns
  307. -------
  308. DatetimeIndex of holidays
  309. """
  310. if self.rules is None:
  311. raise Exception('Holiday Calendar {name} does not have any '
  312. 'rules specified'.format(name=self.name))
  313. if start is None:
  314. start = AbstractHolidayCalendar.start_date
  315. if end is None:
  316. end = AbstractHolidayCalendar.end_date
  317. start = Timestamp(start)
  318. end = Timestamp(end)
  319. holidays = None
  320. # If we don't have a cache or the dates are outside the prior cache, we
  321. # get them again
  322. if (self._cache is None or start < self._cache[0] or
  323. end > self._cache[1]):
  324. for rule in self.rules:
  325. rule_holidays = rule.dates(start, end, return_name=True)
  326. if holidays is None:
  327. holidays = rule_holidays
  328. else:
  329. holidays = holidays.append(rule_holidays)
  330. self._cache = (start, end, holidays.sort_index())
  331. holidays = self._cache[2]
  332. holidays = holidays[start:end]
  333. if return_name:
  334. return holidays
  335. else:
  336. return holidays.index
  337. @staticmethod
  338. def merge_class(base, other):
  339. """
  340. Merge holiday calendars together. The base calendar
  341. will take precedence to other. The merge will be done
  342. based on each holiday's name.
  343. Parameters
  344. ----------
  345. base : AbstractHolidayCalendar
  346. instance/subclass or array of Holiday objects
  347. other : AbstractHolidayCalendar
  348. instance/subclass or array of Holiday objects
  349. """
  350. try:
  351. other = other.rules
  352. except AttributeError:
  353. pass
  354. if not isinstance(other, list):
  355. other = [other]
  356. other_holidays = {holiday.name: holiday for holiday in other}
  357. try:
  358. base = base.rules
  359. except AttributeError:
  360. pass
  361. if not isinstance(base, list):
  362. base = [base]
  363. base_holidays = {holiday.name: holiday for holiday in base}
  364. other_holidays.update(base_holidays)
  365. return list(other_holidays.values())
  366. def merge(self, other, inplace=False):
  367. """
  368. Merge holiday calendars together. The caller's class
  369. rules take precedence. The merge will be done
  370. based on each holiday's name.
  371. Parameters
  372. ----------
  373. other : holiday calendar
  374. inplace : bool (default=False)
  375. If True set rule_table to holidays, else return array of Holidays
  376. """
  377. holidays = self.merge_class(self, other)
  378. if inplace:
  379. self.rules = holidays
  380. else:
  381. return holidays
  382. USMemorialDay = Holiday('MemorialDay', month=5, day=31,
  383. offset=DateOffset(weekday=MO(-1)))
  384. USLaborDay = Holiday('Labor Day', month=9, day=1,
  385. offset=DateOffset(weekday=MO(1)))
  386. USColumbusDay = Holiday('Columbus Day', month=10, day=1,
  387. offset=DateOffset(weekday=MO(2)))
  388. USThanksgivingDay = Holiday('Thanksgiving', month=11, day=1,
  389. offset=DateOffset(weekday=TH(4)))
  390. USMartinLutherKingJr = Holiday('Dr. Martin Luther King Jr.',
  391. start_date=datetime(1986, 1, 1), month=1, day=1,
  392. offset=DateOffset(weekday=MO(3)))
  393. USPresidentsDay = Holiday('President''s Day', month=2, day=1,
  394. offset=DateOffset(weekday=MO(3)))
  395. GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
  396. EasterMonday = Holiday("Easter Monday", month=1, day=1,
  397. offset=[Easter(), Day(1)])
  398. class USFederalHolidayCalendar(AbstractHolidayCalendar):
  399. """
  400. US Federal Government Holiday Calendar based on rules specified by:
  401. https://www.opm.gov/policy-data-oversight/
  402. snow-dismissal-procedures/federal-holidays/
  403. """
  404. rules = [
  405. Holiday('New Years Day', month=1, day=1, observance=nearest_workday),
  406. USMartinLutherKingJr,
  407. USPresidentsDay,
  408. USMemorialDay,
  409. Holiday('July 4th', month=7, day=4, observance=nearest_workday),
  410. USLaborDay,
  411. USColumbusDay,
  412. Holiday('Veterans Day', month=11, day=11, observance=nearest_workday),
  413. USThanksgivingDay,
  414. Holiday('Christmas', month=12, day=25, observance=nearest_workday)
  415. ]
  416. def HolidayCalendarFactory(name, base, other,
  417. base_class=AbstractHolidayCalendar):
  418. rules = AbstractHolidayCalendar.merge_class(base, other)
  419. calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
  420. return calendar_class