StatisticalFeaturesAveragedOverTimePeriods.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 7 15:11:21 2018
@author: tanya
"""
  7. import pandas as pd
  8. from libraries.feature_engineering.in_memory_feature_engineering import StatisticalFeaturesOverTime
  9. class StatisticalFeaturesAveragedOverTimePeriods(StatisticalFeaturesOverTime):
  10. '''
  11. '''
  12. def __init__(data, index_cols, date_col, split_date, period_length, past_or_future = 'past', freq = 'days', n_periods = 1, path_to_log = None):
  13. '''
  14. '''
  15. super(StatisticalFeaturesAveragedOverTimePeriods).__init__(data = data.copy(deep = True),
  16. index_cols = index_cols,
  17. date_col = date_col,
  18. split_date = split_date,
  19. period_length = n_periods*period_length,
  20. past_or_future = past_or_future,
  21. freq = freq,
  22. path_to_log)
  23. self.period_number_col = 'period_number'
  24. while period_number_col in data.columns:
  25. self.period_number_col += '&'
  26. perid_numbers = self.data[self.index_cols + [date_col]].drop_duplicates()\
  27. .groupby(index_cols)[date_col].cumcount()\
  28. .reset_index()\
  29. .assign(period_number = lambda x: x[0]/period_length)\
  30. .rename(columns = {'period_number' : self.period_number_col})
  31. self.data = pd.merge(self, data, period_numbers, how = 'left', on = self.index_cols)
  32. self.initial_index_cols = self.index_cols.copy()
  33. self.index_cols.append(self.period_number_col)
  34. def _aggregate_over_time_periods(df):
  35. '''
  36. '''
  37. return df.drop(self.period_number_col, axis = 1)\
  38. .groupby(self.initial_index_cols)\
  39. .mean()\
  40. .reset_index()
  41. def get_kpis_by_aggregation(self, **args):
  42. '''
  43. '''
  44. return self._aggregate_over_time_periods(super(StatisticalFeaturesAveragedOverTimePeriods)
  45. .get_kpis_by_aggregation(**args))
  46. def get_value_stats(self, **args):
  47. '''
  48. '''
  49. return self._aggregate_over_time_periods(super(StatisticalFeaturesAveragedOverTimePeriods)
  50. .get_value_stats(**args))
  51. def get_aggregated_value_stats(self, args):
  52. '''
  53. '''
  54. return self._aggregate_over_time_periods(super(StatisticalFeaturesAveragedOverTimePeriods)
  55. .get_aggregated_value_stats(**args))