1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Created on Wed Nov 7 15:11:21 2018
- @author: tanya
- """
- import pandas as pd
- from libraries.feature_engineering.in_memory_feature_engineering import StatisticalFeaturesOverTime
class StatisticalFeaturesAveragedOverTimePeriods(StatisticalFeaturesOverTime):
    '''
    Statistical features computed per time period and averaged over periods.

    On construction every (index, date) combination in the data is tagged
    with a period number, and the period-number column is appended to
    ``index_cols`` so that the parent class computes its statistics per
    period. Each public ``get_*`` method then averages the parent's result
    over the periods, grouping by the original index columns.
    '''

    def __init__(self, data, index_cols, date_col, split_date, period_length,
                 past_or_future='past', freq='days', n_periods=1,
                 path_to_log=None):
        '''
        :param data: input DataFrame; a deep copy is handed to the parent,
            so the caller's frame is not modified here.
        :param index_cols: list of column names identifying one entity.
        :param date_col: name of the date column.
        :param split_date: forwarded unchanged to the parent class.
        :param period_length: number of distinct dates (per entity) that
            make up one period.
            NOTE(review): the parent receives n_periods * period_length as
            its own period_length, presumably in units of `freq` -- confirm
            that both usages agree.
        :param past_or_future: forwarded unchanged to the parent class.
        :param freq: forwarded unchanged to the parent class.
        :param n_periods: number of periods; the parent is initialised
            with a total length of n_periods * period_length.
        :param path_to_log: forwarded to the parent class.
        '''
        # NOTE(review): assumes the parent's keyword is named `path_to_log`
        # (the original passed it positionally after keywords, which does
        # not compile) -- confirm against StatisticalFeaturesOverTime.
        super().__init__(data=data.copy(deep=True),
                         index_cols=index_cols,
                         date_col=date_col,
                         split_date=split_date,
                         period_length=n_periods * period_length,
                         past_or_future=past_or_future,
                         freq=freq,
                         path_to_log=path_to_log)

        # Pick a column name that does not clash with an existing column.
        self.period_number_col = 'period_number'
        while self.period_number_col in self.data.columns:
            self.period_number_col += '&'

        # Keep the original index columns; building new lists (instead of
        # appending in place) avoids mutating a list owned by the caller.
        self.initial_index_cols = list(self.index_cols)

        # One row per (entity, date). The running count of dates within
        # an entity, floor-divided by period_length, is the period number.
        # NOTE(review): cumcount follows row order, so this assumes the
        # data is ordered by date within each entity -- confirm.
        distinct_dates = self.data[self.initial_index_cols + [date_col]]\
            .drop_duplicates()
        period_index = distinct_dates.groupby(self.initial_index_cols)[date_col]\
            .cumcount() // period_length
        period_numbers = distinct_dates.assign(
            **{self.period_number_col: period_index})

        # The date column must be part of the key, otherwise every row of
        # an entity would be matched with every period of that entity.
        self.data = pd.merge(self.data, period_numbers, how='left',
                             on=self.initial_index_cols + [date_col])

        # From here on the parent groups by entity AND period.
        self.index_cols = self.initial_index_cols + [self.period_number_col]

    def _aggregate_over_time_periods(self, df):
        '''
        Average per-period results over the periods.

        :param df: DataFrame containing the period-number column and the
            original index columns as regular columns.
        :return: DataFrame with the period-number column dropped, the mean
            taken per original index combination, and the index columns
            restored as regular columns.
        '''
        return df.drop(self.period_number_col, axis=1)\
                 .groupby(self.initial_index_cols)\
                 .mean()\
                 .reset_index()

    def get_kpis_by_aggregation(self, **args):
        '''
        Call the parent's ``get_kpis_by_aggregation`` with the given
        keyword arguments and average its result over the periods.
        '''
        return self._aggregate_over_time_periods(
            super().get_kpis_by_aggregation(**args))

    def get_value_stats(self, **args):
        '''
        Call the parent's ``get_value_stats`` with the given keyword
        arguments and average its result over the periods.
        '''
        return self._aggregate_over_time_periods(
            super().get_value_stats(**args))

    def get_aggregated_value_stats(self, args=None, **kwargs):
        '''
        Call the parent's ``get_aggregated_value_stats`` and average its
        result over the periods.

        :param args: optional dict of keyword arguments for the parent
            method (kept for backward compatibility with the original
            single-dict signature).
        :param kwargs: keyword arguments for the parent method, matching
            the calling style of the sibling ``get_*`` methods; these take
            precedence over entries in ``args``.
        '''
        params = dict(args or {})
        params.update(kwargs)
        return self._aggregate_over_time_periods(
            super().get_aggregated_value_stats(**params))
-
-
-
|