#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 18 16:26:47 2018

@author: tanya
"""

import os
import unittest
import logging
import pandas as pd
import numpy as np

from pandas.util.testing import assert_frame_equal

from libraries.feature_engineering.in_memory_feature_engineering.StatisticalFeatures import StatisticalFeatures
from libraries.logging.logging_utils import configure_logging


class TestStatisticalFeatures(unittest.TestCase):
    '''Shared fixture for tests of ``StatisticalFeatures``.

    Holds the input DataFrame (``self.data``), the grouping columns
    (``self.index_cols``) and the ``StatisticalFeatures`` instance under
    test (``self.obj``).
    '''
    def __init__(self, data=None, index_cols=None, path_to_log=None):
        '''Build the fixture.

        :param DataFrame data: input data; when None, a small default frame
            is built containing integer, float, string, datetime and all-NaN
            columns plus the two index columns ``id1`` and ``id2``.
        :param list index_cols: grouping columns; defaults to
            ``['id1', 'id2']``.
        :param str path_to_log: forwarded unchanged to ``StatisticalFeatures``.
        '''
        # Local import: ``pd.datetime`` was only an alias of
        # ``datetime.datetime`` and has been deprecated and later removed
        # from pandas, so the standard-library class is used directly.
        from datetime import datetime

        # unittest loaders instantiate test cases as ``cls(methodName)``, so
        # a string arriving in the first positional slot is a test method
        # name rather than data.
        if isinstance(data, str):
            method_name, data = data, None
        else:
            method_name = 'runTest'

        # Initialise the TestCase machinery (method name, cleanups, ...).
        super(TestStatisticalFeatures, self).__init__(method_name)

        self.index_cols = ['id1', 'id2'] if index_cols is None else index_cols

        if data is None:
            # The default frame deliberately contains missing values in the
            # value columns and in one index column (``id2``).
            default_frame = pd.DataFrame({
                'int': [1, 2, 3, 2, 55, 3, 7],
                'float': [0.1, 7, 0.1, 99.9, 99.9, np.nan, 7],
                'str': ['a', np.nan, 'c', 'a', 'a', '', 'c'],
                'datetime': [datetime(2017, 1, 2), np.nan,
                             datetime(2017, 5, 3), datetime(2017, 1, 4),
                             '2018-01-19', datetime(2018, 1, 4),
                             datetime(2019, 3, 23)],
                'nan': [np.nan] * 7,
                'id1': [1, 1, 3, 3, 3, 1, 1],
                'id2': ['a', 'a', 'b', 'b', 'a', 'a', np.nan],
            })
            self.data = default_frame.sort_values(by=self.index_cols)
        else:
            self.data = data

        self.obj = StatisticalFeatures(data=self.data,
                                       index_cols=self.index_cols,
                                       path_to_log=path_to_log)
            
class TestKpisByAggregation(TestStatisticalFeatures):
    '''Checks ``get_kpis_by_aggregation`` against hand-computed answers.

    Every test accepts optional ``kpis`` and ``answer`` overrides. When they
    are omitted, defaults are used whose expected values were computed by
    hand for the default fixture data.
    '''
    def __init__(self, data=None, index_cols=None, path_to_log=None):
        '''Forward all arguments unchanged to the parent fixture.'''
        super(TestKpisByAggregation, self).__init__(data=data,
                                                    index_cols=index_cols,
                                                    path_to_log=path_to_log)

    def _index_by_keys(self, frame):
        '''Sort ``frame`` by the index columns and move them into the index.'''
        return frame.sort_values(self.index_cols).set_index(self.index_cols)

    def _check_kpis(self, kpis, answer):
        '''Aggregate with ``kpis`` and compare the outcome with ``answer``.

        Only the columns present in the result are compared, in the column
        order of the result.
        '''
        result = self._index_by_keys(
            self.obj.get_kpis_by_aggregation(kpis=kpis))
        assert_frame_equal(result, answer[result.columns])

    def test_builtin_aggfuncs_numeric_cols(self, answer=None, kpis=None):
        '''Tests the expected behaviour of pandas builtin aggregation
           functions on numeric columns, in particular with missing values.

           :param DataFrame answer: expected result, indexed by the index
               columns; a default is used when None
           :param list of tuples or dict kpis: aggregations to compute;
               a default is used when empty or None
        '''
        kpis = kpis or [('int', ['min', 'std']),
                        ('float', ['mean', np.sum]),
                        ('float', 'sum'),
                        ('nan', 'mean')]

        # ``answer or ...`` would raise ValueError for a DataFrame (its truth
        # value is ambiguous), hence the explicit None check.
        if answer is None:
            answer = self._index_by_keys(pd.DataFrame([
                {'id1': 1, 'id2': 'a', 'int_min': 1,
                 'int_std': pd.Series([1, 2, 3]).std(),
                 'float_mean': np.mean([0.1, 7.0]), 'float_sum': 7.1,
                 'nan_mean': np.nan},
                {'id1': 3, 'id2': 'b', 'int_min': 2,
                 'int_std': pd.Series([2, 3]).std(),
                 'float_mean': np.mean([0.1, 99.9]), 'float_sum': 100,
                 'nan_mean': np.nan},
                {'id1': 3, 'id2': 'a', 'int_min': 55,
                 'int_std': np.nan,
                 'float_mean': 99.9, 'float_sum': 99.9,
                 'nan_mean': np.nan},
            ]))

        self._check_kpis(kpis, answer)

    def test_dict_kpi(self, kpis=None, answer=None):
        '''Tests kpis given as a dict mapping a column to one aggregation
           function or to a list of them.

           :param dict kpis: aggregations to compute; a default is used
               when empty or None
           :param DataFrame answer: expected result, indexed by the index
               columns; a default is used when None
        '''
        kpis = kpis or {'int': ['min', 'std'], 'float': 'mean'}

        if answer is None:
            answer = self._index_by_keys(pd.DataFrame([
                {'id1': 1, 'id2': 'a', 'int_min': 1,
                 'int_std': pd.Series([1, 2, 3]).std(),
                 'float_mean': np.mean([0.1, 7.0])},
                {'id1': 3, 'id2': 'b', 'int_min': 2,
                 'int_std': pd.Series([2, 3]).std(),
                 'float_mean': np.mean([0.1, 99.9])},
                {'id1': 3, 'id2': 'a', 'int_min': 55,
                 'int_std': np.nan,
                 'float_mean': 99.9},
            ]))

        self._check_kpis(kpis, answer)

    def test_string_cols(self, kpis=None, answer=None):
        '''Tests aggregation of a string column with ``sum``.

           The default answer expects the missing value in group (1, 'a')
           to show up as the text 'nan' inside the concatenated string.

           :param dict kpis: aggregations to compute; a default is used
               when empty or None
           :param DataFrame answer: expected result, indexed by the index
               columns; a default is used when None
        '''
        kpis = kpis or {'str': ['sum']}

        if answer is None:
            answer = self._index_by_keys(pd.DataFrame([
                {'id1': 1, 'id2': 'a', 'str_sum': 'anan'},
                {'id1': 3, 'id2': 'b', 'str_sum': 'ca'},
                {'id1': 3, 'id2': 'a', 'str_sum': 'a'},
            ]))

        self._check_kpis(kpis, answer)

    def test_custom_aggfunc(self, kpis=None, answer=None):
        '''Tests aggregation with a user-defined function.

           :param dict kpis: aggregations to compute; when None, the 'int'
               column is aggregated with a locally defined ``custom_sum``
           :param DataFrame answer: expected result, indexed by the index
               columns; a default is used when None
        '''
        if kpis is None:
            def custom_sum(x):
                return np.sum(x)

            kpis = {'int': custom_sum}

        if answer is None:
            # In the default data group (3, 'a') holds the single value 55
            # and group (3, 'b') holds 3 and 2.
            answer = self._index_by_keys(pd.DataFrame([
                {'id1': 1, 'id2': 'a', 'int_custom_sum': 6},
                {'id1': 3, 'id2': 'a', 'int_custom_sum': 55},
                {'id1': 3, 'id2': 'b', 'int_custom_sum': 5},
            ]))

        self._check_kpis(kpis, answer)

    def test_some_wrong_col(self, kpis=None, answer=None):
        '''Tests kpis that mix an existing column with a non-existing one.

           The default answer contains only the aggregation of the
           existing column.

           :param dict kpis: aggregations to compute; a default is used
               when empty or None
           :param DataFrame answer: expected result, indexed by the index
               columns; a default is used when None
        '''
        kpis = kpis or {'bla': 'sum', 'int': 'sum'}

        if answer is None:
            answer = self._index_by_keys(pd.DataFrame([
                {'id1': 1, 'id2': 'a', 'int_sum': 6},
                {'id1': 3, 'id2': 'a', 'int_sum': 55},
                {'id1': 3, 'id2': 'b', 'int_sum': 5},
            ]))

        self._check_kpis(kpis, answer)

    def test_all_wrong_cols(self, kpis=None, answer=None):
        '''Tests kpis that refer only to non-existing columns.

           The default answer is the set of distinct index column
           combinations of the input data. Unlike the other tests, the
           result is compared as returned, without sorting or re-indexing.

           :param dict kpis: aggregations to compute; a default is used
               when empty or None
           :param DataFrame answer: expected result with the index columns
               as regular columns; a default is used when None
        '''
        kpis = kpis or {'bla': 'sum', 'blub': 'sum'}

        result = self.obj.get_kpis_by_aggregation(kpis=kpis)

        if answer is None:
            answer = (self.data[self.index_cols]
                      .drop_duplicates()
                      .reset_index(drop=True))

        assert_frame_equal(result, answer[result.columns])
        
if __name__ == '__main__':

    # Manual runner: writes a log file below $PROJECT_DIR and calls the
    # test methods one by one.
    project_dir = os.environ.get('PROJECT_DIR')
    if project_dir is None:
        # os.path.join(None, ...) would fail with an opaque TypeError.
        raise RuntimeError('The PROJECT_DIR environment variable must be set '
                           'to locate the test log file.')

    path_to_log = os.path.join(project_dir,
                               'tests', 'test_feature_engineering',
                               'test_in_memory_feature_engineering',
                               'test_kpis_by_aggregation.log')

    configure_logging(path_to_log)
    logger = logging.getLogger(__name__)

    inst = TestKpisByAggregation(path_to_log=path_to_log)
    # NOTE: test_custom_aggfunc is not part of this manual run.
    inst.test_builtin_aggfuncs_numeric_cols()
    inst.test_dict_kpi()
    inst.test_string_cols()
    inst.test_some_wrong_col()
    inst.test_all_wrong_cols()

    logger.info('Done testing method get_kpis_by_aggregation!')