Browse Source

Changed log print. Implemented test for the changed method.

ogert 5 years ago
parent
commit
db4a6810b4
2 changed files with 50 additions and 6 deletions
  1. 6 6
      cdplib/FlattenData.py
  2. 44 0
      cdplib/unit_tests/TestFlattenData.py

+ 6 - 6
cdplib/FlattenData.py

@@ -14,13 +14,12 @@ import pandas as pd
 import copy
 sys.path.append(os.getcwd())
 from cdplib.log import Log
-log = Log("Flatten data")
 
 class FlattenData():
 
     def __init__(self):
-        log.info('Flatten Data Initialized')
-
+        self._log = Log("Flatten data")
+    
     def flatten(self, data):
         '''
         :parm data: data given in either dictionary, list or dataframe format.
@@ -32,7 +31,7 @@ class FlattenData():
         start = time.time()
         if type(data) is pd.DataFrame:
             return_data = self.flatten_dataframe(data)
-            print(('Data has been flattened in {} seconds').format(time.time()-start))
+            self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(return_data.index), time.time()-start))
             return return_data
         if type(data) is dict:
             return self.flatten_dict(data)
@@ -129,13 +128,14 @@ class FlattenData():
                         key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
                 else:
                     key = incoming_key + '_' + str(iteration)
+            else:
+                key = str(iteration)
             if type(temp_dataframe) == list:
                 temp_result = self.flatten_list(temp_dataframe, key)
-                result_dict = self.append_to_dict(result_dict, temp_result)
 
             elif type(temp_dataframe) == dict:
                 temp_result = self.flatten_dict(temp_dataframe, key)
-                result_dict = self.append_to_dict(result_dict, temp_result)
+
             else:
                 result_dict[key] = temp_dataframe
 

+ 44 - 0
cdplib/unit_tests/TestFlattenData.py

@@ -0,0 +1,44 @@
+import unittest
+import sys
+import os
+import pandas as pd
+sys.path.append(os.getcwd())
+from cdplib.log import Log
+from cdplib.FlattenData import FlattenData
+
+class TestMongodbHandler(unittest.TestCase):
+
+    def setUp(self):
+        self.flattener = FlattenData()
+
+    def test_A_flatten(self):
+        '''
+        Create some nested test data, in the formats: dict, list and dataframe
+        Flatten the test data
+        Compare the results
+        '''
+        nested_data_dict = {
+            "one_level": "test_level_1",
+            "two_levels": {
+                "one_level": "test_level_2"
+            },
+            "three_levels": {
+                "two_levels": {
+                    "one_level": "test_level_3"
+                }
+            }
+        }
+
+        nested_data_list = [nested_data_dict, nested_data_dict]
+        nested_data_df = pd.DataFrame.from_dict([nested_data_dict])
+
+        flattened_dict = self.flattener.flatten(nested_data_dict)
+        flattened_list = self.flattener.flatten(nested_data_list)
+        flattened_df = self.flattener.flatten(nested_data_df)
+
+        self.assertEqual(nested_data_dict["two_levels"]["one_level"], flattened_dict['two_levels_one_level'])
+        self.assertEqual(nested_data_dict["two_levels"]["one_level"], flattened_list['0_two_levels_one_level'])
+        self.assertEqual(nested_data_dict["two_levels"]["one_level"], flattened_df.loc[0 , 'two_levels_one_level'])
+        
+if __name__ == '__main__':
+    unittest.main()