Sfoglia il codice sorgente

Add functionality to ignore certain labels from being flattened

ogert 4 anni fa
parent
commit
7b0d60db15
1 ha cambiato i file con 33 aggiunte e 27 eliminazioni
  1. 33 27
      cdplib/FlattenData.py

+ 33 - 27
cdplib/FlattenData.py

@@ -29,24 +29,29 @@ class FlattenData():
             "Parameter 'data' either be of List, Dictionary or DataFrame type"
         in_length=0
         start = time.time()
+        index_name=None
         if type(data) is pd.DataFrame:
             in_length = len(data.columns)
-            return_data = self.flatten_dataframe(data, labels_to_ignore)
+            index_name = data.index.name
+            return_data = self.flatten_dataframe(data, labels_to_ignore=labels_to_ignore)
+            
         elif type(data) is pd.Series:
             data = pd.DataFrame(data)
             in_length = len(data.columns)
-            return_data = self.flatten_dataframe(data, labels_to_ignore)
+            return_data = self.flatten_dataframe(data, labels_to_ignore=labels_to_ignore)
         elif type(data) is dict:
             in_length = len(data)
-            return_data = self.flatten_dict(data, labels_to_ignore)
+            return_data = self.flatten_dict(data, labels_to_ignore=labels_to_ignore)
         elif type(data) is list:
             in_length = len(data)
-            return_data =  self.flatten_list(data, labels_to_ignore)
+            return_data =  self.flatten_list(data, labels_to_ignore=labels_to_ignore)
         else:
             self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
             return None
 
         result_dataframe = pd.DataFrame.from_dict(return_data, orient='index')
+        if index_name is not None:
+            result_dataframe.index.name = index_name
         self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
         return result_dataframe
 
@@ -66,13 +71,15 @@ class FlattenData():
         for index, row in dataframe.iterrows():
             temp_result_dict = {}
             for key, value in row.iteritems():
-                print('key:',  key)
-                print('value:',  value)
-                print('labels_to_ignore:', labels_to_ignore)
-                if key not in labels_to_ignore:
-                    temp_result = {}
-                    if incoming_key is not None:
-                        key = incoming_key + '_' + key
+                small_key = key
+                if incoming_key is not None:
+                    key = incoming_key + '_' + key
+                temp_result = {}
+
+                if small_key in labels_to_ignore:
+                    temp_result_dict[key] = value
+                    
+                else:
                     if type(value) == list:
                         temp_result = self.flatten_list(value, key, labels_to_ignore)
                     elif type(value) == dict:
@@ -80,9 +87,6 @@ class FlattenData():
                     else:
                         temp_result_dict[key] = value
 
-                else:
-                    temp_result_dict[key] = value
-
                 if len(temp_result) > 0:
                         temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
 
@@ -104,19 +108,21 @@ class FlattenData():
 
         result_dict = {}
         for key in dictionary:
-            if key not in labels_to_ignore:
-                temp_dataframe = dictionary[key]
-                temp_result = {}
-                if incoming_key is not None:
-                    key = incoming_key + '_' + key
-                if type(temp_dataframe) == list:
-                    temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
-                elif type(temp_dataframe) == dict:
-                    temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
-                else:
-                    result_dict[key] = temp_dataframe
+            small_key = key
+
+            temp_data = dictionary[key]
+            if incoming_key is not None:
+                key = incoming_key + '_' + key
+            temp_result = {}
+            if small_key in labels_to_ignore:
+                result_dict[key] = temp_data 
             else:
-                result_dict[key] = temp_dataframe
+                if type(temp_data) == list:
+                    temp_result = self.flatten_list(temp_data, key, labels_to_ignore)
+                elif type(temp_data) == dict:
+                    temp_result = self.flatten_dict(temp_data, key, labels_to_ignore)
+                else:
+                    result_dict[key] = temp_data
 
             if len(temp_result) > 0:
                     result_dict = self.append_to_dict(result_dict, temp_result)
@@ -187,7 +193,7 @@ class FlattenData():
 
         for data_type in data.dtypes:
                 if data_type == object:
-                    return self.flatten(data, labels_to_ignore) 
+                    return self.flatten(data, labels_to_ignore=labels_to_ignore) 
 
         return data