Forráskód Böngészése

Add functionality to exclude certain labels from being flattened

ogert 4 éve
szülő
commit
eee0232f9a
1 módosított fájl, 61 hozzáadás és 58 törlés
  1. 61 58
      cdplib/FlattenData.py

+ 61 - 58
cdplib/FlattenData.py

@@ -20,7 +20,7 @@ class FlattenData():
     def __init__(self):
         self._log = Log("Flatten data")
     
-    def flatten(self, data) -> pd.DataFrame():
+    def flatten(self, data, labels_to_ignore: list = None) -> pd.DataFrame():
         '''
         :parm data: data given in either dictionary, list or dataframe format.
         '''
@@ -31,17 +31,17 @@ class FlattenData():
         start = time.time()
         if type(data) is pd.DataFrame:
             in_length = len(data.columns)
-            return_data = self.flatten_dataframe(data)
+            return_data = self.flatten_dataframe(data, labels_to_ignore)
         elif type(data) is pd.Series:
             data = pd.DataFrame(data)
             in_length = len(data.columns)
-            return_data = self.flatten_dataframe(data)
+            return_data = self.flatten_dataframe(data, labels_to_ignore)
         elif type(data) is dict:
             in_length = len(data)
-            return_data = self.flatten_dict(data)
+            return_data = self.flatten_dict(data, labels_to_ignore)
         elif type(data) is list:
             in_length = len(data)
-            return_data =  self.flatten_list(data)
+            return_data =  self.flatten_list(data, labels_to_ignore)
         else:
             self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
             return None
@@ -50,7 +50,7 @@ class FlattenData():
         self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
         return result_dataframe
 
-    def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None):
+    def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None, labels_to_ignore: list = None):
         '''
         :param pd.Dataframe dataframe: dataframe containing the data to be flattened
         :param str incoming_key: string to be appended to the key
@@ -62,27 +62,28 @@ class FlattenData():
                 "Parameter 'incoming_key' be of String type"
 
         result_dict = {}
-        for index, row in dataframe.iterrows():
-            temp_result_dict = {}
-            for key, value in row.iteritems():
-                temp_result = {}
-                if incoming_key is not None:
-                    key = incoming_key + '_' + key
-                if type(value) == list:
-                    temp_result = self.flatten_list(value, key)
-                elif type(value) == dict:
-                    temp_result = self.flatten_dict(value, key)
-                else:
-                    temp_result_dict[key] = value
-
-                if len(temp_result) > 0:
-                    temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
-
-            result_dict[index] = copy.deepcopy(temp_result_dict)
+        if incoming_key not in labels_to_ignore:
+            for index, row in dataframe.iterrows():
+                temp_result_dict = {}
+                for key, value in row.iteritems():
+                    temp_result = {}
+                    if incoming_key is not None:
+                        key = incoming_key + '_' + key
+                    if type(value) == list:
+                        temp_result = self.flatten_list(value, key, labels_to_ignore)
+                    elif type(value) == dict:
+                        temp_result = self.flatten_dict(value, key, labels_to_ignore)
+                    else:
+                        temp_result_dict[key] = value
+
+                    if len(temp_result) > 0:
+                        temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
+
+                result_dict[index] = copy.deepcopy(temp_result_dict)
 
         return result_dict
 
-    def flatten_dict(self, dictionary: dict, incoming_key: str = None):
+    def flatten_dict(self, dictionary: dict, incoming_key: str = None, labels_to_ignore: list = None):
         '''
         :param dict dictionary: dictionary containing the data to be flattened
         :param str incoming_key: string to be appended to the key
@@ -95,25 +96,26 @@ class FlattenData():
 
 
         result_dict = {}
-        for key in dictionary:
+        if incoming_key not in labels_to_ignore:
+            for key in dictionary:
 
-            temp_dataframe = dictionary[key]
-            temp_result = {}
-            if incoming_key is not None:
-                key = incoming_key + '_' + key
-            if type(temp_dataframe) == list:
-                temp_result = self.flatten_list(temp_dataframe, key)
-            elif type(temp_dataframe) == dict:
-                temp_result = self.flatten_dict(temp_dataframe, key)
-            else:
-                result_dict[key] = temp_dataframe
-
-            if len(temp_result) > 0:
-                result_dict = self.append_to_dict(result_dict, temp_result)
+                temp_dataframe = dictionary[key]
+                temp_result = {}
+                if incoming_key is not None:
+                    key = incoming_key + '_' + key
+                if type(temp_dataframe) == list:
+                    temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
+                elif type(temp_dataframe) == dict:
+                    temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
+                else:
+                    result_dict[key] = temp_dataframe
+
+                if len(temp_result) > 0:
+                    result_dict = self.append_to_dict(result_dict, temp_result)
 
         return result_dict
 
-    def flatten_list(self, data_list: list, incoming_key: str = None):
+    def flatten_list(self, data_list: list, incoming_key: str = None, labels_to_ignore: list = None):
         '''
         :param list data_list: list containing the data to be flattened
         :param str incoming_key: string to be appended to the key
@@ -131,29 +133,30 @@ class FlattenData():
             temp_dataframe = item
             temp_result = {}
             key = incoming_key
-            if incoming_key is not None:
-                # OEBB SPECIFIC IF STATEMENT
-                if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
-                        key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
-                
-                elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
-                        key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
-                
+            if incoming_key not in labels_to_ignore:
+                if incoming_key is not None:
+                    # OEBB SPECIFIC IF STATEMENT
+                    if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
+                            key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
+                    
+                    elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
+                            key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
+                    
+                    else:
+                        key = incoming_key + '_' + str(iteration)
                 else:
-                    key = incoming_key + '_' + str(iteration)
-            else:
-                key = str(iteration)
-            if type(temp_dataframe) == list:
-                temp_result = self.flatten_list(temp_dataframe, key)
+                    key = str(iteration)
+                if type(temp_dataframe) == list:
+                    temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
 
-            elif type(temp_dataframe) == dict:
-                temp_result = self.flatten_dict(temp_dataframe, key)
+                elif type(temp_dataframe) == dict:
+                    temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
 
-            else:
-                result_dict[key] = temp_dataframe
+                else:
+                    result_dict[key] = temp_dataframe
 
-            if len(temp_result) > 0:
-                result_dict = self.append_to_dict(result_dict, temp_result)
+                if len(temp_result) > 0:
+                    result_dict = self.append_to_dict(result_dict, temp_result)
 
         return result_dict