5 年前 · eee0232f9a
--- a/cdplib/FlattenData.py
+++ b/cdplib/FlattenData.py
@@ -20,7 +20,7 @@ class FlattenData():
 
				     def __init__(self):
			
 
				         self._log = Log("Flatten data")
			
 
				     
			
 
				-    def flatten(self, data) -> pd.DataFrame():
			
 
				+    def flatten(self, data, labels_to_ignore: list = None) -> pd.DataFrame():
			
 
				         '''
			
 
				         :param data: data given in either dictionary, list or dataframe format.
			
 
				         '''
			
@@ -31,17 +31,17 @@ class FlattenData():
 
				         start = time.time()
			
 
				         if type(data) is pd.DataFrame:
			
 
				             in_length = len(data.columns)
			
 
				-            return_data = self.flatten_dataframe(data)
			
 
				+            return_data = self.flatten_dataframe(data, labels_to_ignore)
			
 
				         elif type(data) is pd.Series:
			
 
				             data = pd.DataFrame(data)
			
 
				             in_length = len(data.columns)
			
 
				-            return_data = self.flatten_dataframe(data)
			
 
				+            return_data = self.flatten_dataframe(data, labels_to_ignore)
			
 
				         elif type(data) is dict:
			
 
				             in_length = len(data)
			
 
				-            return_data = self.flatten_dict(data)
			
 
				+            return_data = self.flatten_dict(data, labels_to_ignore)
			
 
				         elif type(data) is list:
			
 
				             in_length = len(data)
			
 
				-            return_data =  self.flatten_list(data)
			
 
				+            return_data =  self.flatten_list(data, labels_to_ignore)
			
 
				         else:
			
 
				             self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
			
 
				             return None
			
@@ -50,7 +50,7 @@ class FlattenData():
 
				         self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
			
 
				         return result_dataframe
			
 
				 
			
 
				-    def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None):
			
 
				+    def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None, labels_to_ignore: list = None):
			
 
				         '''
			
 
				         :param pd.Dataframe dataframe: dataframe containing the data to be flattened
			
 
				         :param str incoming_key: string to be appended to the key
			
@@ -62,27 +62,28 @@ class FlattenData():
 
				                 "Parameter 'incoming_key' be of String type"
			
 
				 
			
 
				         result_dict = {}
			
 
				-        for index, row in dataframe.iterrows():
			
 
				-            temp_result_dict = {}
			
 
				-            for key, value in row.iteritems():
			
 
				-                temp_result = {}
			
 
				-                if incoming_key is not None:
			
 
				-                    key = incoming_key + '_' + key
			
 
				-                if type(value) == list:
			
 
				-                    temp_result = self.flatten_list(value, key)
			
 
				-                elif type(value) == dict:
			
 
				-                    temp_result = self.flatten_dict(value, key)
			
 
				-                else:
			
 
				-                    temp_result_dict[key] = value
			
 
				-
			
 
				-                if len(temp_result) > 0:
			
 
				-                    temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
			
 
				-
			
 
				-            result_dict[index] = copy.deepcopy(temp_result_dict)
			
 
				+        if incoming_key not in labels_to_ignore:
			
 
				+            for index, row in dataframe.iterrows():
			
 
				+                temp_result_dict = {}
			
 
				+                for key, value in row.iteritems():
			
 
				+                    temp_result = {}
			
 
				+                    if incoming_key is not None:
			
 
				+                        key = incoming_key + '_' + key
			
 
				+                    if type(value) == list:
			
 
				+                        temp_result = self.flatten_list(value, key, labels_to_ignore)
			
 
				+                    elif type(value) == dict:
			
 
				+                        temp_result = self.flatten_dict(value, key, labels_to_ignore)
			
 
				+                    else:
			
 
				+                        temp_result_dict[key] = value
			
 
				+
			
 
				+                    if len(temp_result) > 0:
			
 
				+                        temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
			
 
				+
			
 
				+                result_dict[index] = copy.deepcopy(temp_result_dict)
			
 
				 
			
 
				         return result_dict
			
 
				 
			
 
				-    def flatten_dict(self, dictionary: dict, incoming_key: str = None):
			
 
				+    def flatten_dict(self, dictionary: dict, incoming_key: str = None, labels_to_ignore: list = None):
			
 
				         '''
			
 
				         :param dict dictionary: dictionary containing the data to be flattened
			
 
				         :param str incoming_key: string to be appended to the key
			
@@ -95,25 +96,26 @@ class FlattenData():
 
				 
			
 
				 
			
 
				         result_dict = {}
			
 
				-        for key in dictionary:
			
 
				+        if incoming_key not in labels_to_ignore:
			
 
				+            for key in dictionary:
			
 
				 
			
 
				-            temp_dataframe = dictionary[key]
			
 
				-            temp_result = {}
			
 
				-            if incoming_key is not None:
			
 
				-                key = incoming_key + '_' + key
			
 
				-            if type(temp_dataframe) == list:
			
 
				-                temp_result = self.flatten_list(temp_dataframe, key)
			
 
				-            elif type(temp_dataframe) == dict:
			
 
				-                temp_result = self.flatten_dict(temp_dataframe, key)
			
 
				-            else:
			
 
				-                result_dict[key] = temp_dataframe
			
 
				-
			
 
				-            if len(temp_result) > 0:
			
 
				-                result_dict = self.append_to_dict(result_dict, temp_result)
			
 
				+                temp_dataframe = dictionary[key]
			
 
				+                temp_result = {}
			
 
				+                if incoming_key is not None:
			
 
				+                    key = incoming_key + '_' + key
			
 
				+                if type(temp_dataframe) == list:
			
 
				+                    temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
			
 
				+                elif type(temp_dataframe) == dict:
			
 
				+                    temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
			
 
				+                else:
			
 
				+                    result_dict[key] = temp_dataframe
			
 
				+
			
 
				+                if len(temp_result) > 0:
			
 
				+                    result_dict = self.append_to_dict(result_dict, temp_result)
			
 
				 
			
 
				         return result_dict
			
 
				 
			
 
				-    def flatten_list(self, data_list: list, incoming_key: str = None):
			
 
				+    def flatten_list(self, data_list: list, incoming_key: str = None, labels_to_ignore: list = None):
			
 
				         '''
			
 
				         :param list data_list: list containing the data to be flattened
			
 
				         :param str incoming_key: string to be appended to the key
			
@@ -131,29 +133,30 @@ class FlattenData():
 
				             temp_dataframe = item
			
 
				             temp_result = {}
			
 
				             key = incoming_key
			
 
				-            if incoming_key is not None:
			
 
				-                # OEBB SPECIFIC IF STATEMENT
			
 
				-                if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
			
 
				-                        key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
			
 
				-                
			
 
				-                elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
			
 
				-                        key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
			
 
				-                
			
 
				+            if incoming_key not in labels_to_ignore:
			
 
				+                if incoming_key is not None:
			
 
				+                    # OEBB SPECIFIC IF STATEMENT
			
 
				+                    if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
			
 
				+                            key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
			
 
				+                    
			
 
				+                    elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
			
 
				+                            key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
			
 
				+                    
			
 
				+                    else:
			
 
				+                        key = incoming_key + '_' + str(iteration)
			
 
				                 else:
			
 
				-                    key = incoming_key + '_' + str(iteration)
			
 
				-            else:
			
 
				-                key = str(iteration)
			
 
				-            if type(temp_dataframe) == list:
			
 
				-                temp_result = self.flatten_list(temp_dataframe, key)
			
 
				+                    key = str(iteration)
			
 
				+                if type(temp_dataframe) == list:
			
 
				+                    temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
			
 
				 
			
 
				-            elif type(temp_dataframe) == dict:
			
 
				-                temp_result = self.flatten_dict(temp_dataframe, key)
			
 
				+                elif type(temp_dataframe) == dict:
			
 
				+                    temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
			
 
				 
			
 
				-            else:
			
 
				-                result_dict[key] = temp_dataframe
			
 
				+                else:
			
 
				+                    result_dict[key] = temp_dataframe
			
 
				 
			
 
				-            if len(temp_result) > 0:
			
 
				-                result_dict = self.append_to_dict(result_dict, temp_result)
			
 
				+                if len(temp_result) > 0:
			
 
				+                    result_dict = self.append_to_dict(result_dict, temp_result)
			
 
				 
			
 
				         return result_dict