|
@@ -20,7 +20,7 @@ class FlattenData():
|
|
|
def __init__(self):
|
|
|
self._log = Log("Flatten data")
|
|
|
|
|
|
- def flatten(self, data) -> pd.DataFrame():
|
|
|
+ def flatten(self, data, labels_to_ignore: list = None) -> pd.DataFrame():
|
|
|
'''
|
|
|
:param data: data given in either dictionary, list or dataframe format.
|
|
|
'''
|
|
@@ -31,17 +31,17 @@ class FlattenData():
|
|
|
start = time.time()
|
|
|
if type(data) is pd.DataFrame:
|
|
|
in_length = len(data.columns)
|
|
|
- return_data = self.flatten_dataframe(data)
|
|
|
+ return_data = self.flatten_dataframe(data, labels_to_ignore)
|
|
|
elif type(data) is pd.Series:
|
|
|
data = pd.DataFrame(data)
|
|
|
in_length = len(data.columns)
|
|
|
- return_data = self.flatten_dataframe(data)
|
|
|
+ return_data = self.flatten_dataframe(data, labels_to_ignore)
|
|
|
elif type(data) is dict:
|
|
|
in_length = len(data)
|
|
|
- return_data = self.flatten_dict(data)
|
|
|
+ return_data = self.flatten_dict(data, labels_to_ignore)
|
|
|
elif type(data) is list:
|
|
|
in_length = len(data)
|
|
|
- return_data = self.flatten_list(data)
|
|
|
+ return_data = self.flatten_list(data, labels_to_ignore)
|
|
|
else:
|
|
|
self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
|
|
|
return None
|
|
@@ -50,7 +50,7 @@ class FlattenData():
|
|
|
self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
|
|
|
return result_dataframe
|
|
|
|
|
|
- def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None):
|
|
|
+ def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None, labels_to_ignore: list = None):
|
|
|
'''
|
|
|
:param pd.Dataframe dataframe: dataframe containing the data to be flattened
|
|
|
:param str incoming_key: string to be appended to the key
|
|
@@ -62,27 +62,28 @@ class FlattenData():
|
|
|
"Parameter 'incoming_key' be of String type"
|
|
|
|
|
|
result_dict = {}
|
|
|
- for index, row in dataframe.iterrows():
|
|
|
- temp_result_dict = {}
|
|
|
- for key, value in row.iteritems():
|
|
|
- temp_result = {}
|
|
|
- if incoming_key is not None:
|
|
|
- key = incoming_key + '_' + key
|
|
|
- if type(value) == list:
|
|
|
- temp_result = self.flatten_list(value, key)
|
|
|
- elif type(value) == dict:
|
|
|
- temp_result = self.flatten_dict(value, key)
|
|
|
- else:
|
|
|
- temp_result_dict[key] = value
|
|
|
-
|
|
|
- if len(temp_result) > 0:
|
|
|
- temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
|
|
|
-
|
|
|
- result_dict[index] = copy.deepcopy(temp_result_dict)
|
|
|
+ if incoming_key not in labels_to_ignore:
|
|
|
+ for index, row in dataframe.iterrows():
|
|
|
+ temp_result_dict = {}
|
|
|
+ for key, value in row.iteritems():
|
|
|
+ temp_result = {}
|
|
|
+ if incoming_key is not None:
|
|
|
+ key = incoming_key + '_' + key
|
|
|
+ if type(value) == list:
|
|
|
+ temp_result = self.flatten_list(value, key, labels_to_ignore)
|
|
|
+ elif type(value) == dict:
|
|
|
+ temp_result = self.flatten_dict(value, key, labels_to_ignore)
|
|
|
+ else:
|
|
|
+ temp_result_dict[key] = value
|
|
|
+
|
|
|
+ if len(temp_result) > 0:
|
|
|
+ temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
|
|
|
+
|
|
|
+ result_dict[index] = copy.deepcopy(temp_result_dict)
|
|
|
|
|
|
return result_dict
|
|
|
|
|
|
- def flatten_dict(self, dictionary: dict, incoming_key: str = None):
|
|
|
+ def flatten_dict(self, dictionary: dict, incoming_key: str = None, labels_to_ignore: list = None):
|
|
|
'''
|
|
|
:param dict dictionary: dictionary containing the data to be flattened
|
|
|
:param str incoming_key: string to be appended to the key
|
|
@@ -95,25 +96,26 @@ class FlattenData():
|
|
|
|
|
|
|
|
|
result_dict = {}
|
|
|
- for key in dictionary:
|
|
|
+ if incoming_key not in labels_to_ignore:
|
|
|
+ for key in dictionary:
|
|
|
|
|
|
- temp_dataframe = dictionary[key]
|
|
|
- temp_result = {}
|
|
|
- if incoming_key is not None:
|
|
|
- key = incoming_key + '_' + key
|
|
|
- if type(temp_dataframe) == list:
|
|
|
- temp_result = self.flatten_list(temp_dataframe, key)
|
|
|
- elif type(temp_dataframe) == dict:
|
|
|
- temp_result = self.flatten_dict(temp_dataframe, key)
|
|
|
- else:
|
|
|
- result_dict[key] = temp_dataframe
|
|
|
-
|
|
|
- if len(temp_result) > 0:
|
|
|
- result_dict = self.append_to_dict(result_dict, temp_result)
|
|
|
+ temp_dataframe = dictionary[key]
|
|
|
+ temp_result = {}
|
|
|
+ if incoming_key is not None:
|
|
|
+ key = incoming_key + '_' + key
|
|
|
+ if type(temp_dataframe) == list:
|
|
|
+ temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
|
|
|
+ elif type(temp_dataframe) == dict:
|
|
|
+ temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
|
|
|
+ else:
|
|
|
+ result_dict[key] = temp_dataframe
|
|
|
+
|
|
|
+ if len(temp_result) > 0:
|
|
|
+ result_dict = self.append_to_dict(result_dict, temp_result)
|
|
|
|
|
|
return result_dict
|
|
|
|
|
|
- def flatten_list(self, data_list: list, incoming_key: str = None):
|
|
|
+ def flatten_list(self, data_list: list, incoming_key: str = None, labels_to_ignore: list = None):
|
|
|
'''
|
|
|
:param list data_list: list containing the data to be flattened
|
|
|
:param str incoming_key: string to be appended to the key
|
|
@@ -131,29 +133,30 @@ class FlattenData():
|
|
|
temp_dataframe = item
|
|
|
temp_result = {}
|
|
|
key = incoming_key
|
|
|
- if incoming_key is not None:
|
|
|
- # OEBB SPECIFIC IF STATEMENT
|
|
|
- if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
|
|
|
- key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
|
|
|
-
|
|
|
- elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
|
|
|
- key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
|
|
|
-
|
|
|
+ if incoming_key not in labels_to_ignore:
|
|
|
+ if incoming_key is not None:
|
|
|
+ # OEBB SPECIFIC IF STATEMENT
|
|
|
+ if type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys():
|
|
|
+ key = incoming_key + '_' + str(data_list[iteration]['stationsnummer'])
|
|
|
+
|
|
|
+ elif type(data_list[iteration]) is dict and 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
|
|
|
+ key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
|
|
|
+
|
|
|
+ else:
|
|
|
+ key = incoming_key + '_' + str(iteration)
|
|
|
else:
|
|
|
- key = incoming_key + '_' + str(iteration)
|
|
|
- else:
|
|
|
- key = str(iteration)
|
|
|
- if type(temp_dataframe) == list:
|
|
|
- temp_result = self.flatten_list(temp_dataframe, key)
|
|
|
+ key = str(iteration)
|
|
|
+ if type(temp_dataframe) == list:
|
|
|
+ temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
|
|
|
|
|
|
- elif type(temp_dataframe) == dict:
|
|
|
- temp_result = self.flatten_dict(temp_dataframe, key)
|
|
|
+ elif type(temp_dataframe) == dict:
|
|
|
+ temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
|
|
|
|
|
|
- else:
|
|
|
- result_dict[key] = temp_dataframe
|
|
|
+ else:
|
|
|
+ result_dict[key] = temp_dataframe
|
|
|
|
|
|
- if len(temp_result) > 0:
|
|
|
- result_dict = self.append_to_dict(result_dict, temp_result)
|
|
|
+ if len(temp_result) > 0:
|
|
|
+ result_dict = self.append_to_dict(result_dict, temp_result)
|
|
|
|
|
|
return result_dict
|
|
|
|