|
@@ -29,24 +29,29 @@ class FlattenData():
|
|
"Parameter 'data' either be of List, Dictionary or DataFrame type"
|
|
"Parameter 'data' either be of List, Dictionary or DataFrame type"
|
|
in_length=0
|
|
in_length=0
|
|
start = time.time()
|
|
start = time.time()
|
|
|
|
+ index_name=None
|
|
if type(data) is pd.DataFrame:
|
|
if type(data) is pd.DataFrame:
|
|
in_length = len(data.columns)
|
|
in_length = len(data.columns)
|
|
- return_data = self.flatten_dataframe(data, labels_to_ignore)
|
|
|
|
|
|
+ index_name = data.index.name
|
|
|
|
+ return_data = self.flatten_dataframe(data, labels_to_ignore=labels_to_ignore)
|
|
|
|
+
|
|
elif type(data) is pd.Series:
|
|
elif type(data) is pd.Series:
|
|
data = pd.DataFrame(data)
|
|
data = pd.DataFrame(data)
|
|
in_length = len(data.columns)
|
|
in_length = len(data.columns)
|
|
- return_data = self.flatten_dataframe(data, labels_to_ignore)
|
|
|
|
|
|
+ return_data = self.flatten_dataframe(data, labels_to_ignore=labels_to_ignore)
|
|
elif type(data) is dict:
|
|
elif type(data) is dict:
|
|
in_length = len(data)
|
|
in_length = len(data)
|
|
- return_data = self.flatten_dict(data, labels_to_ignore)
|
|
|
|
|
|
+ return_data = self.flatten_dict(data, labels_to_ignore=labels_to_ignore)
|
|
elif type(data) is list:
|
|
elif type(data) is list:
|
|
in_length = len(data)
|
|
in_length = len(data)
|
|
- return_data = self.flatten_list(data, labels_to_ignore)
|
|
|
|
|
|
+ return_data = self.flatten_list(data, labels_to_ignore=labels_to_ignore)
|
|
else:
|
|
else:
|
|
self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
|
|
self._log.log_and_raise_warning(("Input data type '{}' is not supported").format(type(data)))
|
|
return None
|
|
return None
|
|
|
|
|
|
result_dataframe = pd.DataFrame.from_dict(return_data, orient='index')
|
|
result_dataframe = pd.DataFrame.from_dict(return_data, orient='index')
|
|
|
|
+ if index_name is not None:
|
|
|
|
+ result_dataframe.index.name = index_name
|
|
self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
|
|
self._log.info(('Data has been flattened, created {} columns in {} seconds').format(len(result_dataframe.columns)- in_length, time.time()-start))
|
|
return result_dataframe
|
|
return result_dataframe
|
|
|
|
|
|
@@ -66,13 +71,15 @@ class FlattenData():
|
|
for index, row in dataframe.iterrows():
|
|
for index, row in dataframe.iterrows():
|
|
temp_result_dict = {}
|
|
temp_result_dict = {}
|
|
for key, value in row.iteritems():
|
|
for key, value in row.iteritems():
|
|
- print('key:', key)
|
|
|
|
- print('value:', value)
|
|
|
|
- print('labels_to_ignore:', labels_to_ignore)
|
|
|
|
- if key not in labels_to_ignore:
|
|
|
|
- temp_result = {}
|
|
|
|
- if incoming_key is not None:
|
|
|
|
- key = incoming_key + '_' + key
|
|
|
|
|
|
+ small_key = key
|
|
|
|
+ if incoming_key is not None:
|
|
|
|
+ key = incoming_key + '_' + key
|
|
|
|
+ temp_result = {}
|
|
|
|
+
|
|
|
|
+ if small_key in labels_to_ignore:
|
|
|
|
+ temp_result_dict[key] = value
|
|
|
|
+
|
|
|
|
+ else:
|
|
if type(value) == list:
|
|
if type(value) == list:
|
|
temp_result = self.flatten_list(value, key, labels_to_ignore)
|
|
temp_result = self.flatten_list(value, key, labels_to_ignore)
|
|
elif type(value) == dict:
|
|
elif type(value) == dict:
|
|
@@ -80,9 +87,6 @@ class FlattenData():
|
|
else:
|
|
else:
|
|
temp_result_dict[key] = value
|
|
temp_result_dict[key] = value
|
|
|
|
|
|
- else:
|
|
|
|
- temp_result_dict[key] = value
|
|
|
|
-
|
|
|
|
if len(temp_result) > 0:
|
|
if len(temp_result) > 0:
|
|
temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
|
|
temp_result_dict = self.append_to_dict(temp_result_dict, temp_result)
|
|
|
|
|
|
@@ -104,19 +108,21 @@ class FlattenData():
|
|
|
|
|
|
result_dict = {}
|
|
result_dict = {}
|
|
for key in dictionary:
|
|
for key in dictionary:
|
|
- if key not in labels_to_ignore:
|
|
|
|
- temp_dataframe = dictionary[key]
|
|
|
|
- temp_result = {}
|
|
|
|
- if incoming_key is not None:
|
|
|
|
- key = incoming_key + '_' + key
|
|
|
|
- if type(temp_dataframe) == list:
|
|
|
|
- temp_result = self.flatten_list(temp_dataframe, key, labels_to_ignore)
|
|
|
|
- elif type(temp_dataframe) == dict:
|
|
|
|
- temp_result = self.flatten_dict(temp_dataframe, key, labels_to_ignore)
|
|
|
|
- else:
|
|
|
|
- result_dict[key] = temp_dataframe
|
|
|
|
|
|
+ small_key = key
|
|
|
|
+
|
|
|
|
+ temp_data = dictionary[key]
|
|
|
|
+ if incoming_key is not None:
|
|
|
|
+ key = incoming_key + '_' + key
|
|
|
|
+ temp_result = {}
|
|
|
|
+ if small_key in labels_to_ignore:
|
|
|
|
+ result_dict[key] = temp_data
|
|
else:
|
|
else:
|
|
- result_dict[key] = temp_dataframe
|
|
|
|
|
|
+ if type(temp_data) == list:
|
|
|
|
+ temp_result = self.flatten_list(temp_data, key, labels_to_ignore)
|
|
|
|
+ elif type(temp_data) == dict:
|
|
|
|
+ temp_result = self.flatten_dict(temp_data, key, labels_to_ignore)
|
|
|
|
+ else:
|
|
|
|
+ result_dict[key] = temp_data
|
|
|
|
|
|
if len(temp_result) > 0:
|
|
if len(temp_result) > 0:
|
|
result_dict = self.append_to_dict(result_dict, temp_result)
|
|
result_dict = self.append_to_dict(result_dict, temp_result)
|
|
@@ -187,7 +193,7 @@ class FlattenData():
|
|
|
|
|
|
for data_type in data.dtypes:
|
|
for data_type in data.dtypes:
|
|
if data_type == object:
|
|
if data_type == object:
|
|
- return self.flatten(data, labels_to_ignore)
|
|
|
|
|
|
+ return self.flatten(data, labels_to_ignore=labels_to_ignore)
|
|
|
|
|
|
return data
|
|
return data
|
|
|
|
|