FlattenData.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Wed Oct 9 15:17:34 2019
  5. @author: oskar
  6. @description: Class which flattens nested Dataframes, Dictionaries and Lists into tabular form
  7. """
  8. import sys
  9. import os
  10. import time
  11. import pandas as pd
  12. import copy
  13. sys.path.append(os.getcwd())
  14. from libraries.log import Log
  15. log = Log("Flatten data")
  16. class FlattenData():
  17. def __init__(self):
  18. log.info('Flatten Data Initialized')
  19. def flatten(self, data):
  20. '''
  21. :parm data: data given in either dictionary, list or dataframe format.
  22. '''
  23. assert(isinstance(data, (list, dict, pd.DataFrame))),\
  24. "Parameter 'data' either be of List, Dictionary or DataFrame type"
  25. start = time.time()
  26. if type(data) is pd.DataFrame:
  27. return_data = self.flatten_dataframe(data)
  28. print(('Data has been flattened in {} seconds').format(time.time()-start))
  29. return return_data
  30. if type(data) is dict:
  31. return self.flatten_dict(data)
  32. if type(data) is list:
  33. return self.flatten_list(data)
  34. def flatten_dataframe(self, dataframe: pd.DataFrame, incoming_key: str = None):
  35. '''
  36. :param pd.Dataframe dataframe: dataframe containing the data to be flattened
  37. :param str incoming_key: string to be appended to the key
  38. '''
  39. assert(isinstance(dataframe, pd.DataFrame)),\
  40. "Parameter 'dataframe' be of DataFrame type"
  41. assert(isinstance(incoming_key, str)),\
  42. "Parameter 'incoming_key' be of String type"
  43. result_dict = {}
  44. for index, row in dataframe.iterrows():
  45. temp_result_dict = {}
  46. for key, value in row.iteritems():
  47. temp_result = {}
  48. if incoming_key is not None:
  49. key = incoming_key + '_' + key
  50. if type(value) == list:
  51. temp_result = self.flatten_list(value, key)
  52. elif type(value) == dict:
  53. temp_result = self.flatten_dict(value, key)
  54. else:
  55. temp_result_dict[key] = value
  56. if len(temp_result) > 0:
  57. result_dict = self.append_to_dict(result_dict, temp_result)
  58. result_dict[index] = copy.deepcopy(temp_result_dict)
  59. result_dataframe = pd.DataFrame.from_dict(result_dict, orient='index')
  60. return result_dataframe
  61. def flatten_dict(self, dictionary: dict, incoming_key: str = None):
  62. '''
  63. :param dict dictionary: dictionary containing the data to be flattened
  64. :param str incoming_key: string to be appended to the key
  65. '''
  66. assert(isinstance(dictionary, pd.DataFrame)),\
  67. "Parameter 'dictionary' be of Dictionary type"
  68. assert(isinstance(incoming_key, str)),\
  69. "Parameter 'incoming_key' be of String type"
  70. result_dict = {}
  71. for key in dictionary:
  72. temp_dataframe = dictionary[key]
  73. temp_result = {}
  74. if incoming_key is not None:
  75. key = incoming_key + '_' + key
  76. if type(temp_dataframe) == list:
  77. temp_result = self.flatten_list(temp_dataframe, key)
  78. elif type(temp_dataframe) == dict:
  79. temp_result = self.flatten_dict(temp_dataframe, key)
  80. else:
  81. result_dict[key] = temp_dataframe
  82. if len(temp_result) > 0:
  83. result_dict = self.append_to_dict(result_dict, temp_result)
  84. return result_dict
  85. def flatten_list(self, data_list: list, incoming_key: str = None):
  86. '''
  87. :param list data_list: list containing the data to be flattened
  88. :param str incoming_key: string to be appended to the key
  89. '''
  90. assert(isinstance(data_list, pd.DataFrame)),\
  91. "Parameter 'data_list' be of List type"
  92. assert(isinstance(incoming_key, str)),\
  93. "Parameter 'incoming_key' be of String type"
  94. result_dict = {}
  95. for iteration, item in enumerate(data_list):
  96. temp_dataframe = item
  97. temp_result = {}
  98. key = incoming_key
  99. if incoming_key is not None:
  100. if type(data_list[iteration]) is dict:
  101. if 'stationsnummer' in data_list[iteration].keys() and 'stage' in data_list[iteration].keys() :
  102. key = incoming_key + '_' + str(data_list[iteration]['stationsnummer']) + '_' + str(data_list[iteration]['stage'])
  103. else:
  104. key = incoming_key + '_' + str(iteration)
  105. if type(temp_dataframe) == list:
  106. temp_result = self.flatten_list(temp_dataframe, key)
  107. result_dict = self.append_to_dict(result_dict, temp_result)
  108. elif type(temp_dataframe) == dict:
  109. temp_result = self.flatten_dict(temp_dataframe, key)
  110. result_dict = self.append_to_dict(result_dict, temp_result)
  111. else:
  112. result_dict[key] = temp_dataframe
  113. if len(temp_result) > 0:
  114. result_dict = self.append_to_dict(result_dict, temp_result)
  115. return result_dict
  116. def append_to_dict(self, dictionary: dict, to_append):
  117. '''
  118. :param dict dictionary: dictionary which holds all the resulting data.
  119. :param dict to_append: data to be added to the resulting dictionary.
  120. '''
  121. assert(isinstance(dictionary, (list, dict))),\
  122. "Parameter 'dictionary' be of Dictionary type"
  123. assert(isinstance(to_append, dict)),\
  124. "Parameter 'to_append' be of Dictionary type"
  125. for key in to_append:
  126. dictionary[key] = to_append[key]
  127. return dictionary