ParseMapping.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Fri Sep 20 15:33:17 2019
  5. @author: tanya
  6. """
  7. import os
  8. import sys
  9. import numpy as np
  10. import json
  11. from cdplib.log import Log
  12. sys.path.append(os.getcwd())
  13. class ParseMapping:
  14. '''
  15. '''
  16. def __init__(self, mapping_paths: (str, list), log_name: str = "ParseMapping",
  17. source: str = "original_name", target: str = "mongo_name",
  18. target_collections: str = "mongo_collection"):
  19. '''
  20. '''
  21. self._log = Log('Parse Mapping')
  22. assert(isinstance(mapping_paths, (list, str))),\
  23. "Mapping_paths must be either str or lists"
  24. if isinstance(mapping_paths, str):
  25. mapping_paths = [mapping_paths]
  26. self._mapping_paths = mapping_paths
  27. self._source = source
  28. self._target = target
  29. self._target_collections = target_collections
  30. self._update_mapping()
  31. def _update_mapping(self):
  32. '''
  33. Since we can have multiple mappings per table we need to add them to
  34. the object. I concatenated the mapping so that we don't have to adjust
  35. all function of the class to accept also list input. The class could
  36. be adjusted to accept list or even a dictornary with the key name as
  37. name of the mapping and value the json mapping.
  38. !!! WARNING !!!!
  39. Since the mapping are just concatenated there is right now
  40. no way to ditinguish from the object itself which item belongs to which
  41. mapping file.
  42. '''
  43. mappings = []
  44. for mapping_path in self._mapping_paths:
  45. try:
  46. with open(mapping_path, "r") as f:
  47. mapping = json.load(f)
  48. mappings.append(mapping)
  49. except Exception as e:
  50. err = ("Could not load json schema:{1} , "
  51. "Obtained error {0}".format(e, mapping_path))
  52. self._log.error(err)
  53. raise Exception(err)
  54. if len(mappings) > 1:
  55. concatenate_mapping = []
  56. for mapping in mappings:
  57. if not concatenate_mapping:
  58. concatenate_mapping = mapping
  59. else:
  60. concatenate_mapping.extend(mapping)
  61. self._mapping = concatenate_mapping
  62. else:
  63. self._mapping = mappings[0]
  64. def get_field_mapping(self) -> dict:
  65. '''
  66. '''
  67. assert(all([set([self._source, self._target]) <= set(d)
  68. for d in self._mapping]))
  69. return {d[self._source]: d[self._target] for d in self._mapping}
  70. def _get_fields_satistisfying_condition(self, key: str, value) -> list:
  71. '''
  72. '''
  73. assert(all([self._source in d for d in self._mapping])),\
  74. "Invalid from field"
  75. return [d[self._source] for d in self._mapping
  76. if (key in d) and (value in d[key])]
  77. def get_required_fields(self) -> list:
  78. '''
  79. '''
  80. return self._get_fields_satistisfying_condition(key="required",
  81. value=1)
  82. def get_date_fields(self) -> list:
  83. '''
  84. '''
  85. return self._get_fields_satistisfying_condition(key="type",
  86. value="Date")
  87. def get_fields_restricted_to_collection(self, collection_name: str) -> list:
  88. '''
  89. '''
  90. return self._get_fields_satistisfying_condition(key=self._target_collections,
  91. value=collection_name)
  92. def _get_property_from_mapping(self, property_names: list) -> dict:
  93. '''
  94. Get specified property names from migration mapping json.
  95. '''
  96. assert(isinstance(property_names,list)),\
  97. "Parameter 'property_names' is not a list"
  98. assert(all([self._source in d for d in self._mapping])),\
  99. "Not all objects in the mapping json contain property tag " + self._source
  100. result = {}
  101. for column_mapping in self._mapping:
  102. for property_name in property_names:
  103. if property_name in column_mapping and column_mapping[property_name]:
  104. result.update({column_mapping[self._source]: column_mapping[property_name]})
  105. return result
  106. def get_default_values(self) -> dict:
  107. '''
  108. Get default values from migration mapping json. If more peorerty names
  109. are beeing added also add them in the unit test.
  110. '''
  111. standard_default_names=["default_values"]
  112. return self._get_property_from_mapping(standard_default_names)
  113. def get_types(self) -> dict:
  114. '''
  115. Get type from migration mapping json. If more peorerty names
  116. are beeing added also add them in the unit test.
  117. '''
  118. standard_type_names=["type"]
  119. return self._get_property_from_mapping(standard_type_names)
  120. def get_value_mappings(self) -> dict:
  121. '''
  122. Get type from migration mapping json. If more peorerty names
  123. are beeing added also add them in the unit test.
  124. '''
  125. standard_value_mapping_names = ["value_mapping"]
  126. return self._get_property_from_mapping(standard_value_mapping_names)
  127. def get_date_formats(self) -> dict:
  128. '''
  129. Get date fromats from migration mapping json. If more peorerty names
  130. are beeing added or value also add them in the unit test.
  131. '''
  132. standard_date_format_names = ["date_format"]
  133. return self._get_property_from_mapping(standard_date_format_names)
  134. def get_internal_names(self) -> dict:
  135. '''
  136. '''
  137. if all(["internal_name" in d for d in self._mapping]):
  138. internal_names = [d["internal_name"] for d in self._mapping]
  139. elif all(["internal_name" not in d for d in self._mapping]):
  140. internal_names = list(range(len(self._mapping)))
  141. else:
  142. err = ("Incorrectly filled mapping. Internal names should "
  143. "either be in all or in neither of the fields")
  144. self._log.error(err)
  145. raise Exception(err)
  146. return internal_names
  147. def get_mongo_names(self) -> dict:
  148. '''
  149. '''
  150. if all(["mongo_name" in d for d in self._mapping]):
  151. mongo_names = [d["mongo_name"] for d in self._mapping]
  152. elif all(["mongo_name" not in d for d in self._mapping]):
  153. mongo_names = list(range(len(self._mapping)))
  154. else:
  155. err = ("Incorrectly filled mapping. Mongo names should "
  156. "either be in all or in neither of the fields")
  157. self._log.error(err)
  158. raise Exception(err)
  159. return mongo_names
  160. def get_python_types(self) -> dict:
  161. '''
  162. '''
  163. sql_to_python_dtypes = {
  164. "Text": str,
  165. "Date": np.dtype('<M8[ns]'),
  166. "Double": float,
  167. "Integer": int
  168. }
  169. sql_types = self.get_types()
  170. return {k: sql_to_python_dtypes[v] for k, v in sql_types.items()}
  171. def get_column_numbers(self) -> list:
  172. '''
  173. '''
  174. if all(["column_number" in d for d in self._mapping]):
  175. column_numbers = [d["column_number"] for d in self._mapping]
  176. elif all(["column_number" not in d for d in self._mapping]):
  177. column_numbers = list(range(len(self._mapping)))
  178. else:
  179. err = ("Incorrectly filled mapping. Column numbers should ",
  180. "either in all or in neither of the fields")
  181. self._log.err(err)
  182. raise Exception(err)
  183. return column_numbers
  184. if __name__ == "__main__":
  185. mapping_path = os.path.join(".", "migration_mappings", "unit_test_migration_mapping.json")
  186. if os.path.isfile(mapping_path):
  187. print("found mapping path")
  188. parser = ParseMapping(mapping_path, source="internal_name",
  189. target="mongo_name")
  190. default_values = parser.get_default_values()
  191. print(default_values)
  192. date_formats = parser.get_date_formats()
  193. print(date_formats)
  194. mongo_names = parser.get_mongo_names()
  195. print(mongo_names)
  196. types = parser.get_types()
  197. print(types)
  198. column_numbers = parser.get_column_numbers()
  199. print(column_numbers)
  200. value_mappings = parser.get_value_mappings()
  201. print(value_mappings)
  202. date_formats = parser.get_date_formats()
  203. print(date_formats)
  204. print("Done testing!")