CleanRs0.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Sep 30 10:14:46 2019
  5. @author: tanya
  6. """
  7. import pandas as pd
  8. import os
  9. import sys
  10. sys.path.append(os.getcwd())
  11. from libraries.import_process_instances.CleanProcessTable import CleanTable
  12. class CleanRs0(CleanTable):
  13. '''
  14. '''
  def __init__(self):
      '''
      Initialise the parent class with the rs0-specific configuration.

      All settings are passed as keyword arguments to the parent
      constructor: the path of the rs0 mapping file (relative to the
      current directory), the name of the inconsistency report table,
      the column lists used for index filtering, sorting and indexing,
      and the prefix used for log messages.
      '''
      rs0_mapping_file = os.path.join(".", "migration_mappings",
                                      "rs0_mapping.json")

      # The same two columns are used for sorting and for indexing;
      # each keyword argument still receives its own list object.
      key_columns = ["radsatznummer", "eingabe_datum"]

      super().__init__(
          mapping_path=rs0_mapping_file,
          inconsist_report_table="inconsist_rs0",
          filter_index_columns=["radsatznummer"],
          sort_columns=list(key_columns),
          index_columns=list(key_columns),
          log_name="CleanRs0:")
  def restrict_to_process_data(self, data: pd.DataFrame) -> pd.DataFrame:
      '''
      Return only the process-related columns of the table.

      :param data: table that has to contain the columns
          radsatznummer, aufarbeitungstyp, ihs and
          befundung_code_1 .. befundung_code_3
      :return: selection of exactly these columns, in this order

      The column list is handed to self.error_column_abscence before
      the selection (presumably this reports missing columns -
      confirm against the parent class).
      '''
      selected = [
          "radsatznummer",
          "aufarbeitungstyp",
          "ihs",
          "befundung_code_1",
          "befundung_code_2",
          "befundung_code_3",
      ]

      self.error_column_abscence(columns=selected, data=data)

      process_data = data[selected]
      return process_data
  def add_ist_schrott(self, data: pd.DataFrame) -> pd.DataFrame:
      '''
      Add the boolean column "final_state.ist_schrott" to the table.

      The new column is True in every row whose "aufarbeitungstyp"
      equals 2 and False otherwise (presumably type 2 marks a
      scrapped item - confirm with the data owners).

      :param data: table containing the column "aufarbeitungstyp"
      :return: the same DataFrame object, modified in place
      '''
      self.error_column_abscence(columns=["aufarbeitungstyp"],
                                 data=data)

      target_column = "final_state.ist_schrott"
      is_type_two = data["aufarbeitungstyp"].eq(2)
      data[target_column] = is_type_two

      return data
  def restrict_to_meta_data(self, data: pd.DataFrame) -> pd.DataFrame:
      '''
      Return the table without the process-specific columns.

      Every column except aufarbeitungstyp, ihs and
      befundung_code_1 .. befundung_code_3 is kept; the original
      column order is preserved.

      :param data: table to be restricted
      :return: selection of the remaining columns
      '''
      excluded = ("aufarbeitungstyp", "ihs",
                  "befundung_code_1", "befundung_code_2",
                  "befundung_code_3")

      kept = []
      for name in data.columns:
          if name in excluded:
              continue
          kept.append(name)

      # The kept names are taken from data.columns itself; the check
      # is nevertheless performed, as for the other restrictions.
      self.error_column_abscence(columns=kept, data=data)

      return data[kept]
  def filter_invalid_metacolumns(self, data: pd.DataFrame,
                                 metacolumns: list = None) -> pd.DataFrame:
      '''
      Pass rows with missing meta information to the invalid-data filter.

      For each meta column in turn, the rows holding a null value in
      that column are marked and handed to self._filter_invalid_data
      together with the reason "Missing <column>". The table returned
      by that call is the input for the next column.

      :param data: table to be filtered
      :param metacolumns: columns to check; if None, the columns
          wellentype, Lagerbauart and tauschgruppe are used
      :return: the table returned by the last filter call (the
          unchanged input if metacolumns is empty)
      '''
      columns_to_check = (metacolumns if metacolumns is not None
                          else ["wellentype", "Lagerbauart", "tauschgruppe"])

      for column in columns_to_check:
          # The mask is rebuilt on the current table in each pass,
          # because the previous pass may have changed it.
          is_missing = data[column].isna()
          data = self._filter_invalid_data(
              invalid_mask=is_missing,
              reason="Missing {}".format(column),
              data=data)

      return data