CleanRs2.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. Created on Mon Sep 30 10:06:48 2019
  5. @author: tanya
  6. """
  7. import pandas as pd
  8. import os
  9. import sys
  10. sys.path.append(os.getcwd())
  11. from libraries.import_process_instances.CleanProcessTable import CleanProcessTable
  12. class CleanRs2(CleanProcessTable):
  13. '''
  14. '''
  def __init__(self):
      '''
      Set up the cleaner for the rs2 table.

      Only forwards a fixed rs2 configuration to the parent constructor:
      the path of the mapping file, the name of the inconsistency report
      table, the sort columns, the index columns and the log name.
      '''
      # Relative path, so it is resolved against the current working
      # directory at the time the parent class reads it.
      rs2_mapping_file = os.path.join(
          ".", "migration_mappings", "rs2_mapping.json")

      order_by = ["radsatznummer", "ende_der_bearbeitung"]
      key_columns = ["radsatznummer", "positionsnummer"]

      super().__init__(mapping_path=rs2_mapping_file,
                       inconsist_report_table="inconsist_rs2",
                       sort_columns=order_by,
                       index_columns=key_columns,
                       log_name="CleanRs2")
  def filter_invalid_ende_der_bearbeitung(self, data: pd.DataFrame
                                          ) -> pd.DataFrame:
      '''
      Drop the rows without ende_der_bearbeitung and parse that column
      as datetime.

      A missing ende_der_bearbeitung means that the activity was planned,
      but not executed.

      :param data: table that has to contain the columns
          "radsatznummer" and "ende_der_bearbeitung"
      :return: the frame handed back by _filter_invalid_data with
          "ende_der_bearbeitung" converted by pd.to_datetime; the
          passed-in frame itself is left untouched
      '''
      # NOTE(review): presumably raises when one of the columns is
      # missing - confirm against the parent class.
      self.error_column_abscence(columns=["radsatznummer",
                                          "ende_der_bearbeitung"],
                                 data=data)

      is_invalid = data["ende_der_bearbeitung"].isnull()

      data = self._filter_invalid_data(
          data=data,
          invalid_mask=is_invalid,
          reason="invalid ende der bearbeitung")

      # assign() returns a new frame instead of writing into the filtered
      # one. The filtered frame may be the caller's own object or a slice
      # of it, so an in-place column assignment would either mutate the
      # caller's data or trigger a chained-assignment warning.
      return data.assign(
          ende_der_bearbeitung=pd.to_datetime(data["ende_der_bearbeitung"]))
  def filter_invalid_taetigkeitsname(self, data: pd.DataFrame
                                     ) -> pd.DataFrame:
      '''
      Filter out scrap activities that are not the end of a history.

      The configuration stores a list of activities (schrott_taetigkeiten)
      whose execution means that the wheel-set is scrap. After such an
      activity the process history should end. A row is therefore marked
      invalid when its taetigkeitsname is in that list and the next row,
      in the order of the sort columns, has the same radsatznummer.

      :param data: table that has to contain the columns
          "radsatznummer" and "taetigkeitsname" (and the sort columns)
      :return: the frame handed back by _filter_invalid_data for the
          sorted table; the passed-in frame itself is not reordered
      '''
      from libraries.configuration import default as cfg

      # NOTE(review): presumably raises when one of the columns is
      # missing - confirm against the parent class.
      self.error_column_abscence(columns=["radsatznummer",
                                          "taetigkeitsname"],
                                 data=data)

      # Sort into a new frame instead of in place, so that the caller's
      # DataFrame is not reordered as a side effect. The index is kept,
      # so the masks below still line up with the rows.
      data = data.sort_values(by=self._sort_columns)

      # The last row of a wheel-set is the one followed by a different
      # radsatznummer (or by nothing at all).
      is_last_station = (
          data["radsatznummer"] !=
          data["radsatznummer"].shift(-1))

      is_invalid = (
          ~is_last_station &
          data["taetigkeitsname"].isin(cfg.schrott_taetigkeiten))

      return self._filter_invalid_data(
          data=data,
          invalid_mask=is_invalid,
          reason="invalid taetigkeit")