#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Cleaning steps for the rs2 process table.

Created on Mon Sep 30 10:06:48 2019

@author: tanya
"""

import pandas as pd
import os
import sys

# The working directory must be on sys.path before the project-local
# "libraries" package is imported below.
sys.path.append(os.getcwd())

from libraries.import_process_instances.CleanProcessTable import CleanProcessTable


class CleanRs2(CleanProcessTable):
    '''
    Cleaner for the rs2 table, built on top of CleanProcessTable.

    Adds two rs2-specific filters: one on ende_der_bearbeitung and one on
    taetigkeitsname.
    '''
    def __init__(self):
        '''
        Hand the fixed rs2 settings (mapping file, report table, sort and
        index columns, log name) to the CleanProcessTable constructor.
        '''
        rs2_mapping_file = os.path.join(
            ".", "migration_mappings", "rs2_mapping.json")

        super().__init__(
            mapping_path=rs2_mapping_file,
            inconsist_report_table="inconsist_rs2",
            sort_columns=["radsatznummer", "ende_der_bearbeitung"],
            index_columns=["radsatznummer", "positionsnummer"],
            log_name="CleanRs2")

    def filter_invalid_ende_der_bearbeitung(self, data: pd.DataFrame
                                            ) -> pd.DataFrame:
        '''
        Filter the rows whose ende_der_bearbeitung is missing and convert
        the column to datetime.

        A missing ende_der_bearbeitung means that the activity was planned,
        but not executed.

        :param data: frame with at least the columns radsatznummer and
            ende_der_bearbeitung
        :return: the frame returned by _filter_invalid_data, with
            ende_der_bearbeitung converted by pd.to_datetime
        '''
        end_column = "ende_der_bearbeitung"

        self.error_column_abscence(columns=["radsatznummer", end_column],
                                   data=data)

        end_is_missing = data[end_column].isnull()

        # presumably drops (and reports) the masked rows - confirm against
        # CleanProcessTable._filter_invalid_data
        data = self._filter_invalid_data(
            data=data,
            invalid_mask=end_is_missing,
            reason="invalid ende der bearbeitung")

        # Converted only after the missing values have been handed to the
        # filter above.
        data[end_column] = pd.to_datetime(data[end_column])

        return data

    def filter_invalid_taetigkeitsname(self, data: pd.DataFrame
                                       ) -> pd.DataFrame:
        '''
        Filter scrap activities that are not the final entry of their
        wheel-set.

        The configuration holds a list of activities (schrott_taetigkeiten)
        whose execution means that the wheel-set is scrap, so the process
        history should end after them. A row is marked invalid when its
        taetigkeitsname is in that list and the next row, in sort order,
        still has the same radsatznummer.

        :param data: frame with at least the columns radsatznummer and
            taetigkeitsname; it is sorted in place
        :return: the frame returned by _filter_invalid_data
        '''
        # NOTE(review): imported inside the method, presumably to postpone
        # loading the configuration until it is needed - confirm.
        from libraries.configuration import default as cfg

        self.error_column_abscence(columns=["radsatznummer",
                                            "taetigkeitsname"],
                                   data=data)

        # NOTE(review): inplace=True also reorders the frame object held by
        # the caller - confirm that this is intended.
        data.sort_values(by=self._sort_columns, inplace=True)

        wheelset_number = data["radsatznummer"]

        # A row closes a wheel-set history when the following row has
        # another radsatznummer; the very last row is compared against the
        # fill value introduced by shift(-1).
        closes_history = wheelset_number != wheelset_number.shift(-1)

        is_scrap_activity = data["taetigkeitsname"].isin(
            cfg.schrott_taetigkeiten)

        scrap_before_end = ~closes_history & is_scrap_activity

        data = self._filter_invalid_data(
            data=data,
            invalid_mask=scrap_before_end,
            reason="invalid taetigkeit")

        return data