#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 30 10:11:55 2019

@author: tanya
"""

import pandas as pd
import os
import sys

# Make the current working directory importable so that the project-local
# ``libraries`` package below can be resolved.
sys.path.append(os.getcwd())

from libraries.import_process_instances.CleanProcessTable import CleanProcessTable


class CleanRs70(CleanProcessTable):
    """
    CleanProcessTable specialisation configured for the rs70 table.

    The constructor wires in the rs70 mapping file, the name of the
    inconsistency report table and the sort / index columns; the class adds
    one rs70-specific filter, ``filter_invalid_schadcode``.
    """

    def __init__(self):
        """
        Configure the base class with the rs70-specific settings.

        Both the sort columns and the index columns are
        ``["radsatznummer", "eingabe_datum"]``.
        """
        mapping_file = os.path.join(".", "migration_mappings",
                                    "rs70_mapping.json")
        key_columns = ("radsatznummer", "eingabe_datum")

        super().__init__(
            mapping_path=mapping_file,
            inconsist_report_table="inconsist_rs70",
            # Two independent lists, one per argument.
            sort_columns=list(key_columns),
            index_columns=list(key_columns),
            log_name="CleanRs70")

    def filter_invalid_schadcode(self, data: pd.DataFrame) -> pd.DataFrame:
        """
        Filter out scrap schadcodes that are followed by further entries.

        The configuration holds a list of schadcodes (``schrott_schadcodes``)
        whose assignment means that the product is scrap; no further
        schadcode should be assigned after such a code. A row is therefore
        flagged as invalid when its schadcode is in that list and it is not
        the last row of its ``radsatznummer`` in the sorted table.

        Note: ``data`` is sorted in place by the configured sort columns, so
        the caller's DataFrame is reordered as a side effect.

        :param data: table containing at least the columns
            ``radsatznummer`` and ``schadcode`` (plus the sort columns).
        :return: the result of ``_filter_invalid_data`` called with the
            invalid-row mask and the reason ``"invalid schadcode"``.
        """
        # Imported at call time rather than at module import time.
        from libraries.configuration import default as cfg

        # Presumably reports an error when a required column is missing;
        # implemented in the base class -- verify there.
        self.error_column_abscence(columns=["radsatznummer", "schadcode"],
                                   data=data)

        # Row order matters: the "last row per radsatznummer" test below
        # compares each row with its immediate successor.
        data.sort_values(by=self._sort_columns, inplace=True)

        radsatz = data["radsatznummer"]
        # True where the next row belongs to a different radsatznummer
        # (the final row compares against NaN and is therefore True as well).
        closes_group = radsatz.ne(radsatz.shift(-1))
        has_scrap_code = data["schadcode"].isin(cfg.schrott_schadcodes)

        scrap_before_end = has_scrap_code & ~closes_group

        data = self._filter_invalid_data(
            data=data,
            invalid_mask=scrap_before_end,
            reason="invalid schadcode")

        # XXX temporary here
        # data["eingabe_datum"] = pd.to_datetime(data["eingabe_datum"])

        return data