
removed oebb libraries

tanja 4 years ago
parent
commit
636e15e163

+ 0 - 178
cdplib/SimplifiedProcessModel.py

@@ -1,178 +0,0 @@
-
-import sys
-import os
-import time
-sys.path.append(os.getcwd())
-from cdplib.log import Log
-log = Log("Simplified Process Model")
-
-
-class SimplifiedProcessModel:
-
-    def __init__(self):
-        self._log = Log("Simplified Process Model")
-
-    @property
-    def process_stages(self):
-        '''
-        For machine learning predictions we divide the process into
-        big stages; stages can be skipped depending on the IHS of the
-        wheelset. We use all information gathered during the previous
-        process stages to make predictions for the next stage.
-        '''
-        import networkx as nx
-        '''
-        critical_stations = {"A": [421, 110]}
-
-        critical_stations["B"] = [130]
-
-        critical_stations["C"] = [140, 150]
-
-        critical_stations["D"] = [410]
-
-        critical_stations["E"] = [510, 520, 535,
-                                530, 550]
-
-        critical_stations["F"] = [490, 480, 430, 170]
-
-        critical_stations["G"] = [595, 190, 630]
-
-        critical_stations["H"] = [640]
-
-        critical_stations["I"] = [650, 560]
-
-        critical_stations["J"] = [675]
-
-        critical_stations["K"] = [690]
-
-        critical_stations["L"] = [710, 670]
-
-        stages_graph = nx.DiGraph()
-
-        for stage in critical_stations:
-            stages_graph.add_node(stage, stations=critical_stations[stage])
-
-        stages_graph.add_edge("A", "B")
-        stages_graph.add_edge("B", "C")
-        stages_graph.add_edge("C", "D")
-        stages_graph.add_edge("D", "E")
-        stages_graph.add_edge("D", "F")
-        stages_graph.add_edge("E", "G")
-        stages_graph.add_edge("F", "G")
-        stages_graph.add_edge("G", "H")
-        stages_graph.add_edge("H", "I")
-        stages_graph.add_edge("I", "J")
-        stages_graph.add_edge("J", "K")
-        stages_graph.add_edge("K", "L")
-        '''
-
-        critical_stations = {"A": [421, 110]}
-
-        critical_stations["B"] = [130]
-
-        critical_stations["C"] = [140, 150]
-
-        critical_stations["D"] = [410]
-
-        critical_stations["E"] = [510, 520, 535,
-                                530, 320, 550]
-
-        #critical_stations["F"] = [490, 480, 430, 595]
-
-        #critical_stations["G"] = [170, 506, 430]
-
-        # Merge the two above since they both contain station 430
-        critical_stations["F"] = [490, 480, 430, 595, 170, 506]
-
-        critical_stations["H"] = [190]
-
-        critical_stations["I"] = [320, 340, 630]
-
-        # This path runs in parallel with the rest of the paths; handle it as an exception
-        #critical_stations["J"] = [680]
-
-        critical_stations["K"] = [640]
-
-        stages_graph = nx.DiGraph()
-        for stage in critical_stations:
-            stages_graph.add_node(node_for_adding=stage, stations=critical_stations[stage])
-
-        stages_graph.add_edge("A", "B")
-
-        stages_graph.add_edge("B", "C")
-        stages_graph.add_edge("B", "K")
-        #stages_graph.add_edge("B", "J")
-
-        stages_graph.add_edge("C", "D")
-
-        stages_graph.add_edge("D", "E")
-        stages_graph.add_edge("D", "F")
-        #stages_graph.add_edge("D", "G")
-        stages_graph.add_edge("D", "H")
-
-        stages_graph.add_edge("E", "H")
-
-        #stages_graph.add_edge("F", "G")
-
-        #stages_graph.add_edge("G", "H")
-
-        stages_graph.add_edge("E", "I")
-        stages_graph.add_edge("H", "I")
-
-        #stages_graph.add_edge("J", "K")
-
-        stages_graph.add_edge("I", "K")
-
-        return stages_graph
-
-    def process_stages_paths(self):
-        '''
-        Map each simple path from stage "A" to every reachable stage onto
-        the list of stations collected along that path.
-        '''
-        import networkx as nx
-        result_graph = self.process_stages
-
-        result_dict = {}
-        for target_stage in result_graph.nodes():
-            for path in nx.all_simple_paths(result_graph, source="A", target=target_stage):
-                station_list = []
-                for stage in path:
-                    for station in result_graph.nodes()[stage]['stations']:
-                        station_list.append(station)
-                result_dict[str(path)] = station_list
-        return result_dict
-
-    def get_stage(self, station_nummer: int):
-        '''
-        Return the stage a station belongs to, or None if the station is
-        not assigned to any stage.
-        '''
-        result_graph = self.process_stages
-        # Look the station up on the stage nodes directly: iterating only
-        # over simple paths from "A" to "K" misses stages that lie on no
-        # such path, e.g. the dead-end stage "F".
-        for stage, data in result_graph.nodes(data=True):
-            if station_nummer in data['stations']:
-                return stage
-        return None
-
-    def calculate_stage_path(self, stations_list: list):
-        position_list = list(self.process_stages.nodes())
-        current_index = 0
-        stage_path = []
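-        # Stages must appear in the graph's node-insertion order; a station
-        # mapping to an earlier stage means the wheelset left the modelled path.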
-        for station in stations_list:
-            stage = self.get_stage(station)
-            if stage is not None:
-                new_index = position_list.index(stage)
-                if current_index <= new_index:
-                    if stage not in stage_path:
-                        stage_path.append(stage)
-                        current_index = new_index
-                else:
-                    self._log.warning('Path does not conform to the model, no prediction can be made')
-                    return
-        return str(stage_path)
-
-    def get_stations_for_prediction_model(self, stations_list):
-        stages_paths = self.process_stages_paths()
-        stage_path = self.calculate_stage_path(stations_list)
-        if stage_path is None:
-            # Path does not conform to the model, see calculate_stage_path
-            return None
-        prediction_stations = stages_paths[stage_path]
-
-        # Hack solution for handling that station 680 is running in parallel with most other stations
-        if len(prediction_stations) > 3:
-            prediction_stations.append(680)
-        return prediction_stations
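
For context, a minimal sketch of how the removed model was driven. The station numbers below are the real stage A-C stations from the graph above; the call sequence itself is illustrative, not taken from actual calling code:

    from cdplib.SimplifiedProcessModel import SimplifiedProcessModel

    model = SimplifiedProcessModel()

    # Stations a wheelset has visited so far, in processing order
    visited_stations = [421, 110, 130, 140]

    # Stations whose measurements can feed the prediction for the next
    # stage; None if the visited path does not conform to the stage graph
    print(model.get_stations_for_prediction_model(visited_stations))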

+ 0 - 63
cdplib/data_cleaning/DataCleaningUtils.py

@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Fri Sep 27 16:20:03 2019
-
-@author: tanya
-"""
-
-import pandas as pd
-import numpy as np
-
-
-class CleaningUtils:
-    '''
-    Static helpers for cleaning raw data.
-    '''
-    @staticmethod
-    def convert_dates(series: pd.Series, formats: (str, list)) -> pd.Series:
-        '''
-        Parse a series of date strings, trying each format in turn and
-        keeping the first successful conversion per entry.
-        '''
-        # list("%d%m%Y") would split a single format string into characters
-        if isinstance(formats, str):
-            formats = [formats]
-
-        converted = pd.Series([pd.to_datetime(np.nan)]*len(series),
-                              index=series.index)
-
-        for formt in formats:
-            if formt == "%d%m%Y":
-                missing_leading_zero = (series.astype(str).str.len() == 7)
-
-                series = series.astype(str)
-
-                series.loc[missing_leading_zero] = "0" +\
-                    series.loc[missing_leading_zero]
-
-            converted_this_format = pd.to_datetime(series,
-                                                   format=formt,
-                                                   errors="coerce")
-
-            converted.fillna(converted_this_format, inplace=True)
-
-        return converted
-
-    def standarize_writing(self, s: str, to_lowercase: bool = True):
-        '''
-        Transliterate German special characters and normalize the string
-        to an alphanumeric, '_'-separated form.
-        '''
-        import re
-
-        german_character_mapping = {"ß": "ss",
-                                    "ü": "ue",
-                                    "Ü": "Ue",
-                                    "ä": "ae",
-                                    "Ä": "Ae",
-                                    "ö": "oe",
-                                    "Ö": "Oe"}
-
-        s = s.encode('raw_unicode_escape').decode('raw_unicode_escape')
-        for char, correct_char in german_character_mapping.items():
-            s = s.replace(char, correct_char)
-
-        if to_lowercase:
-            s = s.lower()
-
-        s = re.sub('[^0-9a-zA-Z]+', '_', s).lstrip("_").rstrip("_")
-
-        return s
-
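
A short sketch of how these helpers would be called; the sample inputs are invented for illustration:

    import pandas as pd
    from cdplib.data_cleaning import CleaningUtils

    # "1012020" is missing its leading zero and is fixed up internally
    dates = pd.Series(["31122019", "1012020"])
    print(CleaningUtils.convert_dates(dates, "%d%m%Y"))

    utils = CleaningUtils()
    print(utils.standarize_writing("Größenprüfung Station 130"))
    # -> groessenpruefung_station_130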

+ 0 - 1
cdplib/data_cleaning/__init__.py

@@ -1 +0,0 @@
-from .DataCleaningUtils import *

+ 0 - 151
cdplib/db_handlers/OebbMongodbHandler.py

@@ -1,151 +0,0 @@
-"""
-Created on Mon Sep 16 13:27:44 2019
-
-@author: oskar
-@description: Oebb specific Mongodb Handler which inherits from the 'general' Mongodb Handler
-"""
-
-
-import json
-import simplejson
-import sys
-import os
-import jsonref
-
-from copy import deepcopy
-from pymongo import MongoClient
-import pandas as pd
-import numpy as np
-
-sys.path.append(os.getcwd())
-from libraries.log import Log
-from libraries.configuration import default as cfg
-from libraries.db_handlers.MongodbHandler import MongodbHandler
-from libraries.Singleton_Threadsafe import SingletonThreadsafe
-
-# 'log' is referenced in OebbMongodbHandlerPool.aquire below
-log = Log("Oebb Mongodb Handler")
-
-
-class OebbMongodbHandlerPool(metaclass=SingletonThreadsafe):
-    '''
-    Thread-safe singleton pool of reusable OebbMongodbHandler instances.
-    '''
-
-    def __init__(self, size: int = 10):
-        self._size = size
-        self._mongodb_handlers = [OebbMongodbHandler() for _ in range(size)]
-
-    def aquire(self):
-        while not self._mongodb_handlers:
-            self._mongodb_handlers = [OebbMongodbHandler() for _ in range(self._size)]
-            log.warning(("Ran out of Mongodb handlers, {} more have been "
-                         "added. Are you sure you've returned yours?").format(self._size))
-        return self._mongodb_handlers.pop()
-        
-    def release(self, mongodb_handler):
-        if len(self._mongodb_handlers) < self._size:
-            self._mongodb_handlers.append(mongodb_handler)
-
-
-class OebbMongodbHandler(MongodbHandler):
-
-    def __init__(self, database_url: str = None,
-                 database_name: str = None):
-        '''
-        :param str database_url:
-        :param str database_name:
-        '''
-        super().__init__(
-                database_url=database_url,
-                database_name=database_name)
-
-        self._log = Log("Oebb Mongodb Handler")
-
-
-    def query_process_instances_with_stage_filtering(self, stage_path: list):
-        '''
-        :param list stage_path: stage path for which the data will be queried.
-
-        Returns all wheelsets whose final stage path contains stage_path as a
-        subset, restricted to the process steps belonging to the stages in
-        stage_path.
-        '''
-        assert(isinstance(stage_path, list)),\
-            "Parameter 'stage_path' must be a list type"
-
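-        # Unwind the process steps so they can be sorted by step_number,
-        # regroup them per wheelset, then keep only the steps whose stage
-        # lies on the requested stage_path.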
-        aggregation_pipeline = [
-                                {"$match": {"$expr": {"$setIsSubset": [stage_path, '$final_state.stage_path']}}},
-                                {"$unwind": "$process"},
-                                {"$sort": {"process.step_number": 1}},
-                                {"$group": {
-                                    "_id": {
-                                            "_id": "$_id", 
-                                            "radsatznummer": "$radsatznummer",
-                                            "final_state": "$final_state"
-                                            }, 
-                                        "process": {"$push": "$process"}
-                                    }
-                                },
-                                {"$project": {
-                                    "_id": "$_id._id",
-                                    "radsatznummer": "$_id.radsatznummer",
-                                    "final_state": "$_id.final_state",
-                                    "process":{
-                                        "$filter":{
-                                            "input": "$process",
-                                            "as": "process",
-                                            "cond":{
-                                                "$setIsSubset": [["$$process.stage"], stage_path]
-                                            },      
-                                        }
-                                    }
-                                }
-                                }
-                            ]
-
-        return self.aggregate_data_and_generate_dataframe('process_instances', aggregation_pipeline)
-
-    def query_process_instances_with_guaranteed_station_sorting(self, attribute: str = None,
-                                                attribute_value: str = None, comparison_operator: str = '$eq', ascending: bool = True):
-        '''
-        :param str attribute: field to filter the process instances on
-        :param str attribute_value: value the field is compared against
-        :param str comparison_operator: mongodb comparison operator, e.g. '$eq'
-        :param bool ascending: sort order of the process steps by step_number
-
-        Returns the matching wheelsets with their process steps guaranteed to
-        be sorted by step_number.
-        '''
-        if attribute is not None:
-            assert(isinstance(attribute, str)),\
-                "Parameter 'attribute' must be a string type"
-
-        if attribute_value is not None:
-            assert(isinstance(attribute_value, (str, int, bool))),\
-                "Parameter 'attribute_value' must be a string, integer or boolean type"
-    
-        assert(isinstance(comparison_operator, str)),\
-            "Parameter 'comparison_operator' must be a string type"
-
-        assert(isinstance(ascending, bool)),\
-            "Parameter 'ascending' must be a boolean type"
-
-        
-        order = 1 if ascending else -1
-
-        aggregation_pipeline = [
-                                {"$match":{ attribute:{comparison_operator: attribute_value}}},
-                                {"$unwind": "$process"},
-                                {"$sort": {"process.step_number": order}},
-                                {"$group": {
-                                    "_id": {
-                                            "_id": "$_id", 
-                                            "radsatznummer": "$radsatznummer",
-                                            "final_state": "$final_state"
-                                            }, 
-                                        "process": {"$push": "$process"}
-                                    }
-                                },
-                                {"$project": {
-                                    "_id": "$_id._id",
-                                    "radsatznummer": "$_id.radsatznummer",
-                                    "final_state": "$_id.final_state",
-                                    "process": "$process"}
-                                }
-                            ]
-                    
-        return self.aggregate_data_and_generate_dataframe('process_instances', aggregation_pipeline)
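
For context, a sketch of the acquire/release pattern the removed pool supported. Note the module imports from libraries/ although it lives under cdplib/, so the import path below assumes the libraries layout; the stage names and pool size are illustrative, and 'aquire' is spelled as in the source:

    from libraries.db_handlers.OebbMongodbHandler import OebbMongodbHandlerPool

    pool = OebbMongodbHandlerPool(size=10)
    handler = pool.aquire()
    try:
        # wheelsets whose final stage path contains stages "A" and "B"
        df = handler.query_process_instances_with_stage_filtering(["A", "B"])
    finally:
        pool.release(handler)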

BIN
cdplib/db_handlers/__pycache__/MongodbHandler.cpython-37.pyc


BIN
cdplib/db_handlers/__pycache__/SQLHandler.cpython-37.pyc


BIN
cdplib/db_handlers/__pycache__/SQLOperations.cpython-37.pyc


BIN
cdplib/db_migration/__pycache__/DataFrameToCollection.cpython-37.pyc


BIN
cdplib/db_migration/__pycache__/MigrationCleaning.cpython-37.pyc


BIN
cdplib/db_migration/__pycache__/ParseDbSchema.cpython-37.pyc


BIN
cdplib/db_migration/__pycache__/ParseJsonSchema.cpython-37.pyc


BIN
cdplib/db_migration/__pycache__/ParseMapping.cpython-37.pyc


BIN
cdplib/utils/__pycache__/ClassLogging.cpython-37.pyc


BIN
cdplib/utils/__pycache__/CleaningUtils.cpython-37.pyc


BIN
cdplib/utils/__pycache__/ExceptionsHandler.cpython-37.pyc