Explorar o código

add utils folder

tsteuer hai 5 anos
pai
achega
ede503f90f

+ 2 - 1
cdplib/__init__.py

@@ -1,4 +1,5 @@
 from .db_handlers import *
 from .db_migration import *
 from .data_cleaning import *
-from .data_hyperopt import *
+from .hyperopt import *
+from .utils import *

+ 62 - 0
cdplib/utils/CleaningUtils.py

@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 27 16:20:03 2019
+
+@author: tanya
+"""
+
+import pandas as pd
+import numpy as np
+
+
+class CleaningUtils:
+    '''
+    Helpers for cleaning raw data: parsing date columns that may be
+    written in several formats and normalising free-text strings.
+    '''
+    @staticmethod
+    def convert_dates(series: pd.Series, formats: (str, list)) -> pd.Series:
+        '''
+        Parse a series of date-like values, trying each format in turn.
+
+        :param series: values to parse
+        :param formats: a single strftime format or a list of them;
+            formats listed first take precedence, later ones only fill
+            the entries that are still unparsed
+        :return: datetime series carrying the same index as the input;
+            entries matching none of the formats are NaT
+
+        For the format "%d%m%Y", values that are 7 characters long get a
+        "0" prepended before parsing (presumably dates whose leading
+        zero was lost, e.g. when stored as integers). From that point on
+        the values are handled as strings for the remaining formats too.
+        The series passed in by the caller is not modified.
+        '''
+        # list("%d%m%Y") would split a single format into characters
+        if isinstance(formats, str):
+            formats = [formats]
+        else:
+            formats = list(formats)
+
+        # built on the input's index so that fillna below aligns the
+        # parsed values with the right rows
+        converted = pd.Series(pd.NaT,
+                              index=series.index,
+                              dtype="datetime64[ns]")
+
+        for formt in formats:
+            if formt == "%d%m%Y":
+                # astype returns a new object, the caller's data is safe
+                series = series.astype(str)
+
+                missing_leading_zero = (series.str.len() == 7)
+
+                series.loc[missing_leading_zero] = "0" +\
+                    series.loc[missing_leading_zero]
+
+            converted_this_format = pd.to_datetime(series,
+                                                   format=formt,
+                                                   errors="coerce")
+
+            # only entries that are still NaT are filled
+            converted = converted.fillna(converted_this_format)
+
+        return converted
+
+    def standarize_writing(self, s: str) -> str:
+        '''
+        Turn a string into a lower-case identifier made of the
+        characters a-z, 0-9 and "_".
+
+        German umlauts and the sharp s are transliterated (ü -> ue,
+        ß -> ss, ...), the result is lower-cased and every run of other
+        characters is replaced by a single underscore.
+
+        :param s: string to normalise
+        :return: normalised string
+        '''
+        import re
+
+        german_character_mapping = {"ß": "ss",
+                                    "ü": "ue",
+                                    "Ü": "Ue",
+                                    "ä": "ae",
+                                    "Ä": "Ae",
+                                    "ö": "oe",
+                                    "Ö": "Oe"}
+
+        # NOTE(review): this round trip looks like it is meant to turn
+        # literal "\uXXXX" escape text into the real character - confirm
+        s = s.encode('raw_unicode_escape').decode('raw_unicode_escape')
+        for char, correct_char in german_character_mapping.items():
+            s = s.replace(char, correct_char)
+
+        s = s.lower()
+
+        # the string is lower-case here, A-Z is kept only for safety
+        s = re.sub('[^0-9a-zA-Z]+', '_', s)
+
+        return s
+

cdplib/ExceptionsHandler.py → cdplib/utils/ExceptionsHandler.py


+ 3 - 0
cdplib/utils/__init__.py

@@ -0,0 +1,3 @@
+from .ExceptionsHandler import *
+from .CleaningUtils import *
+

BIN=BIN
cdplib/utils/__pycache__/ClassLogging.cpython-37.pyc


BIN=BIN
cdplib/utils/__pycache__/CleaningUtils.cpython-37.pyc


BIN=BIN
cdplib/utils/__pycache__/ExceptionsHandler.cpython-37.pyc