Browse files

renamed libraries to cdplib

tanja 4 years ago
parent
commit
3bb02ade48

+ 9 - 8
cdplib/db_handlers/MongodbHandler.py

@@ -21,9 +21,8 @@ import pandas as pd
 import numpy as np
 
 sys.path.append(os.getcwd())
-from libraries.log import Log
-from libraries.configuration import default as cfg
-from libraries.Singleton_Threadsafe import SingletonThreadsafe
+from cdplib.log import Log
+from cdplib.Singleton_Threadsafe import SingletonThreadsafe
 
 
 #class MongodbHandlerPool(metaclass=SingletonThreadsafe):
@@ -40,7 +39,7 @@ class MongodbHandlerPool():
             self._mongodb_handlers = [MongodbHandler() for _ in range(self._size)]
             log.warning("Ran out of Mongodb handlers, 10 more have been added. Are you sure you've returned yours?")
         return self._mongodb_handlers.pop()
-        
+
     def release(self, mongodb_handler):
         if len(self._mongodb_handlers) < self._size:
             self._mongodb_handlers.append(mongodb_handler)
@@ -60,6 +59,8 @@ class MongodbHandler:
         '''
         if database_url is None:
 
+            from libraries.configuration import default as cfg
+
             database_url = "mongodb://{0}:{1}@{2}:{3}"\
                            .format(cfg["MONGO"]["MONGO_USER"],
                                    cfg["MONGO"]["MONGO_PASSWORD"],
@@ -204,7 +205,7 @@ class MongodbHandler:
             try:
                 self._log.info(("Collection '{}' has been created").format(collection_name))
                 return self._database.create_collection(collection_name)
-            
+
             except Exception as error:
                 self._log.log_and_raise_error(('An error occurred while creating the new collection: {}. \nError: {}').format(collection_name, error))
         else:
@@ -247,7 +248,7 @@ class MongodbHandler:
                 self._database[collection_name].insert_one(data)
             else:
                 self._database[collection_name].insert_many(data, ordered=ordered)
-        
+
         except Exception as error:
             self._log.log_and_raise_error(('An error occurred when trying to insert data into {}, {}. \nError: {}').format(self._database_name, collection_name, error))
 
@@ -286,14 +287,14 @@ class MongodbHandler:
 
         try:
             data = self._database[collection_name].aggregate(pipeline=aggregation_pipeline, allowDiskUse=True)
-         
+
         except Exception as error:
             self._log.log_and_raise_error(('A problem occurred when aggregating the collection {} with the pipeline {}. \nError: {}').format(collection_name, aggregation_pipeline, error))
 
         return self.convert_mongo_data_into_dataframe(data)
 
     def convert_mongo_data_into_dataframe(self, data) -> pd.DataFrame():
-        
+
         data = list(data)
         try:
             if len(data)> 0:

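The hunk above defers the libraries.configuration import into __init__, so configuration is only read when no database_url is passed. A minimal usage sketch, assuming only the database_url keyword visible in this hunk (any other constructor parameters are not shown in the diff, and the credentials and host below are placeholders):

    from cdplib.db_handlers.MongodbHandler import MongodbHandler

    # An explicit connection string means the lazy
    # 'libraries.configuration' import inside __init__ is never triggered.
    handler = MongodbHandler(
        database_url="mongodb://user:password@localhost:27017")
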
+ 8 - 9
cdplib/db_handlers/SQLHandler.py

@@ -15,8 +15,8 @@ import pandas as pd
 import warnings
 
 sys.path.append(os.getcwd())
-from libraries.log import Log
-from libraries.Singleton_Threadsafe import SingletonThreadsafe
+from cdplib.log import Log
+from cdplib.Singleton_Threadsafe import SingletonThreadsafe
 
 class SQLHandlerPool(metaclass=SingletonThreadsafe):
 #class SQLHandlerPool():
@@ -33,7 +33,7 @@ class SQLHandlerPool(metaclass=SingletonThreadsafe):
             self._sql_handlers = [SQLHandler() for _ in range(self._size)]
             self._log.warning("Ran out of SQL handlers, 10 more have been added. Are you sure you've returned yours?")
         return self._sql_handlers.pop()
-        
+
     def release(self, mongodb_handler):
         if len(self._sql_handlers) < self._size:
             self._sql_handlers.append(mongodb_handler)
@@ -58,15 +58,14 @@ class SQLHandler:
              for mysql : mysql+pymysql
              for db2: ibm_db_sa
         '''
-
-        
-        from libraries.configuration import default as cfg
         from sqlalchemy_utils import database_exists, create_database
 
         self._log = Log(name='SQLHandler')
 
         if db_uri is None:
 
+            from libraries.configuration import default as cfg
+
             db_uri = "mysql+pymysql://{0}:{1}@{2}:{3}/{4}?charset=utf8&local_infile=1"\
                      .format(cfg["SQL"]["SQL_USER"],
                              cfg["SQL"]["SQL_PASSWORD"],
@@ -146,7 +145,7 @@ class SQLHandler:
         self.execute("DROP DATABASE IF EXISTS {}".format(database))
         self._engine.execute("CREATE DATABASE {}".format(database))
         self._engine.execute("USE {}".format(database))
-        
+
     @property
     def _db_metadata(self) -> dict:
         '''
@@ -206,7 +205,7 @@ class SQLHandler:
         '''
         connection = self._engine.connect()
         transaction = connection.begin()
-    
+
         errors = []
 
         # in the case of multi-query execute each query
@@ -507,7 +506,7 @@ class SQLHandler:
             data = pd.read_sql(sql=query,
                                con=connection,
                                **read_sql_kwargs)
-                               
+
             connection.close()
             return data
 

+ 396 - 0
cdplib/db_migration/DataFrameToCollection.py

@@ -0,0 +1,396 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Mon Jul 22 11:05:47 2019
+
+@author: tanya
+
+@description: a function to reshape a pandas dataframe to a list of
+(possibly nested) documents with respect to a (json) mongodb schema
+"""
+
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+sys.path.append(os.getcwd())
+
+
+class DataFrameToCollection():
+    '''
+    '''
+    def __init__(self, schema_path: str):
+        '''
+        '''
+        from cdplib.log import Log
+        import json
+
+        self._log = Log("ParseJsonSchema")
+
+
+        if not os.path.isfile(schema_path):
+            err = "JsonSchema not found"
+            self._log.error(err)
+            raise FileNotFoundError(err)
+
+        # load schema to dictionary if it is a valid json file
+        try:
+            with open(schema_path, "r") as f:
+                self.schema = json.load(f)
+
+        except Exception as e:
+            err = ("Could not load json schema, "
+                   "Obtained error {}".format(e))
+
+            self._log.error(err)
+            raise Exception(err)
+
+
+    def to_list_of_documents(self, data: pd.DataFrame,
+                             grp_fields: list,
+                             schema: dict = None,
+                             _final_step: bool = True) -> list:
+        '''
+        Reshapes a pandas dataframe to a list of documents according
+         to a complex (json) mongodb schema
+
+         Remark1: column names of data need to reflect the "nestedness"
+         of the field in the mongodb schema with the help of a "." separator
+         Example: field.sub_field_1, field.sub_field_2
+
+         Remark2: if the schema is stored as a json file, first load it
+         to a dictionary with the help of the python json module
+
+         The function goes recursively through all the fields and reshapes
+         them correspondingly depending on whether the field is an array,
+         an object, or a simple field. For each field we group the data by the
+         grp_fields and reshape it accordingly; the result is a pandas Series.
+         In the end all the series are collected and concatenated.
+        '''
+        from copy import deepcopy
+
+        data = self._melt_duplicated_columns(data)
+
+        reshaped_fields = []
+
+        if schema is None:
+            schema = self.schema
+
+        for field in schema["properties"]:
+
+            if field not in self._unroll_nested_names(data.columns):
+                continue
+
+            field_type = schema["properties"][field]["bsonType"]
+
+            # if field has a simple type
+            if field_type not in ["array", "object"]:
+
+                grp_fields = [c for c in grp_fields if c in data.columns]
+
+                # check that there is only one possible value of this field
+                n_distinct_values = data.groupby(grp_fields, sort=False)[field].nunique().max()
+
+                if n_distinct_values != 1:
+                    err = "Field {0} is not unique with respect to {1}"\
+                          .format(field, grp_fields)
+
+                    self._log.error(err)
+                    raise Exception(err)
+
+                if field not in grp_fields:
+                    reshaped_field = data.groupby(grp_fields, sort=False)[field].first()
+                else:
+                    reshaped_field =\
+                        data[grp_fields].drop_duplicates()\
+                        .set_index(grp_fields, drop=False)[field]
+
+                reshaped_fields.append(reshaped_field)
+
+            # if field is sub-document (dictionary)
+            elif field_type == "object":
+
+                sub_schema = deepcopy(schema["properties"][field])
+
+                # rename sub-schema properties to match with data column names
+                sub_schema["properties"] =\
+                    {".".join([field, k]): v for k, v
+                     in sub_schema["properties"].items()}
+
+                sub_data = self.to_list_of_documents(
+                            data=data,
+                            schema=sub_schema,
+                            grp_fields=grp_fields,
+                            _final_step=False)
+
+                reshaped_field = sub_data.apply(self._make_dict, axis=1)
+                reshaped_field.name = field
+
+                reshaped_fields.append(reshaped_field)
+
+            # if field is a list of dictionaries
+            elif field_type == "array":
+
+                items_type = schema["properties"][field]["items"]["bsonType"]
+
+                if items_type == "object":
+
+                    sub_schema = deepcopy(schema["properties"][field]["items"])
+
+                    # rename sub-schema properties to match data column names
+                    sub_schema["properties"] =\
+                        {".".join([field, k]): v for k, v in
+                         sub_schema["properties"].items()}
+
+                    # extend grp fields by sub-fields of field simple types
+                    sub_grp_fields = [f for f in sub_schema["properties"]
+                                      if (sub_schema["properties"][f]["bsonType"] not in ["array", "object"])
+                                      and (f in data.columns)]
+
+                    if len(sub_grp_fields) == 0:
+                        err = ("One of the sub-keys in a list of documents"
+                               " must be of simple type for the field {}"
+                               .format(field))
+
+                        self._log.error(err)
+                        raise Exception(err)
+
+                    # group and reshape sub-fields with complex types
+                    sub_data = self.to_list_of_documents(
+                                data=data,
+                                schema=sub_schema,
+                                grp_fields=grp_fields + sub_grp_fields,
+                                _final_step=False)
+
+                    if sub_data is not None:
+
+                        # gather the results into a list of dictionaries
+                        sub_data = sub_data.apply(self._make_dict, axis=1)
+
+                        sub_data.name = field
+                        sub_data = sub_data.reset_index(grp_fields)
+
+                        reshaped_field =\
+                            sub_data.groupby(grp_fields, sort=False)[field]\
+                                    .apply(self._make_list_of_distinct)
+
+                        reshaped_fields.append(reshaped_field)
+
+                # if field is a list of values with simple type
+                elif items_type == "array":
+
+                    grp_fields = [c for c in grp_fields if c in data.columns]
+
+                    if field in data.columns:
+
+                        reshaped_field = data.groupby(grp_fields, sort=False)[field]\
+                                             .apply(self._make_list_of_distinct)
+
+                        reshaped_fields.append(reshaped_field)
+
+                else:
+
+                    grp_fields = [c for c in grp_fields if c in data.columns]
+
+                    if field in data.columns:
+
+                        reshaped_field = data.groupby(grp_fields, sort=False)[field]\
+                                             .apply(self._make_flattened_list_of_distinct)
+
+                        reshaped_fields.append(reshaped_field)
+
+        if len(reshaped_fields) > 0:
+
+            reshaped_fields = pd.concat(reshaped_fields, sort=False, axis=1)
+
+            if _final_step:
+                # dropping the index names if it is the final step,
+                # if not the index is needed for merging
+                reshaped_fields =\
+                    reshaped_fields.drop(list(reshaped_fields.index.names), axis=1, errors="ignore")\
+                                   .reset_index(drop=False)
+
+                self._log.info("Done reshaping the dataframe to a list of documents")
+
+            return reshaped_fields
+
+        else:
+            return
+
+    def _melt_duplicated_columns(self, data: pd.DataFrame) -> pd.DataFrame:
+        '''
+        '''
+        data = data.copy(deep=True)
+
+        for c in set(data.columns):
+            if isinstance(data[c], pd.DataFrame):
+                """
+                data = pd.melt(data, id_vars=[cc for cc in data.columns
+                                              if cc != c], value_vars=c)\
+                         .drop("variable", axis=1)\
+                         .rename(columns={"value": c})
+                """
+                data["temp"] = data[c].apply(self._make_list, axis=1)
+                data.drop(c, axis=1, inplace=True)
+                data = data.rename(columns={"temp": c})
+
+        return data
+
+    def _make_dict(self, x: pd.Series) -> dict:
+        '''
+        Transforms a pandas Series to a dictionary.
+         Meant to be applied to a dataframe along axis=1,
+         in which case the index of the input Series consists of the
+         column names of the dataframe
+        '''
+        def custom_is_null(y):
+            if isinstance(pd.notnull(y), bool):
+                return pd.notnull(y)
+            else:
+                return True
+
+        return {f.split(".")[-1]: x[f] for f in x.index
+                if custom_is_null(x[f])}
+
+    def _make_list(self, x: pd.Series) -> list:
+        '''
+        return: list of values in a series
+        '''
+        return list(x)
+
+    def _make_list_of_distinct(self, x: pd.Series) -> list:
+        '''
+        return: list of unique values from a Series where
+         entries are arbitrary objects
+         (pandas unique() method does not work if entries are of complex types)
+        '''
+        uniques = pd.DataFrame({"temp": x.tolist()})\
+                    .assign(temp_str=lambda y: y["temp"].astype(str))\
+                    .drop_duplicates(subset=["temp_str"])\
+                    .drop("temp_str", axis=1).iloc[:, 0].tolist()
+
+        def is_empty(y):
+            is_empty_dict = (isinstance(y, dict) and (len(y) == 0))
+            is_empty_list = (isinstance(y, list) and (len(y) == 0))
+            return is_empty_dict or is_empty_list
+
+        return [el for el in uniques if not is_empty(el)]
+
+    def _make_flattened_list_of_distinct(self, x: pd.Series) -> list:
+        '''
+        return: list of unique values from a Series where
+         entries are arbitrary objects
+         (pandas unique() method does not work if entries are of complex types)
+        '''
+        uniques = self._make_list_of_distinct(x)
+        return uniques[0]
+
+    def _unroll_nested_names(self, names: list) -> list:
+        '''
+        Example: transform a list ["name.firstname", "name.surname"]
+        into ["name", "name.firstname", "name.surname"]
+        '''
+        unrolled = []
+
+        for c in names:
+            splitted = c.split(".")
+            for i in range(len(splitted)):
+                unrolled.append(".".join(splitted[:i+1]))
+
+        return unrolled
+
+
+if __name__ == "__main__":
+
+    # Testing
+
+    df = pd.DataFrame({
+                       "a": [1]*8 + [2]*8,
+                       "b": [10]*8 + [20]*8,
+                       "c": [100, 200]*8,
+                       "d.da": [11]*8 + [22]*8,
+                       "d.db": [33]*8 + [34]*8,
+                       "e.ea.eaa": [5]*8 + [55]*8,
+                       "e.ea.eab": [6]*8 + [66]*8,
+                       "e.eb": [2, 2, 3, 3]*4,
+                       "e.ec.eca": [1, 2, 3, 4]*4,
+                       "e.ec.ecb": [5, 6, 7, 8]*4,
+                       "f.fa": [1]*4 + [3]*4 + [11]*4 + [33]*4,
+                       "f.fb": [2]*4 + [3]*2 + [4]*2 + [22]*4 + [44]*4})
+
+    duplicate = pd.DataFrame({"c": [300, 400]*8})
+
+    df = pd.concat([df, duplicate], axis=1)
+
+    schm = {
+              "bsonType": "object",
+              "required": ["a"],
+              "properties": {
+
+                  "a": {"bsonType": "integer"},
+
+                  "b": {"bsonType": "integer"},
+
+                  "c": {
+                      "bsonType": "array",
+                      "items": {"bsonType": "integer"}
+                  },
+                  "d": {
+                      "bsonType": "object",
+                      "properties": {
+                          "da": {"bsonType": "integer"},
+                          "db": {"bsonType": "integer"}
+                       }
+                  },
+                  "e": {
+                      "bsonType": "object",
+                      "properties": {
+                          "ea": {
+                              "bsonType": "object",
+                              "properties": {
+                                  "eaa": {"bsonType": "integer"},
+                                  "eab": {"bsonType": "integer"}
+                               }
+
+                          },
+
+                          "eb": {
+                              "bsonType": "array",
+                              "items": {"bsonType": "integer"}
+                          },
+
+                          "ec": {
+                                "bsonType": "array",
+                                "items": {
+                                  "bsonType": "object",
+                                  "properties": {
+                                      "eca": {"bsonType": "integer"},
+                                      "ecb": {"bsonType": "integer"}
+                                    }
+                                  }
+                          }
+                      }
+                  },
+                  "f": {
+                      "bsonType": "array",
+                      "items": {
+                          "bsonType": "object",
+                          "properties": {
+                              "fa": {"bsonType": "integer"},
+                              "fb": {
+                                  "bsonType": "array",
+                                  "items": {"bsonType": "integer"}
+                              }
+                          }
+                      }
+                  }
+              }
+              }
+
+    grp_fields = ["a"]
+
+    result = DataFrameToCollection().to_list_of_documents(
+                    data=df,
+                    schema=schm,
+                    grp_fields=grp_fields)

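A minimal sketch of the reshaping that to_list_of_documents performs, assuming cdplib is importable and that "schema.json" is a placeholder path to any existing json schema file (the constructor requires a valid file even when a schema dict is passed to the method):

    import pandas as pd
    from cdplib.db_migration.DataFrameToCollection import DataFrameToCollection

    # dotted column names d.da / d.db map onto the nested field d of the schema
    df = pd.DataFrame({"a": [1, 1],
                       "d.da": [11, 11],
                       "d.db": [33, 33]})

    schema = {"bsonType": "object",
              "properties": {
                  "a": {"bsonType": "integer"},
                  "d": {"bsonType": "object",
                        "properties": {"da": {"bsonType": "integer"},
                                       "db": {"bsonType": "integer"}}}}}

    # "schema.json" is a placeholder for an existing schema file
    result = DataFrameToCollection(schema_path="schema.json")\
        .to_list_of_documents(data=df, schema=schema, grp_fields=["a"])

    # result: a dataframe with one row per distinct "a" and a dict-valued
    # column "d", e.g. a=1, d={"da": 11, "db": 33}
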
+ 10 - 10
cdplib/db_migration/MigrationCleaning.py

@@ -14,11 +14,11 @@ import gc
 
 sys.path.append(os.getcwd())
 
-from libraries.db_migration.ParseMapping import ParseMapping
-from libraries.db_migration.ParseJsonSchema import ParseJsonSchema
-from libraries.utils.ExceptionsHandler import ExceptionsHandler
-from libraries.utils.CleaningUtils import CleaningUtils
-from libraries.log import Log
+from cdplib.db_migration.ParseMapping import ParseMapping
+from cdplib.db_migration.ParseJsonSchema import ParseJsonSchema
+from cdplib.utils.ExceptionsHandler import ExceptionsHandler
+from cdplib.utils.CleaningUtils import CleaningUtils
+from cdplib.log import Log
 
 class MigrationCleaning:
     '''
@@ -38,7 +38,7 @@ class MigrationCleaning:
         '''
         self._log = Log('Migration Cleaning')
         self._exception_handler = ExceptionsHandler()
-        
+
         assert isinstance(inconsist_report_table, str),\
             "Inconsistent report table should be a tablename string"
 
@@ -58,7 +58,7 @@ class MigrationCleaning:
         self._mapping_path = mapping_path
         self._schema_paths = schema_paths
 
-        from libraries.db_handlers.SQLHandler import SQLHandlerPool
+        from cdplib.db_handlers.SQLHandler import SQLHandlerPool
         self._sql_db = SQLHandlerPool(20)
 
     def _assert_dataframe_input(self, data: pd.DataFrame):
@@ -222,7 +222,7 @@ class MigrationCleaning:
         data = data.copy(deep=True)
 
         #db = self._sql_db.aquire()
-        from libraries.db_handlers.SQLHandler import SQLHandler
+        from cdplib.db_handlers.SQLHandler import SQLHandler
         db = SQLHandler()
 
         if invalid_mask.sum() == 0:
@@ -242,7 +242,7 @@ class MigrationCleaning:
 
         db.append_to_table(data=data_inconsist,
                            tablename=self._inconsist_report_table)
-       
+
         n_rows_filtered = len(data_inconsist)
         n_instances_filtered = len(data_inconsist[self._filter_index_columns].drop_duplicates())
 
@@ -517,7 +517,7 @@ if __name__ == "__main__":
 
     # testing
 
-    from libraries.db_handlers.SQLHandler import SQLHandler
+    from cdplib.db_handlers.SQLHandler import SQLHandler
 
     mapping_path = os.path.join(".", "migration_mappings", "rs1_mapping.json")
 

+ 62 - 0
cdplib/db_migration/ParseDbSchema.py

@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Sep 25 08:22:20 2019
+
+@author: tanya
+"""
+
+import os
+import sys
+import abc
+sys.path.append(os.getcwd())
+
+
+class ParseDbSchema(metaclass=abc.ABCMeta):
+    '''
+    '''
+    def __init__(self, schema_paths: [list, str], log_file: str = None):
+        '''
+        '''
+        from cdplib.log import Log
+
+        self._log = Log(name="ParseDbSchema:", log_file=log_file)
+
+        if isinstance(schema_paths, str):
+            schema_paths = [schema_paths]
+
+        for schema_path in schema_paths:
+            if not os.path.isfile(schema_path):
+                err = "Schema not found"
+                self._log.error(err)
+                raise FileNotFoundError(err)
+
+    @abc.abstractmethod
+    def get_fields(self) -> list:
+        '''
+        '''
+        return
+
+    @abc.abstractmethod
+    def get_datetime_fields(self) -> list:
+        '''
+        '''
+        return
+
+    @abc.abstractmethod
+    def get_python_types(self) -> list:
+        '''
+        '''
+        return
+
+    @abc.abstractmethod
+    def get_default_values(self) -> list:
+        '''
+        '''
+        return
+
+    @abc.abstractmethod
+    def get_allowed_values(self) -> list:
+        '''
+        '''
+        return

+ 354 - 0
cdplib/db_migration/ParseJsonSchema.py

@@ -0,0 +1,354 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Jan 31 11:41:48 2019
+
+@author: tanya
+"""
+
+import os
+import sys
+from copy import deepcopy
+import numpy as np
+
+sys.path.append(os.getcwd())
+
+from cdplib.db_migration.ParseDbSchema import ParseDbSchema
+
+
+class ParseJsonSchema(ParseDbSchema):
+    '''
+    Class for retrieving column properties from mongodb jsonSchema
+    '''
+
+    def __init__(self, schema_paths: [list, str], log_file: str = None):
+        '''
+        '''
+        import json
+        from cdplib.log import Log
+
+        super().__init__(schema_paths=schema_paths, log_file=log_file)
+
+        self._log = Log(name="ParseJsonSchema", log_file=log_file)
+
+        # load schemas to dictionaries if they are valid json files
+
+        assert(isinstance(schema_paths, (list, str))),\
+            "Schema paths must be either str or lists"
+
+        if isinstance(schema_paths, str):
+            schema_paths = [schema_paths]
+
+        self._schema_paths = schema_paths
+
+        self.schemas = []
+
+        for schema_path in schema_paths:
+            try:
+                with open(schema_path, "r") as f:
+                    self.schemas.append(json.load(f))
+
+            except Exception as e:
+                err = ("Could not load json schema, "
+                       "Obtained error {}".format(e))
+
+                self._log.error(err)
+                raise Exception(err)
+
+    @property
+    def _collection_names(self) -> list:
+        '''
+        '''
+        # Don't use strip() instead of replace, since schema_c.strip("schema_")
+        # would discard the c as well, which is not an appropriate output
+        return [os.path.basename(p).replace("schema_","").split(".")[0] for p in self._schema_paths]
+
+    def get_fields(self) -> list:
+        '''
+        '''
+        return self._parse()
+
+    def get_fields_restricted_to_collection(self, collection_name: str) -> list:
+        '''
+        '''
+        schemas = [self.schemas[self._collection_names.index(collection_name)]]
+        return self._parse(schemas=schemas)
+
+    def get_required_fields(self) -> list:
+        '''
+        '''
+        return self._parse(required_only=True)
+
+    def get_mongo_types(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="bsonType")
+
+    def get_datetime_fields(self):
+        '''
+        '''
+        mongo_types = self.get_mongo_types()
+
+        return [k for k, v in mongo_types.items()
+                if v in ["date", "timestamp", "Date", "Timestamp"]]
+
+    def get_python_types(self) -> dict:
+        '''
+        '''
+        mongo_types = self.get_mongo_types()
+        python_types = {}
+
+        bson_to_python_types = {"double": float,
+                                "decimal": float,
+                                "string": str,
+                                "object": object,
+                                "array": list,
+                                "bool": bool,
+                                "int": int,
+                                "long": int,
+                                "date": np.dtype('<M8[ns]'),
+                                "timestamp": np.dtype('<M8[ns]')
+                                }
+
+        for k, v in mongo_types.items():
+
+            if isinstance(v, list):
+                if ("date" in v) or ("timestamp" in v):
+                    v = "date"
+                elif "string" in v:
+                    v = "string"
+                elif ("double" in v) or ("decimal" in v):
+                    v = "double"
+                elif ("null" in v) and (len(v) == 2) and ("int" not in v):
+                    v = [t for t in v if t != "null"][0]
+                else:
+                    err = "Type {0}: {1} not convertibale".format(k, v)
+                    self._log.error(err)
+                    raise Exception(err)
+
+            if v in bson_to_python_types:
+                python_types[k] = bson_to_python_types[v]
+
+        return python_types
+
+    def get_patterns(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="pattern")
+
+    def get_default_values(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="default")
+
+    def get_allowed_values(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="enum")
+
+    def get_maximum_value(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="maximum")
+
+    def get_minimum_value(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="minimum")
+
+    def get_max_items(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="maxItems")
+
+    def get_min_items(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="minItems")
+
+    def get_field_descriptions(self) -> dict:
+        '''
+        '''
+        return self._parse(field_info="description")
+
+    def _parse(self,
+               field_info: str = None,
+               required_only: bool = False,
+               schemas: list = None):
+        '''
+        '''
+        if schemas is None:
+            schemas = self.schemas
+
+        result = self._parse_one(schema=schemas[0],
+                                 field_info=field_info,
+                                 required_only=required_only)
+
+        for schema in schemas[1:]:
+
+            next_result = self._parse_one(schema=schema,
+                                          field_info=field_info,
+                                          required_only=required_only)
+
+            if isinstance(result, list):
+                result.extend(next_result)
+            else:
+                result.update(next_result)
+
+        return result
+
+    def _parse_one(self,
+                   schema: dict,
+                   field_info: str = None,
+                   required_only: bool = False,
+                   super_field_name: str = None,
+                   already_parsed: (list, dict) = None) -> (list, dict):
+        '''
+        Recursive function that returns a list of (nested) field names or
+        a dictionary of (nested) field names with field characteristics.
+
+        :param schema: if None => entire self.schema, or a sub-schema
+            of self.schema
+
+        :param field_info: optional, if provided a dictionary of field
+            names with field characteristics is returned (for examples
+            bsonType of each field), else a list of fields is returned
+
+        :param required_only: when True, only returns fields marked as
+            required in the mongo schema
+
+        :param super_field_name: needed for recursion
+            Example: the field 'article' has
+            subfields 'id' and 'supplier'.
+            If we parse the sub-document corresponding to article, then
+            super_field_name is 'article' and we might get an output like
+            {'article.id': string, 'article.supplier': string}
+
+        :param already_parsed: needed for recursion
+
+        '''
+        schema = deepcopy(schema)
+
+        assert(isinstance(schema, dict)),\
+            "Parameter 'schema' must be a dict"
+
+        if field_info is None:
+            # parse a list of fields
+            if already_parsed is None:
+                already_parsed = []
+            else:
+                assert(isinstance(already_parsed, list)),\
+                    "Parameter 'already_parsed' must be of type list"
+        else:
+            # parse a dictionary of field names with field characteristics
+            if already_parsed is None:
+                already_parsed = {}
+            else:
+                assert(isinstance(already_parsed, dict)),\
+                    "Parameter 'already_parsed' must be of type dict"
+
+        # If schema is nested, then
+        # either it is of bsonType object
+        # and the field information is stored under the key 'properties'
+        # or it is of bsonType array
+        # and the field information is stored in sub-schemas
+        # under the key 'items'
+
+        # if schema is of bsonType object
+        if "properties" in schema.keys():
+            if "required" in schema.keys():
+                required_subfields = schema["required"]
+            else:
+                required_subfields = []
+
+            for sub_field_name in schema["properties"].keys():
+
+                sub_schema = schema["properties"][sub_field_name]
+
+                # only process fields that are required
+                if required_only and\
+                        (sub_field_name not in required_subfields):
+                    pass
+                else:
+                    if super_field_name is not None:
+                        field_name = '.'.join([super_field_name,
+                                               sub_field_name])
+                    else:
+                        field_name = sub_field_name
+
+                    # if the given sub-field is nested, parse the
+                    # sub-schema corresponding to this sub-field
+                    self._parse_one(
+                            schema=sub_schema,
+                            super_field_name=field_name,
+                            field_info=field_info,
+                            already_parsed=already_parsed,
+                            required_only=required_only)
+
+        # if schema is of bsonType array
+        elif "items" in schema.keys():
+            # one schema for all items
+            if isinstance(schema["items"], dict):
+
+                sub_schema = schema["items"]
+
+                self._parse_one(schema=sub_schema,
+                                super_field_name=super_field_name,
+                                field_info=field_info,
+                                already_parsed=already_parsed,
+                                required_only=required_only)
+
+            # list of separate schemas for each item
+            elif isinstance(schema["items"], list):
+
+                for sub_schema in schema["items"]:
+                    self._parse_one(schema=sub_schema,
+                                    super_field_name=super_field_name,
+                                    field_info=field_info,
+                                    already_parsed=already_parsed,
+                                    required_only=required_only)
+            else:
+                raise Exception(('Schema is not composed correctly: '
+                                 'items must be a dictionary or a list'))
+        else:
+            # If neither properties nor items is in schema keys
+            # we reached the last level of nestedness,
+            # field information is stored in the schema keys.
+            field_name = super_field_name
+
+            if field_info is None:
+                already_parsed.append(field_name)
+            else:
+                if field_info in schema.keys():
+                    already_parsed[field_name] = schema[field_info]
+                else:
+                    pass
+
+        return already_parsed
+
+
+if __name__ == "__main__":
+
+    # Only for testing
+
+    schema_path = os.path.join(".", "mongo_schema", "schema_wheelsets.json")
+
+    if os.path.isfile(schema_path):
+
+        parse_obj = ParseJsonSchema(schema_paths=schema_path)
+
+        fields = parse_obj.get_fields()
+
+        required_fileds = parse_obj.get_required_fields()
+
+        patterns = parse_obj.get_patterns()
+
+        mongo_types = parse_obj.get_mongo_types()
+
+        python_types_except_dates = parse_obj.get_python_types()
+
+        datetime_fields = parse_obj.get_datetime_fields()
+
+        allowed_values = parse_obj.get_allowed_values()
+
+        descriptions = parse_obj.get_field_descriptions()

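To illustrate the dotted field names produced by the recursive _parse_one, a small sketch that writes a hypothetical nested schema to a temporary file and parses it (assumes cdplib is importable):

    import json
    import tempfile
    from cdplib.db_migration.ParseJsonSchema import ParseJsonSchema

    # hypothetical schema: an object field 'article' with two string sub-fields
    schema = {"bsonType": "object",
              "required": ["article"],
              "properties": {
                  "article": {"bsonType": "object",
                              "properties": {
                                  "id": {"bsonType": "string"},
                                  "supplier": {"bsonType": "string"}}}}}

    with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
        json.dump(schema, f)

    parser = ParseJsonSchema(schema_paths=f.name)

    print(parser.get_fields())       # ['article.id', 'article.supplier']
    print(parser.get_mongo_types())  # {'article.id': 'string', 'article.supplier': 'string'}
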
+ 164 - 0
cdplib/db_migration/ParseMapping.py

@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Sep 20 15:33:17 2019
+
+@author: tanya
+"""
+
+import os
+import sys
+import numpy as np
+sys.path.append(os.getcwd())
+
+class ParseMapping:
+    '''
+    '''
+    def __init__(self, mapping_path: str, log_name: str = "ParseMapping",
+                 source: str = "original_name", target: str = "mongo_name",
+                 target_collection: str = "mongo_collection"):
+        '''
+        '''
+        import json
+        from cdplib.log import Log
+
+        self._log = Log('Parse Mapping')
+
+        if not os.path.isfile(mapping_path):
+            err = "Mapping not found"
+            self._log.error(err)
+            raise FileNotFoundError(err)
+
+        try:
+            with open(mapping_path, "r") as f:
+                self._mapping = json.load(f)
+
+        except Exception as e:
+            err = ("Could not load mapping. "
+                   "Exit with error {}".format(e))
+            self._log.error(err)
+            raise Exception(err)
+
+        self._source = source
+        self._target = target
+        self._target_collection = target_collection
+
+    def get_field_mapping(self) -> dict:
+        '''
+        '''
+        assert(all([set([self._source, self._target]) <= set(d)
+                    for d in self._mapping]))
+
+        return {d[self._source]: d[self._target] for d in self._mapping}
+
+    def _get_fields_satistisfying_condition(self, key: str, value) -> list:
+        '''
+        '''
+        assert(all([self._source in d for d in self._mapping])),\
+            "Invalid from field"
+
+        return [d[self._source] for d in self._mapping
+                if (key in d) and (d[key] == value)]
+
+    def get_required_fields(self) -> list:
+        '''
+        '''
+        return self._get_fields_satistisfying_condition(key="required",
+                                                        value=1)
+
+    def get_date_fields(self) -> list:
+        '''
+        '''
+        return self._get_fields_satistisfying_condition(key="type",
+                                                        value="Date")
+
+    def get_fields_restricted_to_collecton(self, collection_name: str) -> list:
+        '''
+        '''
+        return self._get_fields_satistisfying_condition(key=self._target_collection,
+                                                        value=collection_name)
+
+    def _get_info(self, key: str, value=None) -> dict:
+        '''
+        '''
+        assert(all([self._source in d for d in self._mapping])),\
+            "Invalid from field"
+
+        return {d[self._source]: d[key] for d in self._mapping
+                if (key in d) and ((value is not None)
+                and (d[key] == value)) or (key in d)}
+
+    def get_default_values(self) -> dict:
+        '''
+        '''
+        return self._get_info(key="default_values")
+
+    def get_date_formats(self) -> dict:
+        '''
+        '''
+        return self._get_info(key="date_format")
+
+    def get_types(self) -> dict:
+        '''
+        '''
+        return self._get_info(key="type")
+
+    def get_python_types(self) -> dict:
+        '''
+        '''
+        sql_to_python_dtypes = {
+                "Text": str,
+                "Date": np.dtype('<M8[ns]'),
+                "Double": float,
+                "Integer": int
+                }
+
+        sql_types = self.get_types()
+
+        return {k: sql_to_python_dtypes[v] for k, v in sql_types.items()}
+
+    def get_value_mappings(self) -> dict:
+        '''
+        '''
+        return self._get_info(key="value_mapping")
+
+    def get_column_numbers(self) -> list:
+        '''
+        '''
+        if all(["column_number" in d for d in self._mapping]):
+            column_numbers = [d["column_number"] for d in self._mapping]
+
+        elif all(["column_number" not in d for d in self._mapping]):
+            column_numbers = list(range(len(self._mapping)))
+
+        else:
+            err = ("Incorrectly filled mapping. Column numbers should ",
+                   "either in all or in neither of the fields")
+            self.log.err(err)
+            raise Exception(err)
+
+        return column_numbers
+
+
+if __name__ == "__main__":
+
+    mapping_path = os.path.join(".", "migration_mappings", "rs0_mapping.json")
+
+    if os.path.isfile(mapping_path):
+
+        print("found mapping path")
+
+        parser = ParseMapping(mapping_path, source="internal_name",
+                              target="mongo_name")
+
+        internal_to_mongo_mapping = parser.get_field_mapping()
+
+        original_to_internal_mapping = parser.get_field_mapping()
+
+        default_values = parser.get_default_values()
+
+        types = parser.get_types()
+
+        column_numbers = parser.get_column_numbers()
+
+        print("Done testing!")