prettified code

tanja 4 years ago
parent
commit
98f696ff89

+ 117 - 94
cdplib/gridsearch/GridSearchPipelineSelector.py

@@ -78,99 +78,118 @@ class GridSearchPipelineSelector(PipelineSelector):
 
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
-        super().__init__(cost_func=cost_func,
-                         greater_is_better=greater_is_better,
-                         trials_path=trials_path,
-                         backup_trials_freq=backup_trials_freq,
-                         cross_val_averaging_func=cross_val_averaging_func,
-                         additional_metrics=additional_metrics,
-                         strategy_name=strategy_name,
-                         stdout_log_level=stdout_log_level)
+        try:
+
+            super().__init__(cost_func=cost_func,
+                             greater_is_better=greater_is_better,
+                             trials_path=trials_path,
+                             backup_trials_freq=backup_trials_freq,
+                             cross_val_averaging_func=cross_val_averaging_func,
+                             additional_metrics=additional_metrics,
+                             strategy_name=strategy_name,
+                             stdout_log_level=stdout_log_level)
+
+            self._logger = Log("GridsearchPipelineSelector: ",
+                               stdout_log_level=stdout_log_level)
+
+            self._trials = self._trials or []
 
-        self._trials = self._trials or []
+        except Exception as e:
+            err = "Failed initialization. Exit with error: {}".format(e)
+
+            self._logger.log_and_raise_error(err)
 
     def run_trials(self):
         """
         """
         try:
-            assert(self.attached_space)
-        except AssertionError:
-            err = "Parameter distribution space must be attached"
-            self._logger.log_and_raise_error(err)
+            assert(self.attached_space),\
+                "Parameter distribution space must be attached"
+
+            done_trial_ids = [{"name": trial["name"],
+                               "params": trial["params"],
+                               "status": trial["status"]}
+                              for trial in self._trials]
 
-        done_trial_ids = [{"name": trial["name"],
-                           "params": trial["params"],
-                           "status": trial["status"]}
-                          for trial in self._trials]
+            # list (generator) of (flattened) dictionaries
+            # with all different combinations of
+            # parameters for different pipelines
+            # from the space definition.
+            space_unfolded = ({"name": pipeline_dist["name"],
+                               "pipeline": pipeline_dist["pipeline"],
+                               "params": param_set}
+                              for pipeline_dist in self._space
+                              for param_set in
+                              (dict(ChainMap(*tup)) for tup in
+                               product(*[[{k: v} for v in
+                                          pipeline_dist["params"][k]]
+                                         for k in pipeline_dist["params"]])))
 
-        # list (generator) of (flattened) dictionaries
-        # with all different combinations of
-        # parameters for different pipelines
-        # from the space definition.
-        space_unfolded = ({"name": pipeline_dist["name"],
-                           "pipeline": pipeline_dist["pipeline"],
-                           "params": param_set}
-                          for pipeline_dist in self._space
-                          for param_set in
-                          (dict(ChainMap(*tup)) for tup in
-                           product(*[[{k: v} for v in
-                                      pipeline_dist["params"][k]]
-                                     for k in pipeline_dist["params"]])))
+            for space_element in space_unfolded:
 
-        for space_element in space_unfolded:
+                trial_id = {"name": space_element["name"],
+                            "params": space_element["params"],
+                            "status": 'ok'}
 
-            trial_id = {"name": space_element["name"],
-                        "params": space_element["params"],
-                        "status": 'ok'}
+                if trial_id in done_trial_ids:
+                    continue
 
-            if trial_id in done_trial_ids:
-                continue
+                result = self._objective(space_element)
 
-            result = self._objective(space_element)
+                pipeline = space_element["pipeline"].set_params(
+                        **space_element["params"])
 
-            pipeline = space_element["pipeline"].set_params(
-                    **space_element["params"])
+                trial = {"name": space_element["name"],
+                         "params": space_element["params"],
+                         "pipeline": pipeline}
 
-            trial = {"name": space_element["name"],
-                     "params": space_element["params"],
-                     "pipeline": pipeline}
+                trial.update(result)
 
-            trial.update(result)
+                self._trials.append(trial)
 
-            self._trials.append(trial)
+            self.finished_tuning = True
 
-        self.finished_tuning = True
+            self.total_tuning_time = datetime.datetime.today()\
+                - self.start_tuning_time
 
-        self.total_tuning_time = datetime.datetime.today()\
-            - self.start_tuning_time
+            self._backup_trials()
 
-        self._backup_trials()
+        except Exception as e:
+            err = "Failed to run trials. Exit with error: {}".format(e)
+            self._logger.log_and_raise_error(err)
 
     @property
     def number_of_trials(self) -> int:
         """
         Number of trials already run in the current trials object
         """
-        if self._trials is None:
-            return 0
-        else:
-            return len(self._trials)
+        try:
+            if self._trials is None:
+                return 0
+            else:
+                return len(self._trials)
+
+        except Exception as e:
+            err = ("Failed to retrieve the number of trials. "
+                   "Exit with error: {}".format(e))
+
+            self._logger.log_and_raise_error(err)
 
     @property
     def best_trial(self) -> dict:
         """
         """
         try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
+
             return max(self._trials, key=lambda x: x["score"])
+
         except Exception as e:
             err = ("Could not retrieve the best trial. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -178,16 +197,16 @@ class GridSearchPipelineSelector(PipelineSelector):
         '''
         '''
         try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
+
             return self.best_trial["score"]
+
         except Exception as e:
             err = ("Could not retrieve the best trial. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -195,16 +214,16 @@ class GridSearchPipelineSelector(PipelineSelector):
         '''
         '''
         try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
+
             return self.best_trial["score_variance"]
+
         except Exception as e:
             err = ("Could not retrieve the best trial. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -212,17 +231,16 @@ class GridSearchPipelineSelector(PipelineSelector):
         '''
         '''
         try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
+
             return self.best_trial["pipeline"]
 
         except Exception as e:
             err = ("Could not retrieve the best trial. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def get_n_best_trial_pipelines(self, n: int) -> list:
@@ -231,18 +249,13 @@ class GridSearchPipelineSelector(PipelineSelector):
         best hyperparameters
         """
         try:
-            assert(isinstance(n, int))
-        except AssertionError:
-            err = "Parameter n must be an int"
-            self._logger.log_and_raise_error(err)
+            assert(isinstance(n, int)),\
+                "Parameter n must be an int"
+
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
 
-        try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
             return [trial["pipeline"] for trial in
                     sorted(self._trials, key=lambda x: x["score"],
                            reverse=True)[:n]]
@@ -250,6 +263,7 @@ class GridSearchPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve n best trials. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def get_n_best_trial_pipelines_of_each_type(self, n: int) -> list:
@@ -259,12 +273,13 @@ class GridSearchPipelineSelector(PipelineSelector):
         with corresponding hyperparameters
         """
         try:
-            assert(self._trials is not None)
-        except AssertionError:
-            err = ("Trials object is empty. "
-                   "Call run_trials method.")
-            self._logger.log_and_raise_error(err)
-        try:
+            assert(isinstance(n, int)),\
+                "Parameter n must be an int"
+
+            assert(self._trials is not None),\
+                ("Trials object is empty. "
+                 "Call run_trials method.")
+
             return pd.DataFrame(self._trials)\
                      .sort_values(by=["name", "score"],
                                   ascending=False)\
@@ -274,6 +289,7 @@ class GridSearchPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve n best trials of each type."
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def trials_to_excel(self, path: str):
@@ -284,7 +300,14 @@ class GridSearchPipelineSelector(PipelineSelector):
         as well as additional information configured
         through self.save_result method.
         """
-        pd.DataFrame(self._trials).to_excel(path)
+        try:
+            pd.DataFrame(self._trials).to_excel(path)
+
+        except Exception as e:
+            err = ("Failed to write trials to excel. "
+                   "Exit with error: {}".format(e))
+
+            self._logger.log_and_raise_error(err)
 
 
 if __name__ == "__main__":

+ 87 - 63
cdplib/hyperopt/HyperoptPipelineSelector.py

@@ -94,16 +94,25 @@ class HyperoptPipelineSelector(PipelineSelector):
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
 
-        super().__init__(cost_func=cost_func,
-                         greater_is_better=greater_is_better,
-                         trials_path=trials_path,
-                         backup_trials_freq=backup_trials_freq,
-                         cross_val_averaging_func=cross_val_averaging_func,
-                         additional_metrics=additional_metrics,
-                         strategy_name=strategy_name,
-                         stdout_log_level=stdout_log_level)
+        try:
+
+            super().__init__(cost_func=cost_func,
+                             greater_is_better=greater_is_better,
+                             trials_path=trials_path,
+                             backup_trials_freq=backup_trials_freq,
+                             cross_val_averaging_func=cross_val_averaging_func,
+                             additional_metrics=additional_metrics,
+                             strategy_name=strategy_name,
+                             stdout_log_level=stdout_log_level)
+
+            self._logger = Log("HyperoptPipelineSelector: ",
+                               stdout_log_level=stdout_log_level)
+
+            self._trials = self._trials or Trials()
 
-        self._trials = self._trials or Trials()
+        except Exception as e:
+            err = "Failed to intialize. Exit with error: {}".format(e)
+            self._logger.log_and_raise_error(err)
 
     def run_trials(self,
                    niter: int,
@@ -118,28 +127,19 @@ class HyperoptPipelineSelector(PipelineSelector):
             random search or random for random search
         '''
         try:
-            assert(self.attached_space)
-        except AssertionError:
-            err = ("Space must be attach to be able to "
-                   "retrieve this information.")
-            self._logger.log_and_raise_error(err)
+            assert(self.attached_space),\
+                ("Space must be attach to be able to "
+                 "retrieve this information.")
 
-        try:
-            assert(isinstance(niter, int))
-        except AssertionError:
-            err = "Parameter 'niter' must be of int type"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+            assert(isinstance(niter, int)),\
+                "Parameter 'niter' must be of int type"
 
-        try:
             # right now only two algorithms are provided by hyperopt
-            assert(algo in [tpe.suggest, rand.suggest])
-        except AssertionError:
-            err = ("Parameter 'algo' can be now only tpe or random. "
-                   "If other algorithms have been developped by "
-                   "by hyperopt, plased add them to the list.")
-            self._logger.log_and_raise_error(err)
+            assert(algo in [tpe.suggest, rand.suggest]),\
+                ("Parameter 'algo' can be now only tpe or random. "
+                 "If other algorithms have been developped by "
+                 "by hyperopt, plased add them to the list.")
 
-        try:
             self._trials = self._trials or Trials()
 
             self._logger.info(("Starting {0} iterations of search "
@@ -183,6 +183,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
         try:
             return len(self._trials.trials)
+
         except Exception as e:
             err = ("Failed to retrieve the number of trials. "
                    "Exit with error {}".format(e))
@@ -198,15 +199,12 @@ class HyperoptPipelineSelector(PipelineSelector):
         After retrieving the space element,
             parameters of the pipeline are set.
         """
-        trial = deepcopy(trial)
-
         try:
-            assert(self.attached_space)
-        except AssertionError:
-            err = "Hyperparameter space not attached."
-            self._logger.log_and_raise_error(err)
+            trial = deepcopy(trial)
+
+            assert(self.attached_space),\
+                "Hyperparameter space not attached."
 
-        try:
             space_element = space_eval(self._space,
                                        {k: v[0] for k, v in
                                         trial['misc']['vals'].items()
@@ -224,6 +222,8 @@ class HyperoptPipelineSelector(PipelineSelector):
             err = ("Failed to retrieve a space element from a trial. "
                    "Exit with error: {}".format(e))
 
+            self._logger.log_and_raise_error(err)
+
     def _get_space_element_from_index(self, i: int) -> dict:
         """
         Gets the space element of shape
@@ -231,18 +231,16 @@ class HyperoptPipelineSelector(PipelineSelector):
         from the trial number i.
         """
         try:
-            assert(len(self._trials.trials) > i)
-        except AssertionError:
-            err = ("Trials object is not long enough "
-                   "to retrieve index {}".format(i))
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+            assert(len(self._trials.trials) > i),\
+                ("Trials object is not long enough "
+                 "to retrieve index {}".format(i))
 
-        try:
             return self._get_space_element_from_trial(self._trials.trials[i])
 
         except Exception as e:
             err = ("Failed to get space element from index. "
                    "Exit with error {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def _get_pipeline_from_index(self, i: int) -> Pipeline:
@@ -257,6 +255,7 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve pipeline from index. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -309,6 +308,7 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve best trial score. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -324,6 +324,7 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve best trial score variance. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     @property
@@ -339,6 +340,7 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve best trial pipeline. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def get_n_best_trial_pipelines(self, n: int) -> list:
@@ -347,6 +349,9 @@ class HyperoptPipelineSelector(PipelineSelector):
         documented in trials
         """
         try:
+            assert(isinstance(n, int)),\
+                "Parameter n must be an int"
+
             if len(self._trials.trials) == 0:
                 return []
             else:
@@ -360,6 +365,7 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
             err = ("Failed to retrieve n best pipelines. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def get_n_best_trial_pipelines_of_each_type(self, n: int) -> dict:
@@ -367,35 +373,53 @@ class HyperoptPipelineSelector(PipelineSelector):
         :return: a dictionary where keys are pipeline names,
         and values are lists of best pipelines with this name
         """
-        scores = [trial["result"]["score"] for trial in self._trials.trials]
-
-        names = [self._get_space_element_from_trial(trial)["name"]
-                 for trial in self._trials.trials]
-
-        return pd.DataFrame({"name": names, "score": scores})\
-                 .sort_values(by=["name", "score"], ascending=False)\
-                 .groupby("name")\
-                 .head(n)\
-                 .reset_index()\
-                 .assign(pipeline=lambda x: x["index"]
-                         .apply(self._get_pipeline_from_index))\
-                 .groupby("name")["pipeline"]\
-                 .apply(lambda x: list(x))\
-                 .to_dict()
+        try:
+            assert(isinstance(n, int)),\
+                "Parameter n must be an int"
+
+            scores = [trial["result"]["score"]
+                      for trial in self._trials.trials]
+
+            names = [self._get_space_element_from_trial(trial)["name"]
+                     for trial in self._trials.trials]
+
+            return pd.DataFrame({"name": names, "score": scores})\
+                     .sort_values(by=["name", "score"], ascending=False)\
+                     .groupby("name")\
+                     .head(n)\
+                     .reset_index()\
+                     .assign(pipeline=lambda x: x["index"]
+                             .apply(self._get_pipeline_from_index))\
+                     .groupby("name")["pipeline"]\
+                     .apply(lambda x: list(x))\
+                     .to_dict()
+
+        except Exception as e:
+            err = ("Failed to get n best pipelines of each type. "
+                   "Exit with error: {}".format(e))
+
+            self._logger.log_and_raise_error(err)
 
     def trials_to_excel(self, path: str = None):
         """
         Saves an excel file with pipeline names, scores,
         parameters, and timestamps.
         """
-        results = [trial["result"] for trial in self._trials.trials]
+        try:
+            results = [trial["result"] for trial in self._trials.trials]
+
+            space_elements = [self._get_space_element_from_trial(trial)
+                              for trial in self._trials.trials]
 
-        space_elements = [self._get_space_element_from_trial(trial)
-                          for trial in self._trials.trials]
+            pd.DataFrame([{**result, **space_element}
+                          for result, space_element in
+                          zip(results, space_elements)]).to_excel(path)
 
-        pd.DataFrame([{**result, **space_element}
-                      for result, space_element in
-                      zip(results, space_elements)]).to_excel(path)
+        except Exception as e:
+            err = ("Failed to write trials to excel. "
+                   "Exit with error: {}".format(e))
+
+            self._logger.log_and_raise_error(err)
 
 
 if __name__ == '__main__':
@@ -406,8 +430,8 @@ if __name__ == '__main__':
     from sklearn.datasets import load_breast_cancer
     from cdplib.log import Log
     from cdplib.db_handlers import MongodbHandler
-    from cdplib.hyperopt.space_sample import space
-    # from cdplib.hyperopt.composed_space_sample import space
+    # from cdplib.hyperopt.space_sample import space
+    from cdplib.hyperopt.composed_space_sample import space
 
     trials_path = "hyperopt_trials_TEST.pkl"
     additional_metrics = {"precision": precision_score}

+ 4 - 2
cdplib/hyperopt/composed_space_sample.py

@@ -93,8 +93,10 @@ models = [
 
         # the default solver does not accept l1 penalty
         {"name": "lr",
-         "object": LogisticRegression(n_jobs=-1, random_state=33,
-                                      solver='liblinear'),
+         "object": LogisticRegression(random_state=33,
+                                      solver='liblinear',
+                                      # n_jobs=-1
+                                      ),
          "params":  {
            "penalty": hp.choice("lr__penalty", ["l1", "l2"]),
            "C": hp.uniform("lr__C", 0.1, 1000)}},

+ 194 - 208
cdplib/pipeline_selector/PipelineSelector.py

@@ -97,104 +97,111 @@ class PipelineSelector(ABC):
 
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
-        self._logger = Log("PipelineSelector: ",
-                           stdout_log_level=stdout_log_level)
-
-        input_errors = [(cost_func, Callable,
-                         "Parameter 'cost_func' must be a Callable"),
-                        (greater_is_better, bool,
-                         "Parameter 'greater_is_better' must be bool type"),
-                        (trials_path, str,
-                         "Parameter 'trials_path' must be of string type"),
-                        (cross_val_averaging_func, (Callable, None.__class__),
-                         ("Parameter 'cross_val_averaging_func'"
-                          "must be a Callable")),
-                        (backup_trials_freq, (int, None.__class__),
-                         "Parameter backup_trials_freq must be an int"),
-                        (additional_metrics, (dict, None.__class__),
-                         "Parameter additional_metrics must be a dict"),
-                        (strategy_name, (str, None.__class__),
-                         "Parameter strategy_name must be a str"),
-                        (stdout_log_level, str,
-                         "Parameter stdout_log_level must be a str")]
-
-        for p, t, err in input_errors:
-            try:
-                assert(isinstance(p, t))
-            except AssertionError:
-                self._logger.log_and_raise_error(err, ErrorType=NameError)
-
         try:
+
+            self._logger = Log("PipelineSelector: ",
+                               stdout_log_level=stdout_log_level)
+
+            input_errors = [
+                    (cost_func, Callable,
+                     "Parameter 'cost_func' must be a Callable"),
+                    (greater_is_better, bool,
+                     "Parameter 'greater_is_better' must be bool type"),
+                    (trials_path, str,
+                     "Parameter 'trials_path' must be of string type"),
+                    (cross_val_averaging_func, (Callable, None.__class__),
+                     ("Parameter 'cross_val_averaging_func'"
+                      "must be a Callable")),
+                    (backup_trials_freq, (int, None.__class__),
+                     "Parameter backup_trials_freq must be an int"),
+                    (additional_metrics, (dict, None.__class__),
+                     "Parameter additional_metrics must be a dict"),
+                    (strategy_name, (str, None.__class__),
+                     "Parameter strategy_name must be a str"),
+                    (stdout_log_level, str,
+                     "Parameter stdout_log_level must be a str")]
+
+            for p, t, err in input_errors:
+                assert(isinstance(p, t)), err
+
             assert((additional_metrics is None) or
                    all([isinstance(metric, Callable)
-                        for metric in additional_metrics.values()]))
-        except AssertionError:
-            err = "Metrics in additional_metrics must be Callables"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
-
-        ExceptionsHandler(self._logger).assert_is_directory(path=trials_path)
-
-        self.attached_space = False
-        self.attached_data = False
-        self.configured_cross_validation = False
-        self.configured_summary_saving = False
-
-        self._cost_func = cost_func
-        # score factor is 1 when cost_func is minimized,
-        # -1 when cost func is maximized
-        self._score_factor = (not greater_is_better) - greater_is_better
-        self.trials_path = trials_path
-        self._backup_trials_freq = backup_trials_freq
-        self._cross_val_averaging_func = cross_val_averaging_func or np.mean
-        self._additional_metrics = additional_metrics or {}
-        self._strategy_name = strategy_name
-        self._data_path = None
-        self._cv_path = None
-
-        # best_score can be also read from trials
-        # but is kept explicitely in order not to
-        # search through the trials object every time
-        # loss is the opposite of score
-        self.best_score = np.nan
-
-        self._cross_validation = sklearn_cross_validation
-
-        # if a trials object already exists at the given path,
-        # it is loaded and the search is continued. Else,
-        # the search is started from the beginning.
-        if os.path.isfile(self.trials_path):
-            try:
-                with open(self.trials_path, "rb") as f:
-                    self._trials = pickle.load(f)
-
-                self._start_iteration = self.number_of_trials
-
-                self.best_score = self.best_trial_score
-
-                self._logger.info(("Loaded an existing trials object"
-                                   "Consisting of {} trials")
-                                  .format(self._start_iteration))
-
-            except Exception as e:
-                err = ("Trials object could not be loaded. "
-                       "Exit with error {}").format(e)
-                self._logger.log_and_raise_error(err)
+                        for metric in additional_metrics.values()])),\
+                "Metrics in additional_metrics must be Callables"
+
+            ExceptionsHandler(self._logger)\
+                .assert_is_directory(path=trials_path)
+
+            self.attached_space = False
+            self.attached_data = False
+            self.configured_cross_validation = False
+            self.configured_summary_saving = False
+
+            self._cost_func = cost_func
+            # score factor is 1 when cost_func is minimized,
+            # -1 when cost func is maximized
+            self._score_factor = (not greater_is_better) - greater_is_better
+            self.trials_path = trials_path
+            self._backup_trials_freq = backup_trials_freq
+            self._cross_val_averaging_func = cross_val_averaging_func\
+                or np.mean
+            self._additional_metrics = additional_metrics or {}
+            self._strategy_name = strategy_name
+            self._data_path = None
+            self._cv_path = None
+
+            # best_score can also be read from trials
+            # but is kept explicitly in order not to
+            # search through the trials object every time.
+            # loss is the opposite of score
+            self.best_score = np.nan
+
+            # if cross-validation is not configured,
+            # sklearn cross-validation method is taken by default
+            self._cross_validation = sklearn_cross_validation
+
+            # if a trials object already exists at the given path,
+            # it is loaded and the search is continued. Else,
+            # the search is started from the beginning.
+            if os.path.isfile(self.trials_path):
+                try:
+                    with open(self.trials_path, "rb") as f:
+                        self._trials = pickle.load(f)
+
+                    self._start_iteration = self.number_of_trials
+
+                    self.best_score = self.best_trial_score
+
+                    self._logger.info(("Loaded an existing trials object"
+                                       "Consisting of {} trials")
+                                      .format(self._start_iteration))
+
+                except Exception as e:
+                    err = ("Trials object could not be loaded. "
+                           "Exit with error {}").format(e)
+                    self._logger.log_and_raise_error(err)
+                    self._trials = None
+
+            else:
+                self._logger.warning(("No existing trials object was found, "
+                                      "Starting from scratch."))
+
                 self._trials = None
+                self._start_iteration = 0
 
-        else:
-            self._logger.warning(("No existing trials object was found, "
-                                  "Starting from scratch."))
+            # keeping track of the current search iteration
+            self._iteration = self._start_iteration
+            self._score_improved = False
 
-            self._trials = None
-            self._start_iteration = 0
+            self.start_tuning_time = datetime.datetime.today()
+            self.total_tuning_time = None
+            self.finished_tuning = False
 
-        # keeping track of the current search iteration
-        self._iteration = self._start_iteration
-        self._score_improved = False
+        except Exception as e:
+            err = ("Failed to initialize the class. "
+                   "Exit with error: {}".format(e))
 
-        self.start_tuning_time = datetime.datetime.today()
-        self.total_tuning_time = None
-        self.finished_tuning = False
+            self._logger.log_and_raise_error(err)
 
     def _backup_trials(self):
         '''
@@ -204,6 +211,7 @@ class PipelineSelector(ABC):
         try:
             with open(self.trials_path, "wb") as f:
                 pickle.dump(self._trials, f)
+
         except Exception as e:
             err = "Could not backup trials. Exit with error: {}".format(e)
             self._logger.log_and_raise_error(err)
@@ -217,19 +225,14 @@ class PipelineSelector(ABC):
              signature as sklearn.model_selection.cross_validate
         """
         try:
-            assert(isinstance(cross_validation, Callable))
-        except AssertionError:
-            err = "Parameter cross_validation must be a function"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+            assert(isinstance(cross_validation, Callable)),\
+                "Parameter cross_validation must be a function"
 
-        try:
             kwargs = kwargs or {}
-            assert(isinstance(kwargs, dict))
-        except AssertionError:
-            err = "Paramter kwargs must be a dict"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
 
-        try:
+            assert(isinstance(kwargs, dict)),\
+                "Paramter kwargs must be a dict"
+
             self._cross_validation = functools.partial(
                     self._cross_validation, **kwargs)
 
@@ -244,6 +247,7 @@ class PipelineSelector(ABC):
         except Exception as e:
             err = ("Failed to configure cross-validation. "
                    "Exit with error: {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def configure_cross_validation_from_module(self,
@@ -258,12 +262,9 @@ class PipelineSelector(ABC):
         """
         try:
             assert(isinstance(module_path, str) and
-                   isinstance(name, str))
-        except AssertionError:
-            err = "Parameters module_path and name must be of str type"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+                   isinstance(name, str)),\
+                   "Parameters module_path and name must be of str type"
 
-        try:
             self._cross_validation = \
                 LoadingUtils().load_from_module(
                         module_path=module_path, name=name)
@@ -277,7 +278,8 @@ class PipelineSelector(ABC):
         except Exception as e:
             err = ("Failed to load cross-validation from module. "
                    "Exit with error: {}".format(e))
-            self._logger.log_and_raise_error(e)
+
+            self._logger.log_and_raise_error(err)
 
     def attach_space(self, space):
         """
@@ -287,9 +289,16 @@ class PipelineSelector(ABC):
             the elements of which are dictionaries with keys:
             name, pipeline, params
         """
-        self._space = space
-        self._logger.info("Attached parameter distribution space")
-        self.attached_space = True
+        try:
+            self._space = space
+            self._logger.info("Attached parameter distribution space")
+            self.attached_space = True
+
+        except Exception as e:
+            err = ("Failed to attach space. "
+                   "Exit with error: {}".format(e))
+
+            self._logger.log_and_raise_error(err)
 
     def attach_space_from_module(self, module_path: str, name: str):
         """
@@ -301,21 +310,20 @@ class PipelineSelector(ABC):
         """
         try:
             assert(isinstance(module_path, str) and
-                   isinstance(name, str))
-        except AssertionError:
-            err = "Parameters module_path and name must be of str type"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+                   isinstance(name, str)),\
+                   "Parameters module_path and name must be of str type"
 
-        try:
             self._space = LoadingUtils().load_from_module(
                     module_path=module_path, name=name)
 
             self._logger.info("Attached parameter distribution space")
 
             self.attached_space = True
+
         except Exception as e:
             err = ("Failed to attach space from module. "
                    "Exit with error {}".format(e))
+
             self._logger.log_and_raise_error(err)
 
     def attach_data(self, X_train: (pd.DataFrame, np.ndarray),
@@ -342,22 +350,19 @@ class PipelineSelector(ABC):
             during cross-validation
             example: [([0,1,2], [3,4]), ([1,2,3], [4,5])]
         '''
-        NoneType = None.__class__
+        try:
+            NoneType = None.__class__
 
-        input_err = "Non-valid combination of train and val data types"
+            input_err = "Non-valid combination of train and val data types"
 
-        if cv is None:
-            try:
+            if cv is None:
                 assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
                        isinstance(X_val, (pd.DataFrame, np.ndarray)) and
                        isinstance(y_train, (pd.Series, np.ndarray,
                                             pd.DataFrame, NoneType)) and
                        isinstance(y_val, (pd.Series, np.ndarray)) and
-                       (y_val is None) == (y_train is None))
-            except AssertionError:
-                self._logger.log_and_raise_error(input_err)
+                       (y_val is None) == (y_train is None)), input_err
 
-            try:
                 # cost is evaluated with a cross validation function
                 # that accepts an array and a cv object with
                 # indices of the fold splits.
@@ -373,25 +378,24 @@ class PipelineSelector(ABC):
                 self._y = None if y_train is None\
                     else np.concatenate([y_train, y_val])
 
-            except Exception as e:
-                err = "Failed to attach data. Exit with error: {}".format(e)
-                self._logger.log_and_raise_error(err)
-
-        else:
-            try:
+            else:
                 assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
                        isinstance(y_train, (pd.Series, np.ndarray,
                                             pd.DataFrame, NoneType)) and
-                       (X_val is None) and (y_val is None))
-            except AssertionError:
-                self._logger.log_and_raise_error(input_err)
+                       (X_val is None) and (y_val is None)), input_err
+
+                self._cv = cv
+                self._X = X_train
+                self._y = y_train
+
+            self._logger.info("Attached data")
+            self.attached_data = True
 
-            self._cv = cv
-            self._X = X_train
-            self._y = y_train
+        except Exception as e:
+            err = ("Failed to attach data. "
+                   "Exit with error: {}".format(e))
 
-        self._logger.info("Attached data")
-        self.attached_data = True
+            self._logger.log_and_raise_error(err)
 
     def attach_data_from_hdf5(self,
                               data_hdf5_store_path: str,
@@ -412,48 +416,38 @@ class PipelineSelector(ABC):
             the cv data
         """
         try:
-            assert(os.path.isfile(data_hdf5_store_path))
-        except AssertionError:
-            err = "Parameter hdf5_store_path is not a file"
-            self._logger.log_and_raise_error(err, ErrorType=NameError)
+            assert(os.path.isfile(data_hdf5_store_path)),\
+                "Parameter hdf5_store_path is not a file"
 
-        # load the hdf5 store
-        try:
             store = pd.HDFStore(data_hdf5_store_path)
+
             self._data_path = data_hdf5_store_path
-        except Exception as e:
-            err = "Could not load the hdf5 store. Exit with error: {}."\
-                .format(e)
-            self._logger.log_and_raise_error(err)
 
-        data_input = {}
+            data_input = {}
 
-        for key in ["/X_train", "/y_train", "/X_val", "/y_val"]:
-            if key not in store.keys():
-                data_input[key.replace("/", "")] = None
-            else:
-                data_input[key.replace("/", "")] = store[key]
+            for key in ["/X_train", "/y_train", "/X_val", "/y_val"]:
+                if key not in store.keys():
+                    data_input[key.replace("/", "")] = None
+                else:
+                    data_input[key.replace("/", "")] = store[key]
 
-        if cv_pickle_path is not None:
-            try:
-                assert(os.path.isfile(cv_pickle_path))
-            except AssertionError:
-                err = "Parameter hdf5_store_path is not a file"
-                self._logger.log_and_raise_error(err, ErrorType=NameError)
+            if cv_pickle_path is not None:
+                assert(os.path.isfile(cv_pickle_path)),\
+                    "Parameter cv_pickle_path is not a file"
 
-            try:
                 data_input["cv"] = pickle.load(open(cv_pickle_path, "rb"))
                 self._cv_path = cv_pickle_path
-            except Exception as e:
-                err = "Could not load the pickeled cv. Exit with error: {}."\
-                    .format(e)
-                self._logger.log_and_raise_error(err)
-        else:
-            data_input["cv"] = None
 
-        self.attach_data(**data_input)
+            else:
+                data_input["cv"] = None
+
+            self.attach_data(**data_input)
 
-        store.close()
+            store.close()
+
+        except Exception as e:
+            err = "Failed to attach data. Exit with error: {}".format(e)
+            self._logger.log_and_raise_error(err)
 
     def configer_summary_saving(self,
                                 save_method: Callable = None,
@@ -506,12 +500,9 @@ class PipelineSelector(ABC):
         """
         """
         try:
-            assert(self.configured_summary_saving)
-        except AssertionError:
-            err = "Result saving must be configured first"
-            self._logger.log_and_raise_error(err, ErrorType=AssertionError)
+            assert(self.configured_summary_saving),\
+                "Result saving must be configured first"
 
-        try:
             self._save_method(summary)
 
         except Exception as e:
@@ -608,59 +599,54 @@ class PipelineSelector(ABC):
         '''
         try:
             assert(isinstance(space_element, dict) and
-                   set(['name', 'pipeline', 'params']) <= space_element.keys())
+                   set(['name', 'pipeline', 'params'])
+                   <= space_element.keys()),\
+                 "Space elements are of wrong form"
 
             assert(isinstance(space_element['name'], str) and
                    isinstance(space_element['pipeline'], Pipeline) and
-                   isinstance(space_element['params'], dict))
+                   isinstance(space_element['params'], dict)),\
+                "Space elements are of wrong form"
 
-        except AssertionError:
-            err = "Space elements are of wrong form"
-            self._logger.log_and_raise_error(err)
-
-        start_time = time.time()
+            start_time = time.time()
 
-        try:
-            assert(self.attached_data)
-        except AssertionError:
-            err = ("Data must be attached in order "
-                   "in order to effectuate the best"
-                   "pipeline search")
-            self._logger.log_and_raise_error(err)
+            assert(self.attached_data),\
+                ("Data must be attached in order "
+                 "in order to effectuate the best"
+                 "pipeline search")
 
-        summary = {}
+            summary = {}
 
-        if self._strategy_name is not None:
-            summary["strategy_name"] = self._strategy_name
+            if self._strategy_name is not None:
+                summary["strategy_name"] = self._strategy_name
 
-        if isinstance(self._cost_func, str):
-            summary["cost_func"] = self._cost_func
+            if isinstance(self._cost_func, str):
+                summary["cost_func"] = self._cost_func
 
-        elif hasattr(self._cost_func, "__name__"):
-            summary["cost_func"] = self._cost_func.__name__
+            elif hasattr(self._cost_func, "__name__"):
+                summary["cost_func"] = self._cost_func.__name__
 
-        summary["trials_path"] = self.trials_path
+            summary["trials_path"] = self.trials_path
 
-        if self._data_path is not None:
-            summary["data_path"] = self._data_path
+            if self._data_path is not None:
+                summary["data_path"] = self._data_path
 
-        if self._cv_path is not None:
-            summary["cv_path"] = self._cv_path
+            if self._cv_path is not None:
+                summary["cv_path"] = self._cv_path
 
-        summary["start_tuning_time"] = self.start_tuning_time
+            summary["start_tuning_time"] = self.start_tuning_time
 
-        summary["iteration"] = self._iteration
+            summary["iteration"] = self._iteration
 
-        backup_cond = (self._backup_trials_freq is not None) and\
-            ((self._iteration - self._start_iteration - 1) %
-             self._backup_trials_freq == 0) or\
-            self._score_improved
+            backup_cond = (self._backup_trials_freq is not None) and\
+                ((self._iteration - self._start_iteration - 1) %
+                 self._backup_trials_freq == 0) or\
+                self._score_improved
 
-        if backup_cond:
-            self._backup_trials()
-            self._score_improved = False
+            if backup_cond:
+                self._backup_trials()
+                self._score_improved = False
 
-        try:
             pipeline = space_element['pipeline']
             params = space_element['params']
             pipeline.set_params(**params)
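
Note on the sign convention in `__init__` above: `_score_factor` is derived
from bool arithmetic so that the optimizer can always minimize
`_score_factor * score`, whichever direction is better. A minimal check of
that convention (the helper name here is illustrative):

    def score_factor(greater_is_better: bool) -> int:
        # True  -> 0 - 1 = -1: maximizing score == minimizing -score
        # False -> 1 - 0 = +1: the score already is a cost
        return (not greater_is_better) - greater_is_better

    assert score_factor(True) == -1
    assert score_factor(False) == 1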