|
@@ -0,0 +1,85 @@
|
|
|
+#!/usr/bin/env python3
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""
|
|
|
+Created on Wed Sep 30 13:54:04 2020
|
|
|
+
|
|
|
+@author: tanya
|
|
|
+@description: a class that from a given list of pipeline steps
|
|
|
+ composes a space to be passed to the GridsearchPipelineSelector
|
|
|
+ or HyperoptPipelineSelector classes.
|
|
|
+ A classic list of steps would be: [encoders, transformers, selectors, models]
|
|
|
+"""
|
|
|
+from sklearn.pipeline import Pipeline
|
|
|
+from hyperopt import hp
|
|
|
+from itertools import product
|
|
|
+
|
|
|
+
|
|
|
class SpaceComposer:
    """
    Composes hyperparameter search spaces from a list of pipeline steps.

    The composed space is meant to be passed either to
    GridsearchPipelineSelector (see compose_gridsearch_space) or to
    HyperoptPipelineSelector (see compose_hyperopt_space).
    """

    def compose_gridsearch_space(self, step_list: list) -> list:
        """
        Composes a hyperparameter space for input to the
        GridsearchPipelineSelector class.

        One space element is created for every combination that picks
        exactly one candidate from each non-empty step (cartesian product).

        :param step_list: a classic list of steps would be
            [encoders, transformers, selectors, models],
            where, for example, selectors is a list
            of sklearn feature selectors, each selector given as a dict:
            for example {"name": "kbest",
                         "object": SelectPercentile(),
                         "params": {
                             "percentile":
                                 [5, 10, 20],
                             "score_func":
                                 [f_classif, chi2, mutual_info_classif]}}
            Steps that contain no candidates are skipped.

        :return: a list of dictionaries of form
            {"name": NAME, "pipeline": PIPELINE, "params": PARAMS}
            where NAME is the candidate names joined by "_",
            PIPELINE is an sklearn Pipeline built from the candidates, and
            PARAMS maps "<candidate name>__<param name>" to the
            corresponding parameter values.
        """
        # Empty steps must be dropped before taking the product, otherwise
        # a single empty step would make the whole product empty.
        non_empty_steps = [step for step in step_list if len(step) > 0]

        # NOTE(review): when no non-empty step remains, product() yields one
        # empty combination, so a single element with an empty name and an
        # empty pipeline is produced - confirm whether callers rely on this.
        space = []

        for step_combination in product(*non_empty_steps):
            space.append({
                "name": "_".join(step["name"] for step in step_combination),
                "pipeline": Pipeline(
                    [(step["name"], step["object"])
                     for step in step_combination]),
                # The "<step name>__<param name>" keys follow the sklearn
                # convention for addressing parameters of pipeline steps.
                "params": {
                    step["name"] + "__" + param_name: param_dist
                    for step in step_combination
                    for param_name, param_dist in step["params"].items()},
            })

        return space

    def compose_hyperopt_space(self, step_list: list) -> hp.choice:
        """
        Composes a hyperopt space from a list of steps.

        The pipelines are composed exactly as in compose_gridsearch_space
        and then wrapped into a single hyperopt choice labelled "pipelines".

        :param step_list: a classic list of steps would be
            [encoders, transformers, selectors, models],
            where, for example, selectors is a list
            of sklearn feature selectors, each selector given as a dict:
            for example {"name": "kbest",
                         "object": SelectPercentile(),
                         "params": {
                             "percentile":
                                 3 + hp.randint("kbest__percentile", 200),
                             "score_func":
                                 hp.choice("kbest__score_func",
                                           [f_classif, chi2,
                                            mutual_info_classif])}}

        :return: the result of hp.choice("pipelines", ...) over the
            dictionaries of form
            {"name": NAME, "pipeline": PIPELINE, "params": PARAMS}
        """
        # Fixed: the original called the non-existent ``hp.choise``.
        return hp.choice("pipelines",
                         self.compose_gridsearch_space(step_list))
|