#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Sep 30 13:54:04 2020

@author: tanya
@description: a class that from a given list of pipeline steps
 composes a space to be passed in the GridsearchPipelineSelector
 or HyperoptPipelineSelector classes.
 A classic list of steps would be: [encoders, transformers, selectors, models]
"""
from sklearn.pipeline import Pipeline
from hyperopt import hp
from itertools import product


class SpaceComposer:
    """
    Compose a pipeline search space from a list of pipeline step groups.

    The resulting space is meant to be passed to
    GridsearchPipelineSelector or HyperoptPipelineSelector.
    """

    def compose_gridsearch_space(self, step_list: list) -> list:
        """
        Compose a hyperparameter space for input to the
        GridsearchPipelineSelector class.

        One space element is produced for every combination obtained by
        picking exactly one step from each non-empty group of step_list
        (cartesian product, in the order the groups are given).

        :param step_list: a classic list of steps would be
            [encoders, transformers, selectors, models],
            where, for example, selectors is a list
            of sklearn feature selectors, each selector given as a dict:
            for example:

            {"name": "kbest",
             "object": SelectPercentile(),
             "params": {
                "percentile": [5, 10, 20],
                "score_func": [f_classif, chi2, mutual_info_classif]}}

            Groups that are empty or None are skipped. The "params" entry
            of a step is optional: a missing or None value is treated as
            "no hyperparameters".

        :return: a list of dictionaries of form
            {"name": NAME, "pipeline": PIPELINE, "params": PARAMS},
            where NAME is the step names joined by "_", PIPELINE is a
            sklearn Pipeline built from the (name, object) pairs, and
            PARAMS maps "<step name>__<param name>" to the value given in
            the step's "params". An empty list is returned when there is
            no non-empty group.

        NOTE: the step objects are passed to Pipeline as-is (not copied),
        so every pipeline that contains a given step refers to the same
        estimator object.
        """
        # Truthiness instead of len(): also tolerates a group given as None.
        step_groups = [group for group in step_list if group]

        # product() of zero iterables yields a single empty tuple, which
        # would turn into one nameless pipeline without any steps.
        if not step_groups:
            return []

        space = []

        for step_combination in product(*step_groups):
            name = "_".join(step["name"] for step in step_combination)

            pipeline = Pipeline(
                [(step["name"], step["object"]) for step in step_combination])

            # Prefix every parameter with its step name, following the
            # "<step>__<param>" convention used for nested parameters.
            params = {
                f'{step["name"]}__{param_name}': param_dist
                for step in step_combination
                for param_name, param_dist
                in (step.get("params") or {}).items()
            }

            space.append(
                {"name": name, "pipeline": pipeline, "params": params})

        return space

    def compose_hyperopt_space(self, step_list: list) -> hp.choice:
        """
        Compose a hyperopt space from a list of steps.

        The space is a hp.choice labelled "pipelines" over the elements
        returned by compose_gridsearch_space for the same step_list.

        :param step_list: a classic list of steps would be
            [encoders, transformers, selectors, models],
            where, for example, selectors is a list
            of sklearn feature selectors, each selector given as a dict:
            for example:

            {"name": "kbest",
             "object": SelectPercentile(),
             "params": {
                "percentile": 3 + hp.randint("kbest__percentile", 200),
                "score_func": hp.choice("kbest__score_func",
                                        [f_classif, chi2,
                                         mutual_info_classif])}}

        :return: the expression returned by hp.choice (the annotation names
            the constructor function, not a class).

        :raises ValueError: if step_list has no non-empty group, because a
            choice cannot be made among zero pipelines.
        """
        space = self.compose_gridsearch_space(step_list)

        if not space:
            raise ValueError(
                "step_list must contain at least one non-empty group of "
                "steps to compose a hyperopt space")

        return hp.choice("pipelines", space)