лет назад: 3 · 232d4f35cf
--- a/cdplib/hyperopt/SpaceComposer.py
+++ b/cdplib/hyperopt/SpaceComposer.py
@@ -0,0 +1,47 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Wed Sep 30 13:54:04 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+@description: a function that from a given list of pipeline steps
			
 
				+ composes a space to be passed in the HyperoptPipelineSelection class.
			
 
				+ A classic list of steps would be: [encoders, transformers, selectors, models]
			
 
				+"""
			
 
				+from sklearn.pipeline import Pipeline
			
 
				+from hyperopt import hp
			
 
				+from itertools import product
			
 
				+
			
 
				+
			
 
				+def space_composer(step_list: list) -> hp.choice:
			
 
				+    """
			
 
				+    :param step_list: list of pipeline steps
			
 
				+     of the form [encoders, transformers, selectors, models]
			
 
				+     each element of step_list is a list of dictionaries
			
 
				+     of the form {"name": NAME, "object": OBJECT, "params": PARAMS}
			
 
				+    :return: hp.choice object of pipelines to choose from
			
 
				+     when passed to the HyperoptPipelineSelection class
			
 
				+    """
			
 
				+
			
 
				+    pipelines = []
			
 
				+
			
 
				+    step_combinations = product(*[step for step in
			
 
				+                                  step_list if len(step) > 0])
			
 
				+
			
 
				+    for step_combination in step_combinations:
			
 
				+
			
 
				+        pipeline_dist = {}
			
 
				+
			
 
				+        pipeline_dist["name"] = "_".join([step["name"]
			
 
				+                                          for step in step_combination])
			
 
				+        pipeline_dist["pipeline"] = Pipeline([(step["name"], step["object"])
			
 
				+                                              for step in step_combination]),
			
 
				+
			
 
				+        pipeline_dist["params"] = {step["name"] + "__" + param_name: param_dist
			
 
				+                                   for step in step_combination
			
 
				+                                   for param_name, param_dist
			
 
				+                                   in step["params"].items()}
			
 
				+
			
 
				+        pipelines.append(pipeline_dist)
			
 
				+
			
 
				+    return hp.choice("pipelines", pipelines)
			
--- a/cdplib/hyperopt/space_SAMPLE.py
+++ b/cdplib/hyperopt/space_SAMPLE.py
@@ -0,0 +1,107 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Wed Sep 30 13:58:39 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+@description: a sample space of scikit learn pipelines
			
 
				+ to pass to the HyperoptPipelineSelection class
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+from sklearn.ensemble import RandomForestClassifier
			
 
				+from sklearn.feature_selection import SelectFromModel, SelectKBest,\
			
 
				+    RFE, SelectFpr, f_classif, chi2, mutual_info_classif
			
 
				+from xgboost import XGBRFClassifier
			
 
				+from sklearn.svm import SVC
			
 
				+from sklearn.linear_model import LogisticRegression
			
 
				+from sklearn.decomposition import PCA
			
 
				+from hyperopt import hp
			
 
				+
			
 
				+from cdplib.hyperopt.SpaceComposer import space_composer
			
 
				+
			
 
				+encoders = []
			
 
				+
			
 
				+transformers = []
			
 
				+
			
 
				+selectors = [
			
 
				+    {"name": "kbest",
			
 
				+     "object": SelectKBest(),
			
 
				+     "params": {
			
 
				+       "k": 3 + hp.randint("kbest__k", 200),
			
 
				+       "score_func": hp.choice("kbest__score_func",
			
 
				+                               [f_classif, chi2, mutual_info_classif])}},
			
 
				+
			
 
				+    {"name": "fpr",
			
 
				+     "object": SelectFpr(),
			
 
				+     "params": {
			
 
				+        "score_func": hp.choice("fpr__score_func",
			
 
				+                                [f_classif, chi2, mutual_info_classif]),
			
 
				+        "alpha": hp.uniform("fpr__alpha", 0.1, 0.6)}},
			
 
				+
			
 
				+    {"name": "rfe_rf",
			
 
				+     "object":
			
 
				+         RFE(estimator=RandomForestClassifier(n_jobs=-1, random_state=33)),
			
 
				+     "params": {
			
 
				+         "n_features_to_select":
			
 
				+             3 + hp.randint("rfe_rf__n_features_to_select", 200),
			
 
				+         "estimator__n_estimators":
			
 
				+             20 + hp.randint("rfe_rf__estimator__n_estimators", 70)}},
			
 
				+
			
 
				+    {"name": "rfm_rf",
			
 
				+     "object":
			
 
				+         SelectFromModel(estimator=RandomForestClassifier(n_jobs=-1,
			
 
				+                                                          random_state=33)),
			
 
				+     "params": {
			
 
				+         "estimator__n_estimators":
			
 
				+             20 + hp.randint("rfm_rf__estimator__n_estimators", 70)}},
			
 
				+
			
 
				+    {"name": "rfm_lr",
			
 
				+     "object":
			
 
				+         SelectFromModel(estimator=LogisticRegression(n_jobs=-1,
			
 
				+                                                      random_state=33)),
			
 
				+     "params": {
			
 
				+          "estimator__C": hp.uniform("rfm_lr__estimator__C", 0.1, 1000)}},
			
 
				+
			
 
				+    {"name": "pca",
			
 
				+     "object": PCA(random_state=33),
			
 
				+     "params": {
			
 
				+       "n_components": 3 + hp.randint("pca__n_components", 20)
			
 
				+       }}
			
 
				+    ]
			
 
				+
			
 
				+models = [
			
 
				+        {"name": "xgb",
			
 
				+         "object": XGBRFClassifier(n_jobs=-1, eval_metric="map", seed=33),
			
 
				+         "params": {
			
 
				+           "n_estimators": 50 + hp.randint('xgb__n_estimators', 100),
			
 
				+           "max_depth": 3 + hp.randint("xgb__max_depth", 10),
			
 
				+           "learning_rate": hp.loguniform("xgb__learning_rate", 0.01, 0.5)
			
 
				+           }},
			
 
				+
			
 
				+        {"name": "rf",
			
 
				+         "object": RandomForestClassifier(n_jobs=-1, random_state=33),
			
 
				+         "params": {
			
 
				+           "n_estimators": 50 + hp.randint('rf__n_estimators', 500),
			
 
				+           "max_depth": 3 + hp.randint("rf__max_depth", 10),
			
 
				+           "min_samples_leaf": 1 + hp.randint("rf__min_samples_leaf", 10)
			
 
				+           }},
			
 
				+
			
 
				+        {"name": "lr",
			
 
				+         "object": LogisticRegression(n_jobs=-1, random_state=33),
			
 
				+         "params":  {
			
 
				+           "penalty": hp.choice("lr__penalty", ["l1", "l2"]),
			
 
				+           "C": hp.uniform("lr__C", 0.1, 1000)}},
			
 
				+
			
 
				+        {"name": "svc",
			
 
				+         "object": SVC(random_state=33),
			
 
				+         "params": {
			
 
				+            "kernel": hp.choice("svc__kernel", ["linear", "poly", "rbf"]),
			
 
				+            "degree": 2 + hp.randint("svc__degree", 3),
			
 
				+            "C": hp.uniform("svc__C", 0.1, 1000)
			
 
				+            }}
			
 
				+        ]
			
 
				+
			
 
				+step_list = [encoders, transformers, selectors, models]
			
 
				+
			
 
				+space = space_composer(step_list)