|
@@ -1,107 +0,0 @@
|
|
|
-
|
|
|
-
|
|
|
-"""
|
|
|
-Created on Wed Sep 30 13:58:39 2020
|
|
|
-
|
|
|
-@author: tanya
|
|
|
-@description: a sample space of scikit learn pipelines
|
|
|
- to pass to the HyperoptPipelineSelection class
|
|
|
-
|
|
|
-"""
|
|
|
-
|
|
|
import math

from hyperopt import hp
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, SelectKBest,\
    RFE, SelectFpr, f_classif, chi2, mutual_info_classif
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from xgboost import XGBRFClassifier

from cdplib.hyperopt.SpaceComposer import space_composer
|
|
|
-
|
|
|
# Candidate encoding steps; this sample space defines none.
encoders = []

# Candidate feature-transformation steps; this sample space defines none.
transformers = []
|
|
|
-
|
|
|
# Candidate feature-selection / dimensionality-reduction steps.
# Each entry holds a short "name", an unfitted estimator under "object" and,
# under "params", the hyperopt expressions sampled for that estimator.
# Hyperopt labels are prefixed with the entry name so they stay unique.
selectors = [
    # Univariate selection of the k best-scoring features.
    {
        "name": "kbest",
        "object": SelectKBest(),
        "params": {
            "k": 3 + hp.randint("kbest__k", 200),
            "score_func": hp.choice(
                "kbest__score_func",
                [f_classif, chi2, mutual_info_classif],
            ),
        },
    },
    # Univariate selection based on a false-positive-rate test.
    {
        "name": "fpr",
        "object": SelectFpr(),
        "params": {
            "score_func": hp.choice(
                "fpr__score_func",
                [f_classif, chi2, mutual_info_classif],
            ),
            "alpha": hp.uniform("fpr__alpha", 0.1, 0.6),
        },
    },
    # Recursive feature elimination driven by a random forest.
    {
        "name": "rfe_rf",
        "object": RFE(
            estimator=RandomForestClassifier(n_jobs=-1, random_state=33),
        ),
        "params": {
            "n_features_to_select": (
                3 + hp.randint("rfe_rf__n_features_to_select", 200)
            ),
            "estimator__n_estimators": (
                20 + hp.randint("rfe_rf__estimator__n_estimators", 70)
            ),
        },
    },
    # Model-based selection using a random forest.
    {
        "name": "rfm_rf",
        "object": SelectFromModel(
            estimator=RandomForestClassifier(n_jobs=-1, random_state=33),
        ),
        "params": {
            "estimator__n_estimators": (
                20 + hp.randint("rfm_rf__estimator__n_estimators", 70)
            ),
        },
    },
    # Model-based selection using a logistic regression.
    {
        "name": "rfm_lr",
        "object": SelectFromModel(
            estimator=LogisticRegression(n_jobs=-1, random_state=33),
        ),
        "params": {
            "estimator__C": hp.uniform("rfm_lr__estimator__C", 0.1, 1000),
        },
    },
    # Projection onto principal components instead of selecting raw features.
    {
        "name": "pca",
        "object": PCA(random_state=33),
        "params": {
            "n_components": 3 + hp.randint("pca__n_components", 20),
        },
    },
]
|
|
|
-
|
|
|
# Candidate final estimators.
# Each entry holds a short "name", an unfitted classifier under "object" and,
# under "params", the hyperopt expressions sampled for that classifier.
# Hyperopt labels are prefixed with the entry name so they stay unique.
models = [
    {
        "name": "xgb",
        # NOTE(review): "map" (mean average precision) is documented by
        # xgboost as a ranking metric - confirm it is intended here.
        "object": XGBRFClassifier(n_jobs=-1, eval_metric="map", seed=33),
        "params": {
            "n_estimators": 50 + hp.randint("xgb__n_estimators", 100),
            "max_depth": 3 + hp.randint("xgb__max_depth", 10),
            # hp.loguniform takes its bounds in log space and returns
            # exp(uniform(low, high)); the bounds are wrapped in math.log so
            # the sampled learning rate lies in [0.01, 0.5].
            "learning_rate": hp.loguniform(
                "xgb__learning_rate", math.log(0.01), math.log(0.5)
            ),
        },
    },
    {
        "name": "rf",
        "object": RandomForestClassifier(n_jobs=-1, random_state=33),
        "params": {
            "n_estimators": 50 + hp.randint("rf__n_estimators", 500),
            "max_depth": 3 + hp.randint("rf__max_depth", 10),
            "min_samples_leaf": 1 + hp.randint("rf__min_samples_leaf", 10),
        },
    },
    {
        "name": "lr",
        # The default "lbfgs" solver rejects penalty="l1"; "saga" accepts
        # both penalties sampled below, so no trial fails on an invalid
        # solver/penalty pair.
        "object": LogisticRegression(
            solver="saga", n_jobs=-1, random_state=33
        ),
        "params": {
            "penalty": hp.choice("lr__penalty", ["l1", "l2"]),
            "C": hp.uniform("lr__C", 0.1, 1000),
        },
    },
    {
        "name": "svc",
        "object": SVC(random_state=33),
        "params": {
            "kernel": hp.choice("svc__kernel", ["linear", "poly", "rbf"]),
            # "degree" is only used by the "poly" kernel.
            "degree": 2 + hp.randint("svc__degree", 3),
            "C": hp.uniform("svc__C", 0.1, 1000),
        },
    },
]
|
|
|
-
|
|
|
# Candidate lists for each pipeline stage, in the order
# encoders -> transformers -> selectors -> models.
step_list = [encoders, transformers, selectors, models]

# Search space built from the stage candidates by cdplib's space_composer.
# NOTE(review): presumably this is the object handed to
# HyperoptPipelineSelection - confirm against the caller.
space = space_composer(step_list)
|