tanja
/
cdplib


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
							#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jul  6 14:02:24 2020

@author: tanya
@description: space object to pass to HyperoptPipelineSelection class
"""
import math

from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel, SelectPercentile,\
    RFE, SelectFpr, f_classif, chi2, mutual_info_classif
from xgboost import XGBRFClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from hyperopt import hp

from cdplib.hyperparameter_space_composer.SpaceComposer import SpaceComposer

# TODO: add sample spaces for encoders and transformers

# No encoder or transformer candidates are defined yet, so both steps are
# empty lists (two distinct list objects, not one shared list).
encoders, transformers = [], []

# Candidate feature-selection steps.
# Each entry is a dict with three keys:
#   "name"   - short identifier of the candidate,
#   "object" - the unfitted estimator / pipeline instance,
#   "params" - parameter names of that object mapped to hyperopt expressions.
selectors = [
    # Univariate selection keeping a percentile of the best-scoring features.
    # hp.randint(label, 60) yields 0..59, so the percentile lies in 3..62.
    {
        "name": "kbest",
        "object": SelectPercentile(),
        "params": {
            "percentile": 3 + hp.randint("kbest__percentile", 60),
            "score_func": hp.choice(
                "kbest__score_func",
                [f_classif, chi2, mutual_info_classif],
            ),
        },
    },
    # Univariate selection based on a false-positive-rate test.
    {
        "name": "fpr",
        "object": SelectFpr(),
        "params": {
            # mutual_info_classif does not work here
            "score_func": hp.choice("fpr__score_func", [f_classif, chi2]),
            "alpha": hp.uniform("fpr__alpha", 0.1, 0.6),
        },
    },
    # Recursive feature elimination driven by a random forest.
    {
        "name": "rfe_rf",
        "object": RFE(
            estimator=RandomForestClassifier(n_jobs=-1, random_state=33)
        ),
        "params": {
            "n_features_to_select": 3
            + hp.randint("rfe_rf__n_features_to_select", 200),
            "estimator__n_estimators": 20
            + hp.randint("rfe_rf__estimator__n_estimators", 70),
        },
    },
    # Model-based selection using a random forest.
    {
        "name": "rfm_rf",
        "object": SelectFromModel(
            estimator=RandomForestClassifier(n_jobs=-1, random_state=33)
        ),
        "params": {
            "estimator__n_estimators": 20
            + hp.randint("rfm_rf__estimator__n_estimators", 70),
        },
    },
    # Model-based selection using a logistic regression.
    {
        "name": "rfm_lr",
        "object": SelectFromModel(
            estimator=LogisticRegression(n_jobs=-1, random_state=33)
        ),
        "params": {
            "estimator__C": hp.uniform("rfm_lr__estimator__C", 0.1, 1000),
        },
    },
    # Standardisation followed by PCA; n_components is sampled as a float
    # between 0.1 and 1.
    {
        "name": "std_scaler_pca",
        "object": Pipeline(
            [
                ("scaler", StandardScaler()),
                ("pca", PCA(random_state=33)),
            ]
        ),
        "params": {
            "pca__n_components": hp.uniform("pca__n_components", 0.1, 1),
        },
    },
]

# Candidate final estimators.
# Each entry is a dict with three keys:
#   "name"   - short identifier of the candidate,
#   "object" - the unfitted classifier instance,
#   "params" - parameter names of that object mapped to hyperopt expressions.
models = [
    {
        "name": "xgb",
        "object": XGBRFClassifier(n_jobs=-1, eval_metric="map", seed=33),
        "params": {
            "n_estimators": 50 + hp.randint("xgb__n_estimators", 100),
            "max_depth": 3 + hp.randint("xgb__max_depth", 10),
            # hp.loguniform(label, low, high) samples exp(uniform(low, high)),
            # so the bounds must be given in log space. Passing 0.01 and 0.5
            # directly would yield values of roughly 1.01..1.65 instead of
            # the intended 0.01..0.5.
            "learning_rate": hp.loguniform(
                "xgb__learning_rate", math.log(0.01), math.log(0.5)
            ),
        },
    },
    {
        "name": "rf",
        "object": RandomForestClassifier(n_jobs=-1, random_state=33),
        "params": {
            "n_estimators": 50 + hp.randint("rf__n_estimators", 500),
            "max_depth": 3 + hp.randint("rf__max_depth", 10),
            "min_samples_leaf": 1 + hp.randint("rf__min_samples_leaf", 10),
        },
    },
    # the default solver does not accept l1 penalty
    # NOTE(review): per the scikit-learn docs n_jobs is ignored when the
    # solver is liblinear; it is kept here to leave the estimator unchanged.
    {
        "name": "lr",
        "object": LogisticRegression(
            n_jobs=-1, random_state=33, solver="liblinear"
        ),
        "params": {
            "penalty": hp.choice("lr__penalty", ["l1", "l2"]),
            "C": hp.uniform("lr__C", 0.1, 1000),
        },
    },
    # svc does not support parallelization, therefore it is slow
    {
        "name": "svc",
        "object": SVC(random_state=33),
        "params": {
            "kernel": hp.choice("svc__kernel", ["linear", "poly", "rbf"]),
            "degree": 2 + hp.randint("svc__degree", 3),
            "C": hp.uniform("svc__C", 0.1, 1000),
        },
    },
]

# Pipeline steps in the order they are handed to the composer:
# encoders, transformers, selectors, then models.
step_list = [encoders, transformers, selectors, models]

# Search space built from the steps above; per the module docstring this is
# the object to pass to the HyperoptPipelineSelection class.
_composer = SpaceComposer()
space = _composer.compose_hyperopt_space(step_list)