#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 9 09:55:52 2020

@author: tanya

Expanding-window cross-validation splits for ordered data.
"""

from typing import Union, Iterable, Tuple, List

import pandas as pd
import numpy as np
from itertools import accumulate, repeat, takewhile

from cdplib.log import Log


def _take_positions(index, start: int, stop: int):
    """Return the elements of *index* at positions ``[start, stop)``."""
    # Integer keys passed to ``Series[...]`` can be interpreted as labels;
    # ``iloc`` is positional regardless of the Series' own index.
    if isinstance(index, pd.Series):
        return index.iloc[start:stop]

    # Lists, numpy arrays and pandas Index objects all slice by position.
    return index[start:stop]


def make_expanding_cv(test_proportion: float,
                      start_train_proportion: float,
                      step_proportion: Union[float, None] = None,
                      expanding_test_size: bool = False,
                      data_set_size: Union[int, None] = None,
                      index: Union[pd.Series,
                                   np.ndarray,
                                   list,
                                   None] = None)\
        -> Iterable[Tuple]:
    """
    Generate (train, test) folds of an expanding-window cross-validation.

    Every train fold starts at position 0. The first one has
    ``int(start_train_proportion * n)`` elements and each following one
    is ``int(step_proportion * n)`` elements longer, where ``n`` is the
    data set size. The test fold is the block of elements that directly
    follows the train fold. Folds are produced as long as a full-size
    test fold (``int(test_proportion * n)`` elements) still fits after
    the train fold.

    This is a generator: nothing is computed, and no error is reported,
    until the result is iterated.

    :param test_proportion: size of the test fold as a fraction of the
        data set (or of the current train fold, see
        ``expanding_test_size``).
    :param start_train_proportion: size of the first train fold as a
        fraction of the data set.
    :param step_proportion: growth of the train fold between two
        consecutive folds, as a fraction of the data set. If None, it
        defaults to ``test_proportion``.
    :param expanding_test_size: if True, the test fold of each split has
        ``int(test_proportion * len(train))`` elements instead of a
        fixed ``int(test_proportion * n)``.
    :param data_set_size: number of observations. Exactly one of
        ``data_set_size`` and ``index`` must be given. When it is given,
        the folds are slices of ``pd.Series(range(data_set_size))``.
    :param index: the ordered observations (or their labels) to split.
        The folds are positional slices of this object and therefore
        have its type.

    :return: iterable of ``(train, test)`` tuples.

    Any exception raised while building the folds (including invalid
    arguments) is passed to ``Log.log_and_raise_error`` with the message
    "Failed to make expanding cv. Exit with error: ...". The exception
    type that reaches the caller is decided by that method, which is not
    defined in this file.
    """
    logger = Log("make_expanding_cv:")

    try:
        # Explicit raise instead of assert: asserts vanish under ``-O``.
        if (index is None) == (data_set_size is None):
            raise ValueError("Set index or data_set_size")

        if index is None:
            data_set_size = int(data_set_size)
            index = pd.Series(range(data_set_size))
        else:
            data_set_size = len(index)

        # Without an explicit step, grow the train fold by one test
        # window per split.
        if step_proportion is None:
            step_proportion = test_proportion

        start_train_size = int(start_train_proportion * data_set_size)
        step_size = int(step_proportion * data_set_size)
        test_size = int(test_proportion * data_set_size)

        if start_train_size < 1:
            raise ValueError(
                "start_train_proportion yields an empty first train fold")

        # A step below one element would never move the train size past
        # the stop condition, i.e. the generator would never terminate.
        if step_size < 1:
            raise ValueError(
                "step_proportion yields a step smaller than one element")

        # start, start + step, start + 2 * step, ... while a full-size
        # test fold still fits behind the train fold.
        train_sizes = takewhile(
            lambda size: size <= data_set_size - test_size,
            accumulate(repeat(start_train_size),
                       lambda size, _: size + step_size))

        for train_size in train_sizes:
            if expanding_test_size:
                fold_test_size = int(test_proportion * train_size)
            else:
                fold_test_size = test_size

            yield (_take_positions(index, 0, train_size),
                   _take_positions(index,
                                   train_size,
                                   train_size + fold_test_size))

    except Exception as e:
        logger.log_and_raise_error(("Failed to make expanding cv. "
                                    "Exit with error: {}".format(e)))


if __name__ == "__main__":

    # Smoke test: the generators are materialised with list() so that
    # any error inside make_expanding_cv actually surfaces.
    logger = Log("Test_expanding_cv: ")

    logger.info("Start Testing")

    logger.info("Testing expanding cv: ")

    cv = make_expanding_cv(data_set_size=50,
                           test_proportion=0.1,
                           start_train_proportion=0.6,
                           step_proportion=0.1,
                           expanding_test_size=True)

    cv = list(cv)

    logger.info("Testing expanding cv with datetime index")

    cv = make_expanding_cv(
        test_proportion=0.1,
        start_train_proportion=0.6,
        step_proportion=0.1,
        index=pd.date_range(start=pd.to_datetime("2020-01-01"),
                            periods=50))

    cv = list(cv)

    logger.info("Finish testing")