4 年之前 · 04ec30f765
--- a/Pipfile
+++ b/Pipfile
@@ -16,6 +16,9 @@ jsonref = "*"
 
				 simplejson = "*"
			
 
				 mysql = "*"
			
 
				 hyperopt = "*"
			
 
				+mypy = "*"
			
 
				+data-science-types = "*"
			
 
				+pytype = "*"
			
 
				 
			
 
				 [requires]
			
 
				 python_version = "3"
			
--- a/Pipfile.lock
+++ b/Pipfile.lock
@@ -1,7 +1,7 @@
 
				 {
			
 
				     "_meta": {
			
 
				         "hash": {
			
 
				-            "sha256": "5ae0ad9df8502aead1689e37517dd3bb8d75ac1c9554b865563d395fb9c1f60a"
			
 
				+            "sha256": "aaf6cb558761e9ff6ccf0035a08008b15fb12bceb916e49f27a47c406b4e0d2f"
			
 
				         },
			
 
				         "pipfile-spec": 6,
			
 
				         "requires": {
			
@@ -16,24 +16,41 @@
 
				         ]
			
 
				     },
			
 
				     "default": {
			
 
				+        "attrs": {
			
 
				+            "hashes": [
			
 
				+                "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
			
 
				+                "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
			
 
				+            ],
			
 
				+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
			
 
				+            "version": "==20.3.0"
			
 
				+        },
			
 
				         "boltons": {
			
 
				             "hashes": [
			
 
				-                "sha256:6e890b173c5f2dcb4ec62320b3799342ecb1a6a0b2253014455387665d62c213",
			
 
				-                "sha256:b3fc2b711f50cd975e726324d98e0bd5a324dd7e3b81d5e6a1b03c542d0c66c4"
			
 
				+                "sha256:3dd8a8e3c1886e7f7ba3422b50f55a66e1700161bf01b919d098e7d96dd2d9b6",
			
 
				+                "sha256:dd362291a460cc1e0c2e91cc6a60da3036ced77099b623112e8f833e6734bdc5"
			
 
				             ],
			
 
				-            "version": "==20.1.0"
			
 
				+            "version": "==20.2.1"
			
 
				         },
			
 
				         "cdplib": {
			
 
				             "editable": true,
			
 
				             "git": "https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git",
			
 
				-            "ref": "36c286e8f5ff2d441504e2286b2c3408d9756c75"
			
 
				+            "ref": "2eacfa61358654a7e3e9150ae13aed8de9de1dc3"
			
 
				         },
			
 
				         "cloudpickle": {
			
 
				             "hashes": [
			
 
				-                "sha256:38af54d0e7705d87a287bdefe1df00f936aadb1f629dca383e825cca927fa753",
			
 
				-                "sha256:8664761f810efc07dbb301459e413c99b68fcc6d8703912bd39d86618ac631e3"
			
 
				+                "sha256:3a32d0eb0bc6f4d0c57fbc4f3e3780f7a81e6fee0fa935072884d58ae8e1cc7c",
			
 
				+                "sha256:9bc994f9e9447593bd0a45371f0e7ac7333710fcf64a4eb9834bf149f4ef2f32"
			
 
				+            ],
			
 
				+            "markers": "python_version >= '3.5'",
			
 
				+            "version": "==1.6.0"
			
 
				+        },
			
 
				+        "data-science-types": {
			
 
				+            "hashes": [
			
 
				+                "sha256:20ddbaaac3f3299e2091a64e74f78e64f4899f4ab5644bfd97e4694bd7b62ef4",
			
 
				+                "sha256:86218af525896f84f3a39eef254449d795644311a64df78fba5eaf76aa610d6d"
			
 
				             ],
			
 
				-            "version": "==1.3.0"
			
 
				+            "index": "pypi",
			
 
				+            "version": "==0.2.19"
			
 
				         },
			
 
				         "decorator": {
			
 
				             "hashes": [
			
@@ -51,19 +68,26 @@
 
				         },
			
 
				         "hyperopt": {
			
 
				             "hashes": [
			
 
				-                "sha256:52f4534e101f139b074ae626e0b7dc8410854b9410475d3e7f10c429393bb1a2",
			
 
				-                "sha256:8caf0094fe824502932d949ee57bd3c92fe512dbbd93b7b7a78cd0761fa1a78f",
			
 
				-                "sha256:df450eadfc9541086921bf863a5842e7009faef472b08630fd2cab13cdcfe0e6"
			
 
				+                "sha256:bc6047d50f956ae64eebcb34b1fd40f186a93e214957f20e87af2f10195295cc",
			
 
				+                "sha256:dc5c7cceaf33c125b727cf92709e70035d94dd507831dae66406ac762a18a253"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==0.2.3"
			
 
				+            "version": "==0.2.5"
			
 
				+        },
			
 
				+        "importlab": {
			
 
				+            "hashes": [
			
 
				+                "sha256:d855350d19dc10a17aabd2fe6f4b428ff1a936071f692fbf686a73694d26a51c"
			
 
				+            ],
			
 
				+            "markers": "python_full_version >= '2.7.0'",
			
 
				+            "version": "==0.5.1"
			
 
				         },
			
 
				         "joblib": {
			
 
				             "hashes": [
			
 
				-                "sha256:0630eea4f5664c463f23fbf5dcfc54a2bc6168902719fa8e19daf033022786c8",
			
 
				-                "sha256:bdb4fd9b72915ffb49fde2229ce482dd7ae79d842ed8c2b4c932441495af1403"
			
 
				+                "sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
			
 
				+                "sha256:9e284edd6be6b71883a63c9b7f124738a3c16195513ad940eae7e3438de885d5"
			
 
				             ],
			
 
				-            "version": "==0.14.1"
			
 
				+            "markers": "python_version >= '3.6'",
			
 
				+            "version": "==0.17.0"
			
 
				         },
			
 
				         "jsonref": {
			
 
				             "hashes": [
			
@@ -73,6 +97,33 @@
 
				             "index": "pypi",
			
 
				             "version": "==0.2"
			
 
				         },
			
 
				+        "mypy": {
			
 
				+            "hashes": [
			
 
				+                "sha256:0a0d102247c16ce93c97066443d11e2d36e6cc2a32d8ccc1f705268970479324",
			
 
				+                "sha256:0d34d6b122597d48a36d6c59e35341f410d4abfa771d96d04ae2c468dd201abc",
			
 
				+                "sha256:2170492030f6faa537647d29945786d297e4862765f0b4ac5930ff62e300d802",
			
 
				+                "sha256:2842d4fbd1b12ab422346376aad03ff5d0805b706102e475e962370f874a5122",
			
 
				+                "sha256:2b21ba45ad9ef2e2eb88ce4aeadd0112d0f5026418324176fd494a6824b74975",
			
 
				+                "sha256:72060bf64f290fb629bd4a67c707a66fd88ca26e413a91384b18db3876e57ed7",
			
 
				+                "sha256:af4e9ff1834e565f1baa74ccf7ae2564ae38c8df2a85b057af1dbbc958eb6666",
			
 
				+                "sha256:bd03b3cf666bff8d710d633d1c56ab7facbdc204d567715cb3b9f85c6e94f669",
			
 
				+                "sha256:c614194e01c85bb2e551c421397e49afb2872c88b5830e3554f0519f9fb1c178",
			
 
				+                "sha256:cf4e7bf7f1214826cf7333627cb2547c0db7e3078723227820d0a2490f117a01",
			
 
				+                "sha256:da56dedcd7cd502ccd3c5dddc656cb36113dd793ad466e894574125945653cea",
			
 
				+                "sha256:e86bdace26c5fe9cf8cb735e7cedfe7850ad92b327ac5d797c656717d2ca66de",
			
 
				+                "sha256:e97e9c13d67fbe524be17e4d8025d51a7dca38f90de2e462243ab8ed8a9178d1",
			
 
				+                "sha256:eea260feb1830a627fb526d22fbb426b750d9f5a47b624e8d5e7e004359b219c"
			
 
				+            ],
			
 
				+            "index": "pypi",
			
 
				+            "version": "==0.790"
			
 
				+        },
			
 
				+        "mypy-extensions": {
			
 
				+            "hashes": [
			
 
				+                "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
			
 
				+                "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
			
 
				+            ],
			
 
				+            "version": "==0.4.3"
			
 
				+        },
			
 
				         "mysql": {
			
 
				             "hashes": [
			
 
				                 "sha256:55e66b5e7b3823b1da5fb2a063e95a628fb850b2a0b76bdcd884faac5d2daa7d"
			
@@ -82,137 +133,175 @@
 
				         },
			
 
				         "mysqlclient": {
			
 
				             "hashes": [
			
 
				-                "sha256:4c82187dd6ab3607150fbb1fa5ef4643118f3da122b8ba31c3149ddd9cf0cb39",
			
 
				-                "sha256:9e6080a7aee4cc6a06b58b59239f20f1d259c1d2fddf68ddeed242d2311c7087",
			
 
				-                "sha256:f3fdaa9a38752a3b214a6fe79d7cae3653731a53e577821f9187e67cbecb2e16",
			
 
				-                "sha256:f646f8d17d02be0872291f258cce3813497bc7888cd4712a577fd1e719b2f213"
			
 
				+                "sha256:3f39855a4ad22805361e782cc4d1010ac74796225fa2d1c03cc16673ccdc983a",
			
 
				+                "sha256:a6b5648f648b16335e3b1aaec93dc3fcc81a9a661180e306936437cc522c810b",
			
 
				+                "sha256:edd42ccaa444b00702d5374b2f5f7585c9d0ce201917f15339f1c3cf91c1b1ed",
			
 
				+                "sha256:fb2f75aea14722390d2d8ddf384ad99da708c707a96656210a7be8af20a2c5e5"
			
 
				             ],
			
 
				-            "version": "==1.4.6"
			
 
				+            "markers": "python_version >= '3.5'",
			
 
				+            "version": "==2.0.1"
			
 
				         },
			
 
				         "networkx": {
			
 
				             "hashes": [
			
 
				-                "sha256:45e56f7ab6fe81652fb4bc9f44faddb0e9025f469f602df14e3b2551c2ea5c8b"
			
 
				+                "sha256:7978955423fbc9639c10498878be59caf99b44dc304c2286162fd24b458c1602",
			
 
				+                "sha256:8c5812e9f798d37c50570d15c4a69d5710a18d77bafc903ee9c5fba7454c616c"
			
 
				+            ],
			
 
				+            "markers": "python_version >= '3.6'",
			
 
				+            "version": "==2.5"
			
 
				+        },
			
 
				+        "ninja": {
			
 
				+            "hashes": [
			
 
				+                "sha256:06a72090f5c5516e57f12699644179504a77585bed6d5f8be9e67219a398ec80",
			
 
				+                "sha256:16fc1bea52a36a91a0e80c3b221d2c1bc9bcf04d0564da9344e349b8c5efd5c6",
			
 
				+                "sha256:1d9ed3b5fdeb646516f54bec92453dcb3000d6771c2fea56451444c988a23e29",
			
 
				+                "sha256:24acc95359308d11243386cf9f076bdc95f438ef6a4e0e357e7c122c5e02816d",
			
 
				+                "sha256:4252ce532304841e47478bb61710fcf9940cf2c91731303490762b6e4f23fd2b",
			
 
				+                "sha256:5c3a8cb54aaaf5d4f692d65121ef47b3e43dea123a6563153d9d97631c0adf4f",
			
 
				+                "sha256:621fd73513a9bef0cb82e8c531a29ef96580b4d6e797f833cce167054ad812f8",
			
 
				+                "sha256:99c6102ae9a8981afe4d06f92508dbeab1e28ec89783fb703411166f4e13c9ee",
			
 
				+                "sha256:a1a9d9455623a3f45557fff6eb5abb3e70910dde28cfb9239e3ca14249149f55",
			
 
				+                "sha256:c6059bd04ad235e2326b39bc71bb7989de8d565084b5f269557704747b2910fa",
			
 
				+                "sha256:fb1ae96811a9b73773014b8a21d710b89d7d5f765427a5e2541e7fb9d530fdd5"
			
 
				             ],
			
 
				-            "version": "==2.2"
			
 
				+            "version": "==1.10.0.post2"
			
 
				         },
			
 
				         "numpy": {
			
 
				             "hashes": [
			
 
				-                "sha256:1598a6de323508cfeed6b7cd6c4efb43324f4692e20d1f76e1feec7f59013448",
			
 
				-                "sha256:1b0ece94018ae21163d1f651b527156e1f03943b986188dd81bc7e066eae9d1c",
			
 
				-                "sha256:2e40be731ad618cb4974d5ba60d373cdf4f1b8dcbf1dcf4d9dff5e212baf69c5",
			
 
				-                "sha256:4ba59db1fcc27ea31368af524dcf874d9277f21fd2e1f7f1e2e0c75ee61419ed",
			
 
				-                "sha256:59ca9c6592da581a03d42cc4e270732552243dc45e87248aa8d636d53812f6a5",
			
 
				-                "sha256:5e0feb76849ca3e83dd396254e47c7dba65b3fa9ed3df67c2556293ae3e16de3",
			
 
				-                "sha256:6d205249a0293e62bbb3898c4c2e1ff8a22f98375a34775a259a0523111a8f6c",
			
 
				-                "sha256:6fcc5a3990e269f86d388f165a089259893851437b904f422d301cdce4ff25c8",
			
 
				-                "sha256:82847f2765835c8e5308f136bc34018d09b49037ec23ecc42b246424c767056b",
			
 
				-                "sha256:87902e5c03355335fc5992a74ba0247a70d937f326d852fc613b7f53516c0963",
			
 
				-                "sha256:9ab21d1cb156a620d3999dd92f7d1c86824c622873841d6b080ca5495fa10fef",
			
 
				-                "sha256:a1baa1dc8ecd88fb2d2a651671a84b9938461e8a8eed13e2f0a812a94084d1fa",
			
 
				-                "sha256:a244f7af80dacf21054386539699ce29bcc64796ed9850c99a34b41305630286",
			
 
				-                "sha256:a35af656a7ba1d3decdd4fae5322b87277de8ac98b7d9da657d9e212ece76a61",
			
 
				-                "sha256:b1fe1a6f3a6f355f6c29789b5927f8bd4f134a4bd9a781099a7c4f66af8850f5",
			
 
				-                "sha256:b5ad0adb51b2dee7d0ee75a69e9871e2ddfb061c73ea8bc439376298141f77f5",
			
 
				-                "sha256:ba3c7a2814ec8a176bb71f91478293d633c08582119e713a0c5351c0f77698da",
			
 
				-                "sha256:cd77d58fb2acf57c1d1ee2835567cd70e6f1835e32090538f17f8a3a99e5e34b",
			
 
				-                "sha256:cdb3a70285e8220875e4d2bc394e49b4988bdb1298ffa4e0bd81b2f613be397c",
			
 
				-                "sha256:deb529c40c3f1e38d53d5ae6cd077c21f1d49e13afc7936f7f868455e16b64a0",
			
 
				-                "sha256:e7894793e6e8540dbeac77c87b489e331947813511108ae097f1715c018b8f3d"
			
 
				+                "sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
			
 
				+                "sha256:09c12096d843b90eafd01ea1b3307e78ddd47a55855ad402b157b6c4862197ce",
			
 
				+                "sha256:13d166f77d6dc02c0a73c1101dd87fdf01339febec1030bd810dcd53fff3b0f1",
			
 
				+                "sha256:141ec3a3300ab89c7f2b0775289954d193cc8edb621ea05f99db9cb181530512",
			
 
				+                "sha256:16c1b388cc31a9baa06d91a19366fb99ddbe1c7b205293ed072211ee5bac1ed2",
			
 
				+                "sha256:18bed2bcb39e3f758296584337966e68d2d5ba6aab7e038688ad53c8f889f757",
			
 
				+                "sha256:1aeef46a13e51931c0b1cf8ae1168b4a55ecd282e6688fdb0a948cc5a1d5afb9",
			
 
				+                "sha256:27d3f3b9e3406579a8af3a9f262f5339005dd25e0ecf3cf1559ff8a49ed5cbf2",
			
 
				+                "sha256:2a2740aa9733d2e5b2dfb33639d98a64c3b0f24765fed86b0fd2aec07f6a0a08",
			
 
				+                "sha256:4377e10b874e653fe96985c05feed2225c912e328c8a26541f7fc600fb9c637b",
			
 
				+                "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
			
 
				+                "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
			
 
				+                "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
			
 
				+                "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
			
 
				+                "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
			
 
				+                "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
			
 
				+                "sha256:8cac8790a6b1ddf88640a9267ee67b1aee7a57dfa2d2dd33999d080bc8ee3a0f",
			
 
				+                "sha256:8ece138c3a16db8c1ad38f52eb32be6086cc72f403150a79336eb2045723a1ad",
			
 
				+                "sha256:9eeb7d1d04b117ac0d38719915ae169aa6b61fca227b0b7d198d43728f0c879c",
			
 
				+                "sha256:a09f98011236a419ee3f49cedc9ef27d7a1651df07810ae430a6b06576e0b414",
			
 
				+                "sha256:a5d897c14513590a85774180be713f692df6fa8ecf6483e561a6d47309566f37",
			
 
				+                "sha256:ad6f2ff5b1989a4899bf89800a671d71b1612e5ff40866d1f4d8bcf48d4e5764",
			
 
				+                "sha256:c42c4b73121caf0ed6cd795512c9c09c52a7287b04d105d112068c1736d7c753",
			
 
				+                "sha256:cb1017eec5257e9ac6209ac172058c430e834d5d2bc21961dceeb79d111e5909",
			
 
				+                "sha256:d6c7bb82883680e168b55b49c70af29b84b84abb161cbac2800e8fcb6f2109b6",
			
 
				+                "sha256:e452dc66e08a4ce642a961f134814258a082832c78c90351b75c41ad16f79f63",
			
 
				+                "sha256:e5b6ed0f0b42317050c88022349d994fe72bfe35f5908617512cd8c8ef9da2a9",
			
 
				+                "sha256:e9b30d4bd69498fc0c3fe9db5f62fffbb06b8eb9321f92cc970f2969be5e3949",
			
 
				+                "sha256:ec149b90019852266fec2341ce1db513b843e496d5a8e8cdb5ced1923a92faab",
			
 
				+                "sha256:edb01671b3caae1ca00881686003d16c2209e07b7ef8b7639f1867852b948f7c",
			
 
				+                "sha256:f0d3929fe88ee1c155129ecd82f981b8856c5d97bcb0d5f23e9b4242e79d1de3",
			
 
				+                "sha256:f29454410db6ef8126c83bd3c968d143304633d45dc57b51252afbd79d700893",
			
 
				+                "sha256:fe45becb4c2f72a0907c1d0246ea6449fe7a9e2293bb0e11c4e9a32bb0930a15",
			
 
				+                "sha256:fedbd128668ead37f33917820b704784aff695e0019309ad446a6d0b065b57e4"
			
 
				             ],
			
 
				-            "markers": "python_version >= '3.5'",
			
 
				-            "version": "==1.18.2"
			
 
				+            "markers": "python_version >= '3.6'",
			
 
				+            "version": "==1.19.4"
			
 
				         },
			
 
				         "pandas": {
			
 
				             "hashes": [
			
 
				-                "sha256:07c1b58936b80eafdfe694ce964ac21567b80a48d972879a359b3ebb2ea76835",
			
 
				-                "sha256:0ebe327fb088df4d06145227a4aa0998e4f80a9e6aed4b61c1f303bdfdf7c722",
			
 
				-                "sha256:11c7cb654cd3a0e9c54d81761b5920cdc86b373510d829461d8f2ed6d5905266",
			
 
				-                "sha256:12f492dd840e9db1688126216706aa2d1fcd3f4df68a195f9479272d50054645",
			
 
				-                "sha256:167a1315367cea6ec6a5e11e791d9604f8e03f95b57ad227409de35cf850c9c5",
			
 
				-                "sha256:1a7c56f1df8d5ad8571fa251b864231f26b47b59cbe41aa5c0983d17dbb7a8e4",
			
 
				-                "sha256:1fa4bae1a6784aa550a1c9e168422798104a85bf9c77a1063ea77ee6f8452e3a",
			
 
				-                "sha256:32f42e322fb903d0e189a4c10b75ba70d90958cc4f66a1781ed027f1a1d14586",
			
 
				-                "sha256:387dc7b3c0424327fe3218f81e05fc27832772a5dffbed385013161be58df90b",
			
 
				-                "sha256:6597df07ea361231e60c00692d8a8099b519ed741c04e65821e632bc9ccb924c",
			
 
				-                "sha256:743bba36e99d4440403beb45a6f4f3a667c090c00394c176092b0b910666189b",
			
 
				-                "sha256:858a0d890d957ae62338624e4aeaf1de436dba2c2c0772570a686eaca8b4fc85",
			
 
				-                "sha256:863c3e4b7ae550749a0bb77fa22e601a36df9d2905afef34a6965bed092ba9e5",
			
 
				-                "sha256:a210c91a02ec5ff05617a298ad6f137b9f6f5771bf31f2d6b6367d7f71486639",
			
 
				-                "sha256:ca84a44cf727f211752e91eab2d1c6c1ab0f0540d5636a8382a3af428542826e",
			
 
				-                "sha256:d234bcf669e8b4d6cbcd99e3ce7a8918414520aeb113e2a81aeb02d0a533d7f7"
			
 
				+                "sha256:09e0503758ad61afe81c9069505f8cb8c1e36ea8cc1e6826a95823ef5b327daf",
			
 
				+                "sha256:0a11a6290ef3667575cbd4785a1b62d658c25a2fd70a5adedba32e156a8f1773",
			
 
				+                "sha256:0d9a38a59242a2f6298fff45d09768b78b6eb0c52af5919ea9e45965d7ba56d9",
			
 
				+                "sha256:112c5ba0f9ea0f60b2cc38c25f87ca1d5ca10f71efbee8e0f1bee9cf584ed5d5",
			
 
				+                "sha256:185cf8c8f38b169dbf7001e1a88c511f653fbb9dfa3e048f5e19c38049e991dc",
			
 
				+                "sha256:3aa8e10768c730cc1b610aca688f588831fa70b65a26cb549fbb9f35049a05e0",
			
 
				+                "sha256:41746d520f2b50409dffdba29a15c42caa7babae15616bcf80800d8cfcae3d3e",
			
 
				+                "sha256:43cea38cbcadb900829858884f49745eb1f42f92609d368cabcc674b03e90efc",
			
 
				+                "sha256:5378f58172bd63d8c16dd5d008d7dcdd55bf803fcdbe7da2dcb65dbbf322f05b",
			
 
				+                "sha256:54404abb1cd3f89d01f1fb5350607815326790efb4789be60508f458cdd5ccbf",
			
 
				+                "sha256:5dac3aeaac5feb1016e94bde851eb2012d1733a222b8afa788202b836c97dad5",
			
 
				+                "sha256:5fdb2a61e477ce58d3f1fdf2470ee142d9f0dde4969032edaf0b8f1a9dafeaa2",
			
 
				+                "sha256:6613c7815ee0b20222178ad32ec144061cb07e6a746970c9160af1ebe3ad43b4",
			
 
				+                "sha256:6d2b5b58e7df46b2c010ec78d7fb9ab20abf1d306d0614d3432e7478993fbdb0",
			
 
				+                "sha256:8a5d7e57b9df2c0a9a202840b2881bb1f7a648eba12dd2d919ac07a33a36a97f",
			
 
				+                "sha256:8b4c2055ebd6e497e5ecc06efa5b8aa76f59d15233356eb10dad22a03b757805",
			
 
				+                "sha256:a15653480e5b92ee376f8458197a58cca89a6e95d12cccb4c2d933df5cecc63f",
			
 
				+                "sha256:a7d2547b601ecc9a53fd41561de49a43d2231728ad65c7713d6b616cd02ddbed",
			
 
				+                "sha256:a979d0404b135c63954dea79e6246c45dd45371a88631cdbb4877d844e6de3b6",
			
 
				+                "sha256:b1f8111635700de7ac350b639e7e452b06fc541a328cf6193cf8fc638804bab8",
			
 
				+                "sha256:c5a3597880a7a29a31ebd39b73b2c824316ae63a05c3c8a5ce2aea3fc68afe35",
			
 
				+                "sha256:c681e8fcc47a767bf868341d8f0d76923733cbdcabd6ec3a3560695c69f14a1e",
			
 
				+                "sha256:cf135a08f306ebbcfea6da8bf775217613917be23e5074c69215b91e180caab4",
			
 
				+                "sha256:e2b8557fe6d0a18db4d61c028c6af61bfed44ef90e419ed6fadbdc079eba141e"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==1.0.3"
			
 
				+            "version": "==1.1.4"
			
 
				         },
			
 
				         "pymongo": {
			
 
				             "hashes": [
			
 
				-                "sha256:01b4e10027aef5bb9ecefbc26f5df3368ce34aef81df43850f701e716e3fe16d",
			
 
				-                "sha256:0fc5aa1b1acf7f61af46fe0414e6a4d0c234b339db4c03a63da48599acf1cbfc",
			
 
				-                "sha256:1396eb7151e0558b1f817e4b9d7697d5599e5c40d839a9f7270bd90af994ad82",
			
 
				-                "sha256:18e84a3ec5e73adcb4187b8e5541b2ad61d716026ed9863267e650300d8bea33",
			
 
				-                "sha256:19adf2848b80cb349b9891cc854581bbf24c338be9a3260e73159bdeb2264464",
			
 
				-                "sha256:20ee0475aa2ba437b0a14806f125d696f90a8433d820fb558fdd6f052acde103",
			
 
				-                "sha256:26798795097bdeb571f13942beef7e0b60125397811c75b7aa9214d89880dd1d",
			
 
				-                "sha256:26e707a4eb851ec27bb969b5f1413b9b2eac28fe34271fa72329100317ea7c73",
			
 
				-                "sha256:2a3c7ad01553b27ec553688a1e6445e7f40355fb37d925c11fcb50b504e367f8",
			
 
				-                "sha256:2f07b27dbf303ea53f4147a7922ce91a26b34a0011131471d8aaf73151fdee9a",
			
 
				-                "sha256:316f0cf543013d0c085e15a2c8abe0db70f93c9722c0f99b6f3318ff69477d70",
			
 
				-                "sha256:31d11a600eea0c60de22c8bdcb58cda63c762891facdcb74248c36713240987f",
			
 
				-                "sha256:334ef3ffd0df87ea83a0054454336159f8ad9c1b389e19c0032d9cb8410660e6",
			
 
				-                "sha256:358ba4693c01022d507b96a980ded855a32dbdccc3c9331d0667be5e967f30ed",
			
 
				-                "sha256:3a6568bc53103df260f5c7d2da36dffc5202b9a36c85540bba1836a774943794",
			
 
				-                "sha256:444bf2f44264578c4085bb04493bfed0e5c1b4fe7c2704504d769f955cc78fe4",
			
 
				-                "sha256:47a00b22c52ee59dffc2aad02d0bbfb20c26ec5b8de8900492bf13ad6901cf35",
			
 
				-                "sha256:4c067db43b331fc709080d441cb2e157114fec60749667d12186cc3fc8e7a951",
			
 
				-                "sha256:4c092310f804a5d45a1bcaa4191d6d016c457b6ed3982a622c35f729ff1c7f6b",
			
 
				-                "sha256:53b711b33134e292ef8499835a3df10909c58df53a2a0308f598c432e9a62892",
			
 
				-                "sha256:568d6bee70652d8a5af1cd3eec48b4ca1696fb1773b80719ebbd2925b72cb8f6",
			
 
				-                "sha256:56fa55032782b7f8e0bf6956420d11e2d4e9860598dfe9c504edec53af0fc372",
			
 
				-                "sha256:5a2c492680c61b440272341294172fa3b3751797b1ab983533a770e4fb0a67ac",
			
 
				-                "sha256:61235cc39b5b2f593086d1d38f3fc130b2d125bd8fc8621d35bc5b6bdeb92bd2",
			
 
				-                "sha256:619ac9aaf681434b4d4718d1b31aa2f0fce64f2b3f8435688fcbdc0c818b6c54",
			
 
				-                "sha256:6238ac1f483494011abde5286282afdfacd8926659e222ba9b74c67008d3a58c",
			
 
				-                "sha256:63752a72ca4d4e1386278bd43d14232f51718b409e7ac86bcf8810826b531113",
			
 
				-                "sha256:6fdc5ccb43864065d40dd838437952e9e3da9821b7eac605ba46ada77f846bdf",
			
 
				-                "sha256:7abc3a6825a346fa4621a6f63e3b662bbb9e0f6ffc32d30a459d695f20fb1a8b",
			
 
				-                "sha256:7aef381bb9ae8a3821abd7f9d4d93978dbd99072b48522e181baeffcd95b56ae",
			
 
				-                "sha256:80df3caf251fe61a3f0c9614adc6e2bfcffd1cd3345280896766712fb4b4d6d7",
			
 
				-                "sha256:95f970f34b59987dee6f360d2e7d30e181d58957b85dff929eee4423739bd151",
			
 
				-                "sha256:993257f6ca3cde55332af1f62af3e04ca89ce63c08b56a387cdd46136c72f2fa",
			
 
				-                "sha256:9c0a57390549affc2b5dda24a38de03a5c7cbc58750cd161ff5d106c3c6eec80",
			
 
				-                "sha256:a0794e987d55d2f719cc95fcf980fc62d12b80e287e6a761c4be14c60bd9fecc",
			
 
				-                "sha256:a3b98121e68bf370dd8ea09df67e916f93ea95b52fc010902312168c4d1aff5d",
			
 
				-                "sha256:a60756d55f0887023b3899e6c2923ba5f0042fb11b1d17810b4e07395404f33e",
			
 
				-                "sha256:a676bd2fbc2309092b9bbb0083d35718b5420af3a42135ebb1e4c3633f56604d",
			
 
				-                "sha256:a732838c78554c1257ff2492f5c8c4c7312d0aecd7f732149e255f3749edd5ee",
			
 
				-                "sha256:ad3dc88dfe61f0f1f9b99c6bc833ea2f45203a937a18f0d2faa57c6952656012",
			
 
				-                "sha256:ae65d65fde4135ef423a2608587c9ef585a3551fc2e4e431e7c7e527047581be",
			
 
				-                "sha256:b070a4f064a9edb70f921bfdc270725cff7a78c22036dd37a767c51393fb956f",
			
 
				-                "sha256:b6da85949aa91e9f8c521681344bd2e163de894a5492337fba8b05c409225a4f",
			
 
				-                "sha256:bbf47110765b2a999803a7de457567389253f8670f7daafb98e059c899ce9764",
			
 
				-                "sha256:bd9c1e6f92b4888ae3ef7ae23262c513b962f09f3fb3b48581dde5df7d7a860a",
			
 
				-                "sha256:c06b3f998d2d7160db58db69adfb807d2ec307e883e2f17f6b87a1ef6c723f11",
			
 
				-                "sha256:c318fb70542be16d3d4063cde6010b1e4d328993a793529c15a619251f517c39",
			
 
				-                "sha256:c4aef42e5fa4c9d5a99f751fb79caa880dac7eaf8a65121549318b984676a1b7",
			
 
				-                "sha256:c9ca545e93a9c2a3bdaa2e6e21f7a43267ff0813e8055adf2b591c13164c0c57",
			
 
				-                "sha256:da2c3220eb55c4239dd8b982e213da0b79023cac59fe54ca09365f2bc7e4ad32",
			
 
				-                "sha256:dd8055da300535eefd446b30995c0813cc4394873c9509323762a93e97c04c03",
			
 
				-                "sha256:e2b46e092ea54b732d98c476720386ff2ccd126de1e52076b470b117bff7e409",
			
 
				-                "sha256:e334c4f39a2863a239d38b5829e442a87f241a92da9941861ee6ec5d6380b7fe",
			
 
				-                "sha256:e5c54f04ca42bbb5153aec5d4f2e3d9f81e316945220ac318abd4083308143f5",
			
 
				-                "sha256:f4d06764a06b137e48db6d569dc95614d9d225c89842c885669ee8abc9f28c7a",
			
 
				-                "sha256:f96333f9d2517c752c20a35ff95de5fc2763ac8cdb1653df0f6f45d281620606"
			
 
				+                "sha256:03dc64a9aa7a5d405aea5c56db95835f6a2fa31b3502c5af1760e0e99210be30",
			
 
				+                "sha256:05fcc6f9c60e6efe5219fbb5a30258adb3d3e5cbd317068f3d73c09727f2abb6",
			
 
				+                "sha256:076a7f2f7c251635cf6116ac8e45eefac77758ee5a77ab7bd2f63999e957613b",
			
 
				+                "sha256:137e6fa718c7eff270dbd2fc4b90d94b1a69c9e9eb3f3de9e850a7fd33c822dc",
			
 
				+                "sha256:1f865b1d1c191d785106f54df9abdc7d2f45a946b45fd1ea0a641b4f982a2a77",
			
 
				+                "sha256:213c445fe7e654621c6309e874627c35354b46ef3ee807f5a1927dc4b30e1a67",
			
 
				+                "sha256:25e617daf47d8dfd4e152c880cd0741cbdb48e51f54b8de9ddbfe74ecd87dd16",
			
 
				+                "sha256:3d9bb1ba935a90ec4809a8031efd988bdb13cdba05d9e9a3e9bf151bf759ecde",
			
 
				+                "sha256:40696a9a53faa7d85aaa6fd7bef1cae08f7882640bad08c350fb59dee7ad069b",
			
 
				+                "sha256:421aa1b92c291c429668bd8d8d8ec2bd00f183483a756928e3afbf2b6f941f00",
			
 
				+                "sha256:4437300eb3a5e9cc1a73b07d22c77302f872f339caca97e9bf8cf45eca8fa0d2",
			
 
				+                "sha256:455f4deb00158d5ec8b1d3092df6abb681b225774ab8a59b3510293b4c8530e3",
			
 
				+                "sha256:475a34a0745c456ceffaec4ce86b7e0983478f1b6140890dff7b161e7bcd895b",
			
 
				+                "sha256:4797c0080f41eba90404335e5ded3aa66731d303293a675ff097ce4ea3025bb9",
			
 
				+                "sha256:4ae23fbbe9eadf61279a26eba866bbf161a6f7e2ffad14a42cf20e9cb8e94166",
			
 
				+                "sha256:4b32744901ee9990aa8cd488ec85634f443526def1e5190a407dc107148249d7",
			
 
				+                "sha256:50127b13b38e8e586d5e97d342689405edbd74ad0bd891d97ee126a8c7b6e45f",
			
 
				+                "sha256:50531caa7b4be1c4ed5e2d5793a4e51cc9bd62a919a6fd3299ef7c902e206eab",
			
 
				+                "sha256:63a5387e496a98170ffe638b435c0832c0f2011a6f4ff7a2880f17669fff8c03",
			
 
				+                "sha256:68220b81850de8e966d4667d5c325a96c6ac0d6adb3d18935d6e3d325d441f48",
			
 
				+                "sha256:689142dc0c150e9cb7c012d84cac2c346d40beb891323afb6caf18ec4caafae0",
			
 
				+                "sha256:6a15e2bee5c4188369a87ed6f02de804651152634a46cca91966a11c8abd2550",
			
 
				+                "sha256:7122ffe597b531fb065d3314e704a6fe152b81820ca5f38543e70ffcc95ecfd4",
			
 
				+                "sha256:7307024b18266b302f4265da84bb1effb5d18999ef35b30d17592959568d5c0a",
			
 
				+                "sha256:7a4a6f5b818988a3917ec4baa91d1143242bdfece8d38305020463955961266a",
			
 
				+                "sha256:83c5a3ecd96a9f3f11cfe6dfcbcec7323265340eb24cc996acaecea129865a3a",
			
 
				+                "sha256:890b0f1e18dbd898aeb0ab9eae1ab159c6bcbe87f0abb065b0044581d8614062",
			
 
				+                "sha256:8deda1f7b4c03242f2a8037706d9584e703f3d8c74d6d9cac5833db36fe16c42",
			
 
				+                "sha256:8ea13d0348b4c96b437d944d7068d59ed4a6c98aaa6c40d8537a2981313f1c66",
			
 
				+                "sha256:91e96bf85b7c07c827d339a386e8a3cf2e90ef098c42595227f729922d0851df",
			
 
				+                "sha256:96782ebb3c9e91e174c333208b272ea144ed2a684413afb1038e3b3342230d72",
			
 
				+                "sha256:9755c726aa6788f076114dfdc03b92b03ff8860316cca00902cce88bcdb5fedd",
			
 
				+                "sha256:9dbab90c348c512e03f146e93a5e2610acec76df391043ecd46b6b775d5397e6",
			
 
				+                "sha256:9ee0eef254e340cc11c379f797af3977992a7f2c176f1a658740c94bf677e13c",
			
 
				+                "sha256:9fc17fdac8f1973850d42e51e8ba6149d93b1993ed6768a24f352f926dd3d587",
			
 
				+                "sha256:a2787319dc69854acdfd6452e6a8ba8f929aeb20843c7f090e04159fc18e6245",
			
 
				+                "sha256:b7c522292407fa04d8195032493aac937e253ad9ae524aab43b9d9d242571f03",
			
 
				+                "sha256:bd312794f51e37dcf77f013d40650fe4fbb211dd55ef2863839c37480bd44369",
			
 
				+                "sha256:c0d660a186e36c526366edf8a64391874fe53cf8b7039224137aee0163c046df",
			
 
				+                "sha256:c4869141e20769b65d2d72686e7a7eb141ce9f3168106bed3e7dcced54eb2422",
			
 
				+                "sha256:cc4057f692ac35bbe82a0a908d42ce3a281c9e913290fac37d7fa3bd01307dfb",
			
 
				+                "sha256:cccf1e7806f12300e3a3b48f219e111000c2538483e85c869c35c1ae591e6ce9",
			
 
				+                "sha256:ce208f80f398522e49d9db789065c8ad2cd37b21bd6b23d30053474b7416af11",
			
 
				+                "sha256:d0565481dc196986c484a7fb13214fc6402201f7fb55c65fd215b3324962fe6c",
			
 
				+                "sha256:d1b3366329c45a474b3bbc9b9c95d4c686e03f35da7fd12bc144626d1f2a7c04",
			
 
				+                "sha256:d226e0d4b9192d95079a9a29c04dd81816b1ce8903b8c174a39224fe978547cb",
			
 
				+                "sha256:d38b35f6eef4237b1d0d8e845fc1546dad85c55eba447e28c211da8c7ef9697c",
			
 
				+                "sha256:d64c98277ea80e4484f1332ab107e8dfd173a7dcf1bdbf10a9cccc97aaab145f",
			
 
				+                "sha256:d9de8427a5601799784eb0e7fa1b031aa64086ce04de29df775a8ca37eedac41",
			
 
				+                "sha256:e6a15cf8f887d9f578dd49c6fb3a99d53e1d922fdd67a245a67488d77bf56eb2",
			
 
				+                "sha256:e8c446882cbb3774cd78c738c9f58220606b702b7c1655f1423357dc51674054",
			
 
				+                "sha256:e8d188ee39bd0ffe76603da887706e4e7b471f613625899ddf1e27867dc6a0d3",
			
 
				+                "sha256:ef76535776c0708a85258f6dc51d36a2df12633c735f6d197ed7dfcaa7449b99",
			
 
				+                "sha256:f6efca006a81e1197b925a7d7b16b8f61980697bb6746587aad8842865233218"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==3.10.1"
			
 
				+            "version": "==3.11.0"
			
 
				         },
			
 
				         "pymysql": {
			
 
				             "hashes": [
			
 
				-                "sha256:3943fbbbc1e902f41daf7f9165519f140c4451c179380677e6a848587042561a",
			
 
				-                "sha256:d8c059dcd81dedb85a9f034d5e22dcb4442c0b201908bede99e306d65ea7c8e7"
			
 
				+                "sha256:263040d2779a3b84930f7ac9da5132be0fefcd6f453a885756656103f8ee1fdd",
			
 
				+                "sha256:44f47128dda8676e021c8d2dbb49a82be9e4ab158b9f03e897152a3a287c69ea"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==0.9.3"
			
 
				+            "version": "==0.10.1"
			
 
				         },
			
 
				         "python-dateutil": {
			
 
				             "hashes": [
			
@@ -222,108 +311,153 @@
 
				             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
			
 
				             "version": "==2.8.1"
			
 
				         },
			
 
				+        "pytype": {
			
 
				+            "hashes": [
			
 
				+                "sha256:01c2dc3664b550e5c571c432035eda85c5b1ba0bc2675f50bd24f226fda25fc2",
			
 
				+                "sha256:1b63bfccdd68a8f8a80358fccf09c2a52b2e8d0e079e7ae9c034ba5df4356418",
			
 
				+                "sha256:409ff5f52e767ec957014d1c5c1abf2e246446896d333c25f8f2a19de150f85e",
			
 
				+                "sha256:6353e37f0df5037a1f18d0692b9b0b2d71ed0bb1e3b1d6d8d29458ef1a18cb81",
			
 
				+                "sha256:926dea04b6fc9e396b69281679dbbe982f3825d8a3590ba63e671460d58ff192",
			
 
				+                "sha256:e2ea11478665f7496f2e6f9b38956a01e47ab18462961ae5acfeb99c937dcef0",
			
 
				+                "sha256:e97ff9dea170897e35fd1bf5934863176c7d97fbf533d2020ff0ab751dc2e389"
			
 
				+            ],
			
 
				+            "index": "pypi",
			
 
				+            "version": "==2020.11.3"
			
 
				+        },
			
 
				         "pytz": {
			
 
				             "hashes": [
			
 
				-                "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
			
 
				-                "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
			
 
				+                "sha256:3e6b7dd2d1e0a59084bcee14a17af60c5c562cdc16d828e8eba2e683d3a7e268",
			
 
				+                "sha256:5c55e189b682d420be27c6995ba6edce0c0a77dd67bfbe2ae6607134d5851ffd"
			
 
				             ],
			
 
				-            "version": "==2019.3"
			
 
				+            "version": "==2020.4"
			
 
				+        },
			
 
				+        "pyyaml": {
			
 
				+            "hashes": [
			
 
				+                "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
			
 
				+                "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
			
 
				+                "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
			
 
				+                "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
			
 
				+                "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
			
 
				+                "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
			
 
				+                "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
			
 
				+                "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
			
 
				+                "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
			
 
				+                "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
			
 
				+                "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
			
 
				+            ],
			
 
				+            "version": "==5.3.1"
			
 
				         },
			
 
				         "scikit-learn": {
			
 
				             "hashes": [
			
 
				-                "sha256:1bf45e62799b6938357cfce19f72e3751448c4b27010e4f98553da669b5bbd86",
			
 
				-                "sha256:267ad874b54c67b479c3b45eb132ef4a56ab2b27963410624a413a4e2a3fc388",
			
 
				-                "sha256:2d1bb83d6c51a81193d8a6b5f31930e2959c0e1019d49bdd03f54163735dae4b",
			
 
				-                "sha256:349ba3d837fb3f7cb2b91486c43713e4b7de17f9e852f165049b1b7ac2f81478",
			
 
				-                "sha256:3f4d8eea3531d3eaf613fa33f711113dfff6021d57a49c9d319af4afb46f72f0",
			
 
				-                "sha256:4990f0e166292d2a0f0ee528233723bcfd238bfdb3ec2512a9e27f5695362f35",
			
 
				-                "sha256:57538d138ba54407d21e27c306735cbd42a6aae0df6a5a30c7a6edde46b0017d",
			
 
				-                "sha256:5b722e8bb708f254af028dc2da86d23df5371cba57e24f889b672e7b15423caa",
			
 
				-                "sha256:6043e2c4ccfc68328c331b0fc19691be8fb02bd76d694704843a23ad651de902",
			
 
				-                "sha256:672ea38eb59b739a8907ec063642b486bcb5a2073dda5b72b7983eeaf1fd67c1",
			
 
				-                "sha256:73207dca6e70f8f611f28add185cf3a793c8232a1722f21d82259560dc35cd50",
			
 
				-                "sha256:83fc104a799cb340054e485c25dfeee712b36f5638fb374eba45a9db490f16ff",
			
 
				-                "sha256:8416150ab505f1813da02cdbdd9f367b05bfc75cf251235015bb09f8674358a0",
			
 
				-                "sha256:84e759a766c315deb5c85139ff879edbb0aabcddb9358acf499564ed1c21e337",
			
 
				-                "sha256:8ed66ab27b3d68e57bb1f315fc35e595a5c4a1f108c3420943de4d18fc40e615",
			
 
				-                "sha256:a7f8aa93f61aaad080b29a9018db93ded0586692c03ddf2122e47dd1d3a14e1b",
			
 
				-                "sha256:ddd3bf82977908ff69303115dd5697606e669d8a7eafd7d83bb153ef9e11bd5e",
			
 
				-                "sha256:de9933297f8659ee3bb330eafdd80d74cd73d5dab39a9026b65a4156bc479063",
			
 
				-                "sha256:ea91a70a992ada395efc3d510cf011dc2d99dc9037bb38cd1cb00e14745005f5",
			
 
				-                "sha256:eb4c9f0019abb374a2e55150f070a333c8f990b850d1eb4dfc2765fc317ffc7c",
			
 
				-                "sha256:ffce8abfdcd459e72e5b91727b247b401b22253cbd18d251f842a60e26262d6f"
			
 
				+                "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca",
			
 
				+                "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc",
			
 
				+                "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea",
			
 
				+                "sha256:20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480",
			
 
				+                "sha256:2aa95c2f17d2f80534156215c87bee72b6aa314a7f8b8fe92a2d71f47280570d",
			
 
				+                "sha256:5ce7a8021c9defc2b75620571b350acc4a7d9763c25b7593621ef50f3bd019a2",
			
 
				+                "sha256:6c28a1d00aae7c3c9568f61aafeaad813f0f01c729bee4fd9479e2132b215c1d",
			
 
				+                "sha256:7671bbeddd7f4f9a6968f3b5442dac5f22bf1ba06709ef888cc9132ad354a9ab",
			
 
				+                "sha256:914ac2b45a058d3f1338d7736200f7f3b094857758895f8667be8a81ff443b5b",
			
 
				+                "sha256:98508723f44c61896a4e15894b2016762a55555fbf09365a0bb1870ecbd442de",
			
 
				+                "sha256:a64817b050efd50f9abcfd311870073e500ae11b299683a519fbb52d85e08d25",
			
 
				+                "sha256:cb3e76380312e1f86abd20340ab1d5b3cc46a26f6593d3c33c9ea3e4c7134028",
			
 
				+                "sha256:d0dcaa54263307075cb93d0bee3ceb02821093b1b3d25f66021987d305d01dce",
			
 
				+                "sha256:d9a1ce5f099f29c7c33181cc4386660e0ba891b21a60dc036bf369e3a3ee3aec",
			
 
				+                "sha256:da8e7c302003dd765d92a5616678e591f347460ac7b53e53d667be7dfe6d1b10",
			
 
				+                "sha256:daf276c465c38ef736a79bd79fc80a249f746bcbcae50c40945428f7ece074f8"
			
 
				             ],
			
 
				-            "markers": "python_version >= '3.5'",
			
 
				-            "version": "==0.22.2.post1"
			
 
				+            "markers": "python_version >= '3.6'",
			
 
				+            "version": "==0.23.2"
			
 
				         },
			
 
				         "scipy": {
			
 
				             "hashes": [
			
 
				-                "sha256:00af72998a46c25bdb5824d2b729e7dabec0c765f9deb0b504f928591f5ff9d4",
			
 
				-                "sha256:0902a620a381f101e184a958459b36d3ee50f5effd186db76e131cbefcbb96f7",
			
 
				-                "sha256:1e3190466d669d658233e8a583b854f6386dd62d655539b77b3fa25bfb2abb70",
			
 
				-                "sha256:2cce3f9847a1a51019e8c5b47620da93950e58ebc611f13e0d11f4980ca5fecb",
			
 
				-                "sha256:3092857f36b690a321a662fe5496cb816a7f4eecd875e1d36793d92d3f884073",
			
 
				-                "sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa",
			
 
				-                "sha256:71eb180f22c49066f25d6df16f8709f215723317cc951d99e54dc88020ea57be",
			
 
				-                "sha256:770254a280d741dd3436919d47e35712fb081a6ff8bafc0f319382b954b77802",
			
 
				-                "sha256:787cc50cab3020a865640aba3485e9fbd161d4d3b0d03a967df1a2881320512d",
			
 
				-                "sha256:8a07760d5c7f3a92e440ad3aedcc98891e915ce857664282ae3c0220f3301eb6",
			
 
				-                "sha256:8d3bc3993b8e4be7eade6dcc6fd59a412d96d3a33fa42b0fa45dc9e24495ede9",
			
 
				-                "sha256:9508a7c628a165c2c835f2497837bf6ac80eb25291055f56c129df3c943cbaf8",
			
 
				-                "sha256:a144811318853a23d32a07bc7fd5561ff0cac5da643d96ed94a4ffe967d89672",
			
 
				-                "sha256:a1aae70d52d0b074d8121333bc807a485f9f1e6a69742010b33780df2e60cfe0",
			
 
				-                "sha256:a2d6df9eb074af7f08866598e4ef068a2b310d98f87dc23bd1b90ec7bdcec802",
			
 
				-                "sha256:bb517872058a1f087c4528e7429b4a44533a902644987e7b2fe35ecc223bc408",
			
 
				-                "sha256:c5cac0c0387272ee0e789e94a570ac51deb01c796b37fb2aad1fb13f85e2f97d",
			
 
				-                "sha256:cc971a82ea1170e677443108703a2ec9ff0f70752258d0e9f5433d00dda01f59",
			
 
				-                "sha256:dba8306f6da99e37ea08c08fef6e274b5bf8567bb094d1dbe86a20e532aca088",
			
 
				-                "sha256:dc60bb302f48acf6da8ca4444cfa17d52c63c5415302a9ee77b3b21618090521",
			
 
				-                "sha256:dee1bbf3a6c8f73b6b218cb28eed8dd13347ea2f87d572ce19b289d6fd3fbc59"
			
 
				+                "sha256:168c45c0c32e23f613db7c9e4e780bc61982d71dcd406ead746c7c7c2f2004ce",
			
 
				+                "sha256:213bc59191da2f479984ad4ec39406bf949a99aba70e9237b916ce7547b6ef42",
			
 
				+                "sha256:25b241034215247481f53355e05f9e25462682b13bd9191359075682adcd9554",
			
 
				+                "sha256:2c872de0c69ed20fb1a9b9cf6f77298b04a26f0b8720a5457be08be254366c6e",
			
 
				+                "sha256:3397c129b479846d7eaa18f999369a24322d008fac0782e7828fa567358c36ce",
			
 
				+                "sha256:368c0f69f93186309e1b4beb8e26d51dd6f5010b79264c0f1e9ca00cd92ea8c9",
			
 
				+                "sha256:3d5db5d815370c28d938cf9b0809dade4acf7aba57eaf7ef733bfedc9b2474c4",
			
 
				+                "sha256:4598cf03136067000855d6b44d7a1f4f46994164bcd450fb2c3d481afc25dd06",
			
 
				+                "sha256:4a453d5e5689de62e5d38edf40af3f17560bfd63c9c5bd228c18c1f99afa155b",
			
 
				+                "sha256:4f12d13ffbc16e988fa40809cbbd7a8b45bc05ff6ea0ba8e3e41f6f4db3a9e47",
			
 
				+                "sha256:634568a3018bc16a83cda28d4f7aed0d803dd5618facb36e977e53b2df868443",
			
 
				+                "sha256:65923bc3809524e46fb7eb4d6346552cbb6a1ffc41be748535aa502a2e3d3389",
			
 
				+                "sha256:6b0ceb23560f46dd236a8ad4378fc40bad1783e997604ba845e131d6c680963e",
			
 
				+                "sha256:8c8d6ca19c8497344b810b0b0344f8375af5f6bb9c98bd42e33f747417ab3f57",
			
 
				+                "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62",
			
 
				+                "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d",
			
 
				+                "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437",
			
 
				+                "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2",
			
 
				+                "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54",
			
 
				+                "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474",
			
 
				+                "sha256:e360cb2299028d0b0d0f65a5c5e51fc16a335f1603aa2357c25766c8dab56938",
			
 
				+                "sha256:e98d49a5717369d8241d6cf33ecb0ca72deee392414118198a8e5b4c35c56340",
			
 
				+                "sha256:ed572470af2438b526ea574ff8f05e7f39b44ac37f712105e57fc4d53a6fb660",
			
 
				+                "sha256:f87b39f4d69cf7d7529d7b1098cb712033b17ea7714aed831b95628f483fd012",
			
 
				+                "sha256:fa789583fc94a7689b45834453fec095245c7e69c58561dc159b5d5277057e4c"
			
 
				             ],
			
 
				-            "markers": "python_version >= '3.5'",
			
 
				-            "version": "==1.4.1"
			
 
				+            "markers": "python_version >= '3.6'",
			
 
				+            "version": "==1.5.4"
			
 
				         },
			
 
				         "simplejson": {
			
 
				             "hashes": [
			
 
				-                "sha256:0fe3994207485efb63d8f10a833ff31236ed27e3b23dadd0bf51c9900313f8f2",
			
 
				-                "sha256:17163e643dbf125bb552de17c826b0161c68c970335d270e174363d19e7ea882",
			
 
				-                "sha256:1d1e929cdd15151f3c0b2efe953b3281b2fd5ad5f234f77aca725f28486466f6",
			
 
				-                "sha256:1d346c2c1d7dd79c118f0cc7ec5a1c4127e0c8ffc83e7b13fc5709ff78c9bb84",
			
 
				-                "sha256:1ea59f570b9d4916ae5540a9181f9c978e16863383738b69a70363bc5e63c4cb",
			
 
				-                "sha256:1fbba86098bbfc1f85c5b69dc9a6d009055104354e0d9880bb00b692e30e0078",
			
 
				-                "sha256:229edb079d5dd81bf12da952d4d825bd68d1241381b37d3acf961b384c9934de",
			
 
				-                "sha256:22a7acb81968a7c64eba7526af2cf566e7e2ded1cb5c83f0906b17ff1540f866",
			
 
				-                "sha256:2b4b2b738b3b99819a17feaf118265d0753d5536049ea570b3c43b51c4701e81",
			
 
				-                "sha256:4cf91aab51b02b3327c9d51897960c554f00891f9b31abd8a2f50fd4a0071ce8",
			
 
				-                "sha256:4fd5f79590694ebff8dc980708e1c182d41ce1fda599a12189f0ca96bf41ad70",
			
 
				-                "sha256:5cfd495527f8b85ce21db806567de52d98f5078a8e9427b18e251c68bd573a26",
			
 
				-                "sha256:60aad424e47c5803276e332b2a861ed7a0d46560e8af53790c4c4fb3420c26c2",
			
 
				-                "sha256:7739940d68b200877a15a5ff5149e1599737d6dd55e302625650629350466418",
			
 
				-                "sha256:7cce4bac7e0d66f3a080b80212c2238e063211fe327f98d764c6acbc214497fc",
			
 
				-                "sha256:8027bd5f1e633eb61b8239994e6fc3aba0346e76294beac22a892eb8faa92ba1",
			
 
				-                "sha256:86afc5b5cbd42d706efd33f280fec7bd7e2772ef54e3f34cf6b30777cd19a614",
			
 
				-                "sha256:87d349517b572964350cc1adc5a31b493bbcee284505e81637d0174b2758ba17",
			
 
				-                "sha256:8de378d589eccbc75941e480b4d5b4db66f22e4232f87543b136b1f093fff342",
			
 
				-                "sha256:926bcbef9eb60e798eabda9cd0bbcb0fca70d2779aa0aa56845749d973eb7ad5",
			
 
				-                "sha256:9a126c3a91df5b1403e965ba63b304a50b53d8efc908a8c71545ed72535374a3",
			
 
				-                "sha256:ad8dd3454d0c65c0f92945ac86f7b9efb67fa2040ba1b0189540e984df904378",
			
 
				-                "sha256:d140e9376e7f73c1f9e0a8e3836caf5eec57bbafd99259d56979da05a6356388",
			
 
				-                "sha256:da00675e5e483ead345429d4f1374ab8b949fba4429d60e71ee9d030ced64037",
			
 
				-                "sha256:daaf4d11db982791be74b23ff4729af2c7da79316de0bebf880fa2d60bcc8c5a",
			
 
				-                "sha256:f4b64a1031acf33e281fd9052336d6dad4d35eee3404c95431c8c6bc7a9c0588",
			
 
				-                "sha256:fc046afda0ed8f5295212068266c92991ab1f4a50c6a7144b69364bdee4a0159",
			
 
				-                "sha256:fc9051d249dd5512e541f20330a74592f7a65b2d62e18122ca89bf71f94db748"
			
 
				+                "sha256:034550078a11664d77bc1a8364c90bb7eef0e44c2dbb1fd0a4d92e3997088667",
			
 
				+                "sha256:05b43d568300c1cd43f95ff4bfcff984bc658aa001be91efb3bb21df9d6288d3",
			
 
				+                "sha256:0dd9d9c738cb008bfc0862c9b8fa6743495c03a0ed543884bf92fb7d30f8d043",
			
 
				+                "sha256:10fc250c3edea4abc15d930d77274ddb8df4803453dde7ad50c2f5565a18a4bb",
			
 
				+                "sha256:2862beabfb9097a745a961426fe7daf66e1714151da8bb9a0c430dde3d59c7c0",
			
 
				+                "sha256:292c2e3f53be314cc59853bd20a35bf1f965f3bc121e007ab6fd526ed412a85d",
			
 
				+                "sha256:2d3eab2c3fe52007d703a26f71cf649a8c771fcdd949a3ae73041ba6797cfcf8",
			
 
				+                "sha256:2e7b57c2c146f8e4dadf84977a83f7ee50da17c8861fd7faf694d55e3274784f",
			
 
				+                "sha256:311f5dc2af07361725033b13cc3d0351de3da8bede3397d45650784c3f21fbcf",
			
 
				+                "sha256:344e2d920a7f27b4023c087ab539877a1e39ce8e3e90b867e0bfa97829824748",
			
 
				+                "sha256:3fabde09af43e0cbdee407555383063f8b45bfb52c361bc5da83fcffdb4fd278",
			
 
				+                "sha256:42b8b8dd0799f78e067e2aaae97e60d58a8f63582939af60abce4c48631a0aa4",
			
 
				+                "sha256:4b3442249d5e3893b90cb9f72c7d6ce4d2ea144d2c0d9f75b9ae1e5460f3121a",
			
 
				+                "sha256:55d65f9cc1b733d85ef95ab11f559cce55c7649a2160da2ac7a078534da676c8",
			
 
				+                "sha256:5c659a0efc80aaaba57fcd878855c8534ecb655a28ac8508885c50648e6e659d",
			
 
				+                "sha256:72d8a3ffca19a901002d6b068cf746be85747571c6a7ba12cbcf427bfb4ed971",
			
 
				+                "sha256:75ecc79f26d99222a084fbdd1ce5aad3ac3a8bd535cd9059528452da38b68841",
			
 
				+                "sha256:76ac9605bf2f6d9b56abf6f9da9047a8782574ad3531c82eae774947ae99cc3f",
			
 
				+                "sha256:7d276f69bfc8c7ba6c717ba8deaf28f9d3c8450ff0aa8713f5a3280e232be16b",
			
 
				+                "sha256:7f10f8ba9c1b1430addc7dd385fc322e221559d3ae49b812aebf57470ce8de45",
			
 
				+                "sha256:8042040af86a494a23c189b5aa0ea9433769cc029707833f261a79c98e3375f9",
			
 
				+                "sha256:813846738277729d7db71b82176204abc7fdae2f566e2d9fcf874f9b6472e3e6",
			
 
				+                "sha256:845a14f6deb124a3bcb98a62def067a67462a000e0508f256f9c18eff5847efc",
			
 
				+                "sha256:869a183c8e44bc03be1b2bbcc9ec4338e37fa8557fc506bf6115887c1d3bb956",
			
 
				+                "sha256:8acf76443cfb5c949b6e781c154278c059b09ac717d2757a830c869ba000cf8d",
			
 
				+                "sha256:8f713ea65958ef40049b6c45c40c206ab363db9591ff5a49d89b448933fa5746",
			
 
				+                "sha256:934115642c8ba9659b402c8bdbdedb48651fb94b576e3b3efd1ccb079609b04a",
			
 
				+                "sha256:9551f23e09300a9a528f7af20e35c9f79686d46d646152a0c8fc41d2d074d9b0",
			
 
				+                "sha256:9a2b7543559f8a1c9ed72724b549d8cc3515da7daf3e79813a15bdc4a769de25",
			
 
				+                "sha256:a55c76254d7cf8d4494bc508e7abb993a82a192d0db4552421e5139235604625",
			
 
				+                "sha256:ad8f41c2357b73bc9e8606d2fa226233bf4d55d85a8982ecdfd55823a6959995",
			
 
				+                "sha256:af4868da7dd53296cd7630687161d53a7ebe2e63814234631445697bd7c29f46",
			
 
				+                "sha256:afebfc3dd3520d37056f641969ce320b071bc7a0800639c71877b90d053e087f",
			
 
				+                "sha256:b59aa298137ca74a744c1e6e22cfc0bf9dca3a2f41f51bc92eb05695155d905a",
			
 
				+                "sha256:bc00d1210567a4cdd215ac6e17dc00cb9893ee521cee701adfd0fa43f7c73139",
			
 
				+                "sha256:c1cb29b1fced01f97e6d5631c3edc2dadb424d1f4421dad079cb13fc97acb42f",
			
 
				+                "sha256:c94dc64b1a389a416fc4218cd4799aa3756f25940cae33530a4f7f2f54f166da",
			
 
				+                "sha256:ceaa28a5bce8a46a130cd223e895080e258a88d51bf6e8de2fc54a6ef7e38c34",
			
 
				+                "sha256:cff6453e25204d3369c47b97dd34783ca820611bd334779d22192da23784194b",
			
 
				+                "sha256:d0b64409df09edb4c365d95004775c988259efe9be39697d7315c42b7a5e7e94",
			
 
				+                "sha256:d4813b30cb62d3b63ccc60dd12f2121780c7a3068db692daeb90f989877aaf04",
			
 
				+                "sha256:da3c55cdc66cfc3fffb607db49a42448785ea2732f055ac1549b69dcb392663b",
			
 
				+                "sha256:e058c7656c44fb494a11443191e381355388443d543f6fc1a245d5d238544396",
			
 
				+                "sha256:fed0f22bf1313ff79c7fc318f7199d6c2f96d4de3234b2f12a1eab350e597c06",
			
 
				+                "sha256:ffd4e4877a78c84d693e491b223385e0271278f5f4e1476a4962dca6824ecfeb"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==3.17.0"
			
 
				+            "version": "==3.17.2"
			
 
				         },
			
 
				         "six": {
			
 
				             "hashes": [
			
 
				-                "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
			
 
				-                "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
			
 
				+                "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
			
 
				+                "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
			
 
				             ],
			
 
				             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
			
 
				-            "version": "==1.14.0"
			
 
				+            "version": "==1.15.0"
			
 
				         },
			
 
				         "sklearn": {
			
 
				             "hashes": [
			
@@ -333,50 +467,120 @@
 
				         },
			
 
				         "sqlalchemy": {
			
 
				             "hashes": [
			
 
				-                "sha256:083e383a1dca8384d0ea6378bd182d83c600ed4ff4ec8247d3b2442cf70db1ad",
			
 
				-                "sha256:0a690a6486658d03cc6a73536d46e796b6570ac1f8a7ec133f9e28c448b69828",
			
 
				-                "sha256:114b6ace30001f056e944cebd46daef38fdb41ebb98f5e5940241a03ed6cad43",
			
 
				-                "sha256:128f6179325f7597a46403dde0bf148478f868df44841348dfc8d158e00db1f9",
			
 
				-                "sha256:13d48cd8b925b6893a4e59b2dfb3e59a5204fd8c98289aad353af78bd214db49",
			
 
				-                "sha256:211a1ce7e825f7142121144bac76f53ac28b12172716a710f4bf3eab477e730b",
			
 
				-                "sha256:2dc57ee80b76813759cccd1a7affedf9c4dbe5b065a91fb6092c9d8151d66078",
			
 
				-                "sha256:3e625e283eecc15aee5b1ef77203bfb542563fa4a9aa622c7643c7b55438ff49",
			
 
				-                "sha256:43078c7ec0457387c79b8d52fff90a7ad352ca4c7aa841c366238c3e2cf52fdf",
			
 
				-                "sha256:5b1bf3c2c2dca738235ce08079783ef04f1a7fc5b21cf24adaae77f2da4e73c3",
			
 
				-                "sha256:6056b671aeda3fc451382e52ab8a753c0d5f66ef2a5ccc8fa5ba7abd20988b4d",
			
 
				-                "sha256:68d78cf4a9dfade2e6cf57c4be19f7b82ed66e67dacf93b32bb390c9bed12749",
			
 
				-                "sha256:7025c639ce7e170db845e94006cf5f404e243e6fc00d6c86fa19e8ad8d411880",
			
 
				-                "sha256:7224e126c00b8178dfd227bc337ba5e754b197a3867d33b9f30dc0208f773d70",
			
 
				-                "sha256:7d98e0785c4cd7ae30b4a451416db71f5724a1839025544b4edbd92e00b91f0f",
			
 
				-                "sha256:8d8c21e9d4efef01351bf28513648ceb988031be4159745a7ad1b3e28c8ff68a",
			
 
				-                "sha256:bbb545da054e6297242a1bb1ba88e7a8ffb679f518258d66798ec712b82e4e07",
			
 
				-                "sha256:d00b393f05dbd4ecd65c989b7f5a81110eae4baea7a6a4cdd94c20a908d1456e",
			
 
				-                "sha256:e18752cecaef61031252ca72031d4d6247b3212ebb84748fc5d1a0d2029c23ea"
			
 
				+                "sha256:009e8388d4d551a2107632921320886650b46332f61dc935e70c8bcf37d8e0d6",
			
 
				+                "sha256:0157c269701d88f5faf1fa0e4560e4d814f210c01a5b55df3cab95e9346a8bcc",
			
 
				+                "sha256:0a92745bb1ebbcb3985ed7bda379b94627f0edbc6c82e9e4bac4fb5647ae609a",
			
 
				+                "sha256:0cca1844ba870e81c03633a99aa3dc62256fb96323431a5dec7d4e503c26372d",
			
 
				+                "sha256:166917a729b9226decff29416f212c516227c2eb8a9c9f920d69ced24e30109f",
			
 
				+                "sha256:1f5f369202912be72fdf9a8f25067a5ece31a2b38507bb869306f173336348da",
			
 
				+                "sha256:2909dffe5c9a615b7e6c92d1ac2d31e3026dc436440a4f750f4749d114d88ceb",
			
 
				+                "sha256:2b5dafed97f778e9901b79cc01b88d39c605e0545b4541f2551a2fd785adc15b",
			
 
				+                "sha256:2e9bd5b23bba8ae8ce4219c9333974ff5e103c857d9ff0e4b73dc4cb244c7d86",
			
 
				+                "sha256:3aa6d45e149a16aa1f0c46816397e12313d5e37f22205c26e06975e150ffcf2a",
			
 
				+                "sha256:4bdbdb8ca577c6c366d15791747c1de6ab14529115a2eb52774240c412a7b403",
			
 
				+                "sha256:53fd857c6c8ffc0aa6a5a3a2619f6a74247e42ec9e46b836a8ffa4abe7aab327",
			
 
				+                "sha256:5cdfe54c1e37279dc70d92815464b77cd8ee30725adc9350f06074f91dbfeed2",
			
 
				+                "sha256:5d92c18458a4aa27497a986038d5d797b5279268a2de303cd00910658e8d149c",
			
 
				+                "sha256:632b32183c0cb0053194a4085c304bc2320e5299f77e3024556fa2aa395c2a8b",
			
 
				+                "sha256:7c735c7a6db8ee9554a3935e741cf288f7dcbe8706320251eb38c412e6a4281d",
			
 
				+                "sha256:7cd40cb4bc50d9e87b3540b23df6e6b24821ba7e1f305c1492b0806c33dbdbec",
			
 
				+                "sha256:84f0ac4a09971536b38cc5d515d6add7926a7e13baa25135a1dbb6afa351a376",
			
 
				+                "sha256:8dcbf377529a9af167cbfc5b8acec0fadd7c2357fc282a1494c222d3abfc9629",
			
 
				+                "sha256:950f0e17ffba7a7ceb0dd056567bc5ade22a11a75920b0e8298865dc28c0eff6",
			
 
				+                "sha256:9e379674728f43a0cd95c423ac0e95262500f9bfd81d33b999daa8ea1756d162",
			
 
				+                "sha256:b15002b9788ffe84e42baffc334739d3b68008a973d65fad0a410ca5d0531980",
			
 
				+                "sha256:b6f036ecc017ec2e2cc2a40615b41850dc7aaaea6a932628c0afc73ab98ba3fb",
			
 
				+                "sha256:bad73f9888d30f9e1d57ac8829f8a12091bdee4949b91db279569774a866a18e",
			
 
				+                "sha256:bbc58fca72ce45a64bb02b87f73df58e29848b693869e58bd890b2ddbb42d83b",
			
 
				+                "sha256:bca4d367a725694dae3dfdc86cf1d1622b9f414e70bd19651f5ac4fb3aa96d61",
			
 
				+                "sha256:be41d5de7a8e241864189b7530ca4aaf56a5204332caa70555c2d96379e18079",
			
 
				+                "sha256:bf53d8dddfc3e53a5bda65f7f4aa40fae306843641e3e8e701c18a5609471edf",
			
 
				+                "sha256:c092fe282de83d48e64d306b4bce03114859cdbfe19bf8a978a78a0d44ddadb1",
			
 
				+                "sha256:c3ab23ee9674336654bf9cac30eb75ac6acb9150dc4b1391bec533a7a4126471",
			
 
				+                "sha256:ce64a44c867d128ab8e675f587aae7f61bd2db836a3c4ba522d884cd7c298a77",
			
 
				+                "sha256:d05cef4a164b44ffda58200efcb22355350979e000828479971ebca49b82ddb1",
			
 
				+                "sha256:d2f25c7f410338d31666d7ddedfa67570900e248b940d186b48461bd4e5569a1",
			
 
				+                "sha256:d3b709d64b5cf064972b3763b47139e4a0dc4ae28a36437757f7663f67b99710",
			
 
				+                "sha256:e32e3455db14602b6117f0f422f46bc297a3853ae2c322ecd1e2c4c04daf6ed5",
			
 
				+                "sha256:ed53209b5f0f383acb49a927179fa51a6e2259878e164273ebc6815f3a752465",
			
 
				+                "sha256:f605f348f4e6a2ba00acb3399c71d213b92f27f2383fc4abebf7a37368c12142",
			
 
				+                "sha256:fcdb3755a7c355bc29df1b5e6fb8226d5c8b90551d202d69d0076a8a5649d68b"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==1.3.16"
			
 
				+            "version": "==1.3.20"
			
 
				         },
			
 
				         "sqlalchemy-utils": {
			
 
				             "hashes": [
			
 
				-                "sha256:f268af5bc03597fe7690d60df3e5f1193254a83e07e4686f720f61587ec4493a"
			
 
				+                "sha256:fb66e9956e41340011b70b80f898fde6064ec1817af77199ee21ace71d7d6ab0"
			
 
				             ],
			
 
				-            "version": "==0.36.3"
			
 
				+            "version": "==0.36.8"
			
 
				         },
			
 
				         "sqlparse": {
			
 
				             "hashes": [
			
 
				-                "sha256:022fb9c87b524d1f7862b3037e541f68597a730a8843245c349fc93e1643dc4e",
			
 
				-                "sha256:e162203737712307dfe78860cc56c8da8a852ab2ee33750e33aeadf38d12c548"
			
 
				+                "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
			
 
				+                "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"
			
 
				             ],
			
 
				             "index": "pypi",
			
 
				-            "version": "==0.3.1"
			
 
				+            "version": "==0.4.1"
			
 
				+        },
			
 
				+        "threadpoolctl": {
			
 
				+            "hashes": [
			
 
				+                "sha256:38b74ca20ff3bb42caca8b00055111d74159ee95c4370882bbff2b93d24da725",
			
 
				+                "sha256:ddc57c96a38beb63db45d6c159b5ab07b6bced12c45a1f07b2b92f272aebfa6b"
			
 
				+            ],
			
 
				+            "markers": "python_version >= '3.5'",
			
 
				+            "version": "==2.1.0"
			
 
				         },
			
 
				         "tqdm": {
			
 
				             "hashes": [
			
 
				-                "sha256:00339634a22c10a7a22476ee946bbde2dbe48d042ded784e4d88e0236eca5d81",
			
 
				-                "sha256:ea9e3fd6bd9a37e8783d75bfc4c1faf3c6813da6bd1c3e776488b41ec683af94"
			
 
				+                "sha256:9ad44aaf0fc3697c06f6e05c7cf025dd66bc7bcb7613c66d85f4464c47ac8fad",
			
 
				+                "sha256:ef54779f1c09f346b2b5a8e5c61f96fbcb639929e640e59f8cf810794f406432"
			
 
				             ],
			
 
				             "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
			
 
				-            "version": "==4.45.0"
			
 
				+            "version": "==4.51.0"
			
 
				+        },
			
 
				+        "typed-ast": {
			
 
				+            "hashes": [
			
 
				+                "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355",
			
 
				+                "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919",
			
 
				+                "sha256:0d8110d78a5736e16e26213114a38ca35cb15b6515d535413b090bd50951556d",
			
 
				+                "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa",
			
 
				+                "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652",
			
 
				+                "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75",
			
 
				+                "sha256:3742b32cf1c6ef124d57f95be609c473d7ec4c14d0090e5a5e05a15269fb4d0c",
			
 
				+                "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01",
			
 
				+                "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d",
			
 
				+                "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1",
			
 
				+                "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907",
			
 
				+                "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c",
			
 
				+                "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3",
			
 
				+                "sha256:7e4c9d7658aaa1fc80018593abdf8598bf91325af6af5cce4ce7c73bc45ea53d",
			
 
				+                "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b",
			
 
				+                "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614",
			
 
				+                "sha256:92c325624e304ebf0e025d1224b77dd4e6393f18aab8d829b5b7e04afe9b7a2c",
			
 
				+                "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb",
			
 
				+                "sha256:b52ccf7cfe4ce2a1064b18594381bccf4179c2ecf7f513134ec2f993dd4ab395",
			
 
				+                "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b",
			
 
				+                "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41",
			
 
				+                "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6",
			
 
				+                "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34",
			
 
				+                "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe",
			
 
				+                "sha256:d648b8e3bf2fe648745c8ffcee3db3ff903d0817a01a12dd6a6ea7a8f4889072",
			
 
				+                "sha256:f208eb7aff048f6bea9586e61af041ddf7f9ade7caed625742af423f6bae3298",
			
 
				+                "sha256:fac11badff8313e23717f3dada86a15389d0708275bddf766cca67a84ead3e91",
			
 
				+                "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4",
			
 
				+                "sha256:fcf135e17cc74dbfbc05894ebca928ffeb23d9790b3167a674921db19082401f",
			
 
				+                "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"
			
 
				+            ],
			
 
				+            "version": "==1.4.1"
			
 
				+        },
			
 
				+        "typing-extensions": {
			
 
				+            "hashes": [
			
 
				+                "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918",
			
 
				+                "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c",
			
 
				+                "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"
			
 
				+            ],
			
 
				+            "version": "==3.7.4.3"
			
 
				         }
			
 
				     },
			
 
				     "develop": {}
			
--- a/cdplib/db_handlers/SQLHandler.py
+++ b/cdplib/db_handlers/SQLHandler.py
@@ -508,7 +508,6 @@ class SQLHandler:
 
				         :rtype: DataFrame
			
 
				         '''
			
 
				         try:
			
 
				-            
			
 
				             connection = self._engine.connect()
			
 
				 
			
 
				             data = pd.read_sql(sql=query,
			
@@ -516,7 +515,7 @@ class SQLHandler:
 
				                                **read_sql_kwargs)
			
 
				 
			
 
				             connection.close()
			
 
				-           
			
 
				+
			
 
				             return data
			
 
				 
			
 
				         except Exception as e:
			
--- a/cdplib/fine_tuning/FineTunedClassiferCV.py
+++ b/cdplib/fine_tuning/FineTunedClassiferCV.py
@@ -0,0 +1,173 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Thu Apr 23 08:51:53 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+
			
 
				+@description: class for fine-tuning a sklearn classifier
			
 
				+(optimizing the probability threshold)
			
 
				+"""
			
 
				+
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+
			
 
				+from typing import Callable
			
 
				+
			
 
				+from sklearn.base import (BaseEstimator, ClassifierMixin,
			
 
				+                          clone, MetaEstimatorMixin)
			
 
				+
			
 
				+from cdplib.log import Log
			
 
				+
			
 
				+from cdplib.utils.TyperConverter import TypeConverter
			
 
				+
			
 
				+
			
 
				+class FineTunedClassifierCV(BaseEstimator, ClassifierMixin,
				+                            MetaEstimatorMixin):
				+    """
				+    Probability threshold tuning for a given estimator.
				+    Overrides the method predict of the given sklearn classifier
				+    and returns predictions with the optimal value of
				+    the probability threshold.
				+
				+    The threshold is tuned by cross-validation: on every fold a clone
				+    of the estimator is fitted on the training indices, the candidate
				+    threshold with the best cost on the validation indices is kept,
				+    and the per-fold optima are averaged.
				+
				+    An object of this class can be passed to an sklearn Pipeline
				+    """
				+    def __init__(self, estimator, cost_func: Callable, greater_is_better: bool,
				+                 cv=None, threshold_step: float = 0.1):
				+        """
				+        :param estimator: classifier providing fit and predict_proba
				+
				+        :param Callable cost_func: called as cost_func(y_true, y_pred)
				+            on the thresholded (0/1) predictions of a validation fold
				+
				+        :param bool greater_is_better: when True the threshold with
				+            the largest value of cost_func is selected, else the smallest.
				+
				+        :param cv: cross-validation strategy, resolved in fit by
				+            sklearn's check_cv (None, int, splitter object, or
				+            iterable of (train indices, validation indices) pairs)
				+
				+        :param float threshold_step: spacing of the candidate thresholds
				+            threshold_step, 2 * threshold_step, ... below 1
				+        """
				+        self.estimator = estimator
				+
				+        self.is_fitted = False
				+
				+        self.greater_is_better = greater_is_better
				+
				+        # stored as given (None included) and resolved lazily in fit
				+        self.cv = cv
				+
				+        self.cost_func = cost_func
				+
				+        self.threshold_step = threshold_step
				+
				+        # default value, replaced in fit by the tuned threshold
				+        self.optimal_threshold = 0.5
				+
				+        self._logger = Log("FineTunedClassifyCV")
				+
				+    @staticmethod
				+    def _positive_class_proba(proba_pred) -> np.ndarray:
				+        """
				+        Reduces the output of predict_proba to one probability per sample.
				+
				+        :param proba_pred: 1-d probabilities or a 2-d array
				+            with one column per class
				+        :return: the input as 1-d array; for 2-d input the last column,
				+            which for a binary classifier is the positive class
				+        """
				+        proba_pred = np.asarray(proba_pred)
				+
				+        if proba_pred.ndim == 2:
				+            return proba_pred[:, -1]
				+
				+        return proba_pred
				+
				+    def _get_best_threshold(self, y_val: (pd.DataFrame, np.ndarray),
				+                            proba_pred: (pd.DataFrame, np.ndarray)):
				+        '''
				+        Evaluates cost_func on every candidate threshold
				+        and returns the threshold with the best cost.
				+
				+        :param y_val: true targets of the validation fold
				+        :param proba_pred: predicted probabilities of the validation fold
				+        :return: threshold maximizing cost_func when greater_is_better
				+            is True, else the threshold minimizing it
				+        :raises ValueError: when threshold_step yields no candidates
				+        '''
				+        proba_pred = self._positive_class_proba(proba_pred)
				+
				+        costs = {}
				+
				+        for t in np.arange(self.threshold_step, 1, self.threshold_step):
				+            costs[t] = self.cost_func(y_val, (proba_pred >= t).astype(int))
				+
				+        if not costs:
				+            raise ValueError("threshold_step must lie strictly "
				+                             "between 0 and 1")
				+
				+        if self.greater_is_better:
				+            return max(costs, key=costs.get)
				+        else:
				+            return min(costs, key=costs.get)
				+
				+    def fit(self, X: (pd.DataFrame, np.ndarray),
				+            y: (pd.DataFrame, np.ndarray) = None,
				+            **fit_args):
				+        """
				+        Tunes the probability threshold by cross-validation and
				+        afterwards fits the wrapped estimator on the complete data.
				+
				+        :param X: features
				+        :param y: targets
				+        :param fit_args: forwarded to the fit method of the estimator
				+        :return: self
				+        """
				+        # local import: the module header only imports from sklearn.base
				+        from sklearn.model_selection import check_cv
				+
				+        X = TypeConverter().convert_to_ndarray(X)
				+        if y is not None:
				+            y = TypeConverter().convert_to_ndarray(y)
				+
				+        # turns None / int / iterable of index pairs / splitter
				+        # into an object with a split method
				+        cv = check_cv(self.cv, y, classifier=True)
				+
				+        optimal_thrs_per_fold = []
				+
				+        for train_inds, val_inds in cv.split(X, y):
				+            X_train, X_val = X[train_inds], X[val_inds]
				+
				+            if y is not None:
				+                y_train, y_val = y[train_inds], y[val_inds]
				+            else:
				+                y_train, y_val = None, None
				+
				+            # a fresh clone per fold, the wrapped estimator
				+            # itself is only fitted on the complete data below
				+            estimator = clone(self.estimator)
				+
				+            estimator.fit(X_train, y_train, **fit_args)
				+
				+            proba_pred = estimator.predict_proba(X_val)
				+
				+            optimal_thr = self._get_best_threshold(y_val, proba_pred)
				+
				+            optimal_thrs_per_fold.append(optimal_thr)
				+
				+        if optimal_thrs_per_fold:
				+            self.optimal_threshold = float(np.mean(optimal_thrs_per_fold))
				+        else:
				+            # nothing to average, the previous threshold is kept
				+            self._logger.warn("cv produced no folds, "
				+                              "threshold was not tuned")
				+
				+        self.estimator.fit(X, y, **fit_args)
				+
				+        self.is_fitted = True
				+
				+        return self
				+
				+    def predict(self, X: (pd.DataFrame, np.ndarray)) -> np.ndarray:
				+        """
				+        :param X: features
				+        :return: 1-d array of 0/1, 1 where the predicted probability
				+            reaches optimal_threshold; None (after logging a warning)
				+            when fit has not been called
				+        """
				+        if not self.is_fitted:
				+            self._logger.warn("You should fit first")
				+            return None
				+
				+        # same conversion as applied to the training data in fit
				+        X = TypeConverter().convert_to_ndarray(X)
				+
				+        proba_pred = self._positive_class_proba(
				+            self.estimator.predict_proba(X))
				+
				+        return (proba_pred >= self.optimal_threshold).astype(int)
				+
				+    def get_params(self, deep: bool = True):
				+        """
				+        :param bool deep: forwarded to get_params of the wrapped
				+            estimator; accepted because sklearn utilities
				+            call get_params(deep=...)
				+        :return: parameters of the wrapped estimator
				+            extended by "cv" and "cost_func"
				+        """
				+        # NOTE(review): the returned dict does not mirror the __init__
				+        # signature, so sklearn.base.clone cannot rebuild this wrapper
				+        # from it - confirm whether that is intended
				+        params = self.estimator.get_params(deep=deep)
				+
				+        params.update({"cv": self.cv, "cost_func": self.cost_func})
				+
				+        return params
				+
				+    def set_params(self, **params: dict):
				+        """
				+        Sets "cv" and "cost_func" on this object and forwards
				+        all remaining parameters to the wrapped estimator.
				+
				+        :return: self
				+        """
				+        # work on a copy: removing keys from a dict
				+        # while iterating over it raises a RuntimeError
				+        estimator_params = dict(params)
				+
				+        if "cv" in estimator_params:
				+            self.cv = estimator_params.pop("cv")
				+
				+        if "cost_func" in estimator_params:
				+            self.cost_func = estimator_params.pop("cost_func")
				+
				+        self.estimator.set_params(**estimator_params)
				+
				+        return self
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
				+    # smoke test: fit the threshold tuner around an xgboost
				+    # classifier on a binarized version of the iris data
				+    import gc
				+    from sklearn.datasets import load_iris
				+    from sklearn.metrics import accuracy_score
				+    from xgboost import XGBRFClassifier
				+
				+    iris = load_iris()
				+    X = iris["data"]
				+    # binary target: 1 for the samples labelled 1, else 0
				+    y = (iris["target"] == 1).astype(int)
				+    del iris
				+    gc.collect()
				+
				+    # custom cv object: every split trains on all rows before
				+    # the split point and validates on the following tenth of the data
				+    fold_size = len(X)//10
				+
				+    cv = [(list(range(start)), list(range(start, start + fold_size)))
				+          for start in range(len(X)//2, len(X), fold_size)]
				+
				+    clf = XGBRFClassifier()
				+
				+    fine_tuned_clf = FineTunedClassifierCV(estimator=clf,
				+                                           cost_func=accuracy_score,
				+                                           greater_is_better=True,
				+                                           cv=cv)
				+
				+    fine_tuned_clf.fit(X=X, y=y)
			
 
				+
			
--- a/cdplib/gridsearch/GridSearchPipelineSelector.py
+++ b/cdplib/gridsearch/GridSearchPipelineSelector.py
@@ -14,16 +14,15 @@ Created on Wed Sep 30 14:15:17 2020
 
				 """
			
 
				 
			
 
				 import os
			
 
				-import sys
			
 
				 import datetime
			
 
				+import numpy as np
			
 
				 from itertools import product
			
 
				 from collections import ChainMap
			
 
				 from sklearn.pipeline import Pipeline
			
 
				+from typing import Callable, Optional, Literal, Dict, Union, List
			
 
				 
			
 
				 from cdplib.pipeline_selector.PipelineSelector import PipelineSelector
			
 
				 
			
 
				-sys.path.append(os.getcwd())
			
 
				-
			
 
				 
			
 
				 class GridSearchPipelineSelector(PipelineSelector):
			
 
				     """
			
@@ -36,17 +35,19 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				      if needed.
			
 
				     """
			
 
				     def __init__(self,
			
 
				-                 cost_func,
			
 
				+                 cost_func: Union[Callable, str],
			
 
				                  greater_is_better: bool,
			
 
				                  trials_path: str,
			
 
				-                 backup_trials_freq: int = 1,
			
 
				-                 cross_val_averaging_func: callable = None,
			
 
				-                 additional_metrics: dict = None,
			
 
				-                 strategy_name: str = None,
			
 
				-                 stdout_log_level: str = "INFO"
			
 
				+                 backup_trials_freq: Optional[int] = None,
			
 
				+                 cross_val_averaging_func: Callable = np.mean,
			
 
				+                 additional_metrics: Optional[Dict[str, Callable]] = None,
			
 
				+                 strategy_name: Optional[str] = None,
			
 
				+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
			
 
				+                 = "INFO"
			
 
				                  ):
			
 
				         """
			
 
				-        :param callable cost_func: function to minimize or maximize
			
 
				+        :param Callable cost_func: function to minimize or maximize
			
 
				+            over the elements of a given (pipeline/hyperparameter) space
			
 
				 
			
 
				         :param bool greater_is_better: when True
			
 
				             cost_func is maximized, else minimized.
			
@@ -56,25 +57,24 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             select information about the obtained scores, score variations,
			
 
				             and pipelines, and parameters tried out so far. If a trials object
			
 
				             already exists at the given path, it is loaded and the
			
 
				-            search is continued, else, the search is started from
			
 
				-            the beginning.
			
 
				+            search is continued, else, the search is started from scratch.
			
 
				 
			
 
				         :param backup_trials_freq: frequecy in interations (trials)
			
 
				             of saving the trials object at the trials_path.
			
 
				+            if None, the trials object is backed up every time
			
 
				+            the score improves.
			
 
				 
			
 
				-        :param str log_path: Optional, when not provided logs to stdout.
			
 
				+        :param Callable cross_val_averaging_func: Function to aggregate
			
 
				+            the cross-validation scores.
			
 
				+            Example different from the mean: mean - c*var.
			
 
				 
			
 
				-        :param callable averaging_func: optional,
			
 
				-            when not provided set to mean. Function
			
 
				-            to aggregate the cross-validated values of the cost function.
			
 
				-            Classic situation is to take the mean,
			
 
				-            another example is, for example mean() - c*var().
			
 
				         :param additional_metics: dict of additional metrics to save
			
 
				             of the form {"metric_name": metric} where metric is a Callable.
			
 
				 
			
 
				-        :param str strategy_name: a name might be asigned to the trials,
			
 
				-            a strategy is defined by the data set, cv object, cost function.
			
 
				-            When the strategy changes, one should start with new trials.
			
 
				+        :param str strategy_name:
			
 
				+            a strategy is defined by the data set (columns/features and rows),
			
 
				+            cv object, cost function.
			
 
				+            When the strategy changes, one must start with new trials.
			
 
				 
			
 
				         :param str stdout_log_level: can be INFO, WARNING, ERROR
			
 
				         """
			
@@ -99,7 +99,7 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def run_trials(self):
			
 
				+    def run_trials(self) -> None:
			
 
				         """
			
 
				         """
			
 
				         try:
			
@@ -115,22 +115,25 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             # with all different combinations of
			
 
				             # parameters for different pipelines
			
 
				             # from the space definition.
			
 
				-            space_unfolded = ({"name": pipeline_dist["name"],
			
 
				-                               "pipeline": pipeline_dist["pipeline"],
			
 
				+            space_unfolded = ({"name": param_dist["name"],
			
 
				+                               "pipeline": param_dist["pipeline"],
			
 
				                                "params": param_set}
			
 
				-                              for pipeline_dist in self._space
			
 
				+                              for param_dist in self._space
			
 
				                               for param_set in
			
 
				                               (dict(ChainMap(*tup)) for tup in
			
 
				                                product(*[[{k: v} for v in
			
 
				-                                          pipeline_dist["params"][k]]
			
 
				-                                         for k in pipeline_dist["params"]])))
			
 
				+                                          param_dist["params"][k]]
			
 
				+                                         for k in param_dist["params"]])))
			
 
				 
			
 
				             for space_element in space_unfolded:
			
 
				 
			
 
				+                # uniquely identifies the current space element
			
 
				                 trial_id = {"name": space_element["name"],
			
 
				                             "params": space_element["params"],
			
 
				                             "status": 'ok'}
			
 
				 
			
 
				+                # verify if the current pipline/parameters
			
 
				+                # were already tested before
			
 
				                 if trial_id in done_trial_ids:
			
 
				                     continue
			
 
				 
			
@@ -159,15 +162,12 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def number_of_trials(self) -> int:
			
 
				+    def number_of_trials(self) -> Union[int, None]:
			
 
				         """
			
 
				         Number of trials already run in the current trials object
			
 
				         """
			
 
				         try:
			
 
				-            if self._trials is None:
			
 
				-                return 0
			
 
				-            else:
			
 
				-                return len(self._trials)
			
 
				+            return len(self._trials)
			
 
				 
			
 
				         except Exception as e:
			
 
				             err = ("Failed to retrieve the number of trials. "
			
@@ -176,11 +176,11 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial(self) -> dict:
			
 
				+    def best_trial(self) -> Union[dict, None]:
			
 
				         """
			
 
				         """
			
 
				         try:
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -193,11 +193,11 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_score(self) -> float:
			
 
				+    def best_trial_score(self) -> Union[float, None]:
			
 
				         '''
			
 
				         '''
			
 
				         try:
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -210,11 +210,11 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_score_variance(self) -> float:
			
 
				+    def best_trial_score_variance(self) -> Union[float, None]:
			
 
				         '''
			
 
				         '''
			
 
				         try:
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -227,11 +227,11 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_pipeline(self) -> Pipeline:
			
 
				+    def best_trial_pipeline(self) -> Union[Pipeline, None]:
			
 
				         '''
			
 
				         '''
			
 
				         try:
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -243,16 +243,14 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def get_n_best_trial_pipelines(self, n: int) -> list:
			
 
				+    def get_n_best_trial_pipelines(self, n: int)\
			
 
				+            -> Union[List[Pipeline], None]:
			
 
				         """
			
 
				         N best pipelines with corresponding
			
 
				         best hyperparameters
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(n, int)),\
			
 
				-                "Parameter n must be an int"
			
 
				-
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -266,17 +264,15 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def get_n_best_trial_pipelines_of_each_type(self, n: int) -> list:
			
 
				+    def get_n_best_trial_pipelines_of_each_type(self, n: int)\
			
 
				+            -> Union[Dict[str, List[Pipeline]], None]:
			
 
				         """
			
 
				         If the hyperparameter search is done over multiple
			
 
				         pipelines, then returns n different pipeline-types
			
 
				         with corresponding hyperparameters
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(n, int)),\
			
 
				-                "Parameter n must be an int"
			
 
				-
			
 
				-            assert(self._trials is not None),\
			
 
				+            assert(len(self._trials) > 0),\
			
 
				                 ("Trials object is empty. "
			
 
				                  "Call run_trials method.")
			
 
				 
			
@@ -295,7 +291,7 @@ class GridSearchPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def trials_to_excel(self, path: str):
			
 
				+    def trials_to_excel(self, path: str) -> None:
			
 
				         """
			
 
				         Trials object in the shape of table written to excel,
			
 
				         should contain the run number, pipeline (as str),
			
--- a/cdplib/hyperopt/HyperoptPipelineSelection.py
+++ b/cdplib/hyperopt/HyperoptPipelineSelection.py
@@ -480,8 +480,6 @@ class HyperoptPipelineSelection:
 
				                         trials=self._trials,
			
 
				                         max_evals=len(self._trials.trials) + niter)
			
 
				 
			
 
				-            # print('AAAA', str(niter))
			
 
				-
			
 
				             self._logger.info(
			
 
				                     "Best score is {0} with variance {1}"
			
 
				                     .format(
			
@@ -589,8 +587,8 @@ class HyperoptPipelineSelection:
 
				                 losses = [self._ith_trial_loss(i)
			
 
				                           for i in range(len(self._trials.trials))]
			
 
				 
			
 
				-            best_n_indices = [losses.index(l)
			
 
				-                              for l in sorted(list(set(losses)))[:n]]
			
 
				+            best_n_indices = [losses.index(ll)
			
 
				+                              for ll in sorted(list(set(losses)))[:n]]
			
 
				 
			
 
				             return [self._ith_trial_pipeline(i) for i in best_n_indices]
			
 
				         else:
			
--- a/cdplib/hyperopt/HyperoptPipelineSelector.py
+++ b/cdplib/hyperopt/HyperoptPipelineSelector.py
@@ -21,8 +21,6 @@ from copy import deepcopy
 
				 
			
 
				 import datetime
			
 
				 
			
 
				-from typing import Callable
			
 
				-
			
 
				 import pandas as pd
			
 
				 import numpy as np
			
 
				 
			
@@ -30,7 +28,10 @@ from sklearn.pipeline import Pipeline
 
				 
			
 
				 from hyperopt import fmin, tpe, rand, Trials, space_eval
			
 
				 
			
 
				-from cdplib.pipeline_selector.PipelineSelector import PipelineSelector
			
 
				+from cdplib.pipeline_selector.PipelineSelector import PipelineSelector,\
			
 
				+     SpaceElementType
			
 
				+
			
 
				+from typing import Callable, Optional, Literal, Dict, Union, List
			
 
				 
			
 
				 
			
 
				 class HyperoptPipelineSelector(PipelineSelector):
			
@@ -52,16 +53,18 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				     a better pipeline was found.
			
 
				     """
			
 
				     def __init__(self,
			
 
				-                 cost_func: (Callable, str),
			
 
				+                 cost_func: Union[Callable, str],
			
 
				                  greater_is_better: bool,
			
 
				                  trials_path: str,
			
 
				-                 backup_trials_freq: int = None,
			
 
				-                 cross_val_averaging_func: Callable = None,
			
 
				-                 additional_metrics: dict = None,
			
 
				-                 strategy_name: str = None,
			
 
				-                 stdout_log_level: str = "INFO"):
			
 
				+                 backup_trials_freq: Optional[int] = None,
			
 
				+                 cross_val_averaging_func: Callable = np.mean,
			
 
				+                 additional_metrics: Optional[Dict[str, Callable]] = None,
			
 
				+                 strategy_name: Optional[str] = None,
			
 
				+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
			
 
				+                 = "INFO"):
			
 
				         """
			
 
				-        :param callable cost_func: function to minimize or maximize
			
 
				+        :param Callable cost_func: function to minimize or maximize
			
 
				+            over the elements of a given (pipeline/hyperparameter) space
			
 
				 
			
 
				         :param bool greater_is_better: when True
			
 
				             cost_func is maximized, else minimized.
			
@@ -71,25 +74,24 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				             select information about the obtained scores, score variations,
			
 
				             and pipelines, and parameters tried out so far. If a trials object
			
 
				             already exists at the given path, it is loaded and the
			
 
				-            search is continued, else, the search is started from
			
 
				-            the beginning.
			
 
				+            search is continued, else, the search is started from scratch.
			
 
				 
			
 
				         :param backup_trials_freq: frequecy in interations (trials)
			
 
				             of saving the trials object at the trials_path.
			
 
				+            if None, the trials object is backed up every time
			
 
				+            the score improves.
			
 
				 
			
 
				-        :param str log_path: Optional, when not provided logs to stdout.
			
 
				+        :param Callable cross_val_averaging_func: Function to aggregate
			
 
				+            the cross-validation scores.
			
 
				+            Example different from the mean: mean - c*var.
			
 
				 
			
 
				-        :param callable averaging_func: optional,
			
 
				-            when not provided set to mean. Function
			
 
				-            to aggregate the cross-validated values of the cost function.
			
 
				-            Classic situation is to take the mean,
			
 
				-            another example is, for example mean() - c*var().
			
 
				         :param additional_metics: dict of additional metrics to save
			
 
				             of the form {"metric_name": metric} where metric is a Callable.
			
 
				 
			
 
				-        :param str strategy_name: a name might be asigned to the trials,
			
 
				-            a strategy is defined by the data set, cv object, cost function.
			
 
				-            When the strategy changes, one should start with new trials.
			
 
				+        :param str strategy_name:
			
 
				+            a strategy is defined by the data set (columns/features and rows),
			
 
				+            cv object, cost function.
			
 
				+            When the strategy changes, one must start with new trials.
			
 
				 
			
 
				         :param str stdout_log_level: can be INFO, WARNING, ERROR
			
 
				         """
			
@@ -116,30 +118,19 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				     def run_trials(self,
			
 
				                    niter: int,
			
 
				-                   algo: callable = tpe.suggest):
			
 
				+                   algo: Literal[tpe.suggest, rand.suggest] = tpe.suggest)\
			
 
				+            -> None:
			
 
				         '''
			
 
				         Method performing the search of the best pipeline in the given space.
			
 
				         Calls fmin function from the hyperopt library to minimize the output of
			
 
				         _objective.
			
 
				 
			
 
				         :params int niter: number of search iterations
			
 
				-        :param callable algo: now can only take values tpe for a tree-based
			
 
				-            random search or random for random search
			
 
				+        :param algo: search algorithm supported by the hyperopt library.
			
 
				+            For now these are tpe.suggest for a tree-based bayesian search
			
 
				+            or rand.suggest for randomized search
			
 
				         '''
			
 
				         try:
			
 
				-            assert(self.attached_space),\
			
 
				-                ("Space must be attach to be able to "
			
 
				-                 "retrieve this information.")
			
 
				-
			
 
				-            assert(isinstance(niter, int)),\
			
 
				-                "Parameter 'niter' must be of int type"
			
 
				-
			
 
				-            # right now only two algorithms are provided by hyperopt
			
 
				-            assert(algo in [tpe.suggest, rand.suggest]),\
			
 
				-                ("Parameter 'algo' can be now only tpe or random. "
			
 
				-                 "If other algorithms have been developped by "
			
 
				-                 "by hyperopt, plased add them to the list.")
			
 
				-
			
 
				             self._trials = self._trials or Trials()
			
 
				 
			
 
				             self._logger.info(("Starting {0} iterations of search "
			
@@ -171,11 +162,13 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				             self._backup_trials()
			
 
				 
			
 
				         except Exception as e:
			
 
				-            raise ValueError(("Failed to select best "
			
 
				-                             "pipeline! Exit with error: {}").format(e))
			
 
				+            err = ("Failed to select best "
			
 
				+                   "pipeline! Exit with error: {}").format(e)
			
 
				+
			
 
				+            self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def number_of_trials(self) -> int:
			
 
				+    def number_of_trials(self) -> Union[int, None]:
			
 
				         """
			
 
				         :return: number of trials run so far
			
 
				             with the given Trials object
			
@@ -187,9 +180,11 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				         except Exception as e:
			
 
				             err = ("Failed to retrieve the number of trials. "
			
 
				                    "Exit with error {}".format(e))
			
 
				+
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _get_space_element_from_trial(self, trial) -> dict:
			
 
				+    def _get_space_element_from_trial(self, trial: Dict)\
			
 
				+            -> Union[Dict[SpaceElementType], None]:
			
 
				         """
			
 
				         Hyperopt trials object does not contain the space
			
 
				              elements that result in the corresponding trials.
			
@@ -224,7 +219,8 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _get_space_element_from_index(self, i: int) -> dict:
			
 
				+    def _get_space_element_from_index(self, i: int)\
			
 
				+            -> Union[Dict[SpaceElementType], None]:
			
 
				         """
			
 
				         Gets the space element of shape
			
 
				         {"name": NAME, "params": PARAMS, "pipeline": PIPELINE}
			
@@ -243,7 +239,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _get_pipeline_from_index(self, i: int) -> Pipeline:
			
 
				+    def _get_pipeline_from_index(self, i: int) -> Union[Pipeline, None]:
			
 
				         """
			
 
				         Gets a pipeline with set parameters from the trial number i
			
 
				         """
			
@@ -259,16 +255,19 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial(self) -> dict:
			
 
				+    def best_trial(self) -> Union[Dict, None]:
			
 
				         """
			
 
				         :return: dictionary with the summary of the best trial
			
 
				             and space element (name, pipeline, params)
			
 
				             resulting in the best trial
			
 
				         """
			
 
				         if len(self._trials.trials) == 0:
			
 
				+
			
 
				             self._logger.log_and_throw_warning("Trials object is empty")
			
 
				             return {}
			
 
				+
			
 
				         else:
			
 
				+
			
 
				             try:
			
 
				                 best_trial = deepcopy(self._trials.best_trial)
			
 
				 
			
@@ -297,7 +296,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				                 self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_score(self) -> float:
			
 
				+    def best_trial_score(self) -> Union[float, None]:
			
 
				         """
			
 
				         """
			
 
				         try:
			
@@ -313,7 +312,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_score_variance(self) -> float:
			
 
				+    def best_trial_score_variance(self) -> Union[float, None]:
			
 
				         """
			
 
				         """
			
 
				         try:
			
@@ -329,7 +328,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				     @property
			
 
				-    def best_trial_pipeline(self) -> Pipeline:
			
 
				+    def best_trial_pipeline(self) -> Union[Pipeline, None]:
			
 
				         """
			
 
				         """
			
 
				         try:
			
@@ -344,15 +343,13 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def get_n_best_trial_pipelines(self, n: int) -> list:
			
 
				+    def get_n_best_trial_pipelines(self, n: int)\
			
 
				+            -> Union[List[Pipeline], None]:
			
 
				         """
			
 
				         :return: the list of n best pipelines
			
 
				         documented in trials
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(n, int)),\
			
 
				-                "Parameter n must be an int"
			
 
				-
			
 
				             if len(self._trials.trials) == 0:
			
 
				                 return []
			
 
				             else:
			
@@ -369,15 +366,13 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def get_n_best_trial_pipelines_of_each_type(self, n: int) -> dict:
			
 
				+    def get_n_best_trial_pipelines_of_each_type(self, n: int)\
			
 
				+            -> Union[Dict[str, List[Pipeline]], None]:
			
 
				         """
			
 
				         :return: a dictionary where keys are pipeline names,
			
 
				         and values are lists of best pipelines with this name
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(n, int)),\
			
 
				-                "Parameter n must be an int"
			
 
				-
			
 
				             scores = [trial["result"]["score"]
			
 
				                       for trial in self._trials.trials]
			
 
				 
			
@@ -401,7 +396,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def trials_to_excel(self, path: str = None):
			
 
				+    def trials_to_excel(self, path: str = None) -> None:
			
 
				         """
			
 
				         Saves an excel file with pipeline names, scores,
			
 
				         parameters, and timestamps.
			
@@ -431,8 +426,8 @@ if __name__ == '__main__':
 
				     from sklearn.datasets import load_breast_cancer
			
 
				     from cdplib.log import Log
			
 
				     from cdplib.db_handlers import MongodbHandler
			
 
				-    # from cdplib.hyperopt.space_sample import space
			
 
				-    from cdplib.hyperopt.composed_space_sample import space
			
 
				+    from cdplib.hyperopt.space_sample import space
			
 
				+    # from cdplib.hyperopt.composed_space_sample import space
			
 
				 
			
 
				     trials_path = "hyperopt_trials_TEST.pkl"
			
 
				     additional_metrics = {"precision": precision_score}
			
@@ -472,9 +467,14 @@ if __name__ == '__main__':
 
				 
			
 
				     try:
			
 
				 
			
 
				+        # TODO: this line causes a pytype to throw not-callable error
			
 
				+        # works fine with pytype on other class methods.
			
 
				         save_method = MongodbHandler().insert_data_into_collection
			
 
				         save_kwargs = {'collection_name': collection_name}
			
 
				 
			
 
				+        # save_method = pd.DataFrame.to_excel()
			
 
				+        # save_kwargs = {'excel_writer': "TEST.xlsx"}
			
 
				+
			
 
				         hs.configer_summary_saving(save_method=save_method,
			
 
				                                    kwargs=save_kwargs)
			
 
				 
			
@@ -482,8 +482,8 @@ if __name__ == '__main__':
 
				 
			
 
				     except Exception as e:
			
 
				 
			
 
				-        logger.warn(("Could not configure summary saving in mongo. "
			
 
				-                     "Exit with error: {}".format(e)))
			
 
				+        logger.warning(("Could not configure summary saving in mongo. "
			
 
				+                        "Exit with error: {}".format(e)))
			
 
				 
			
 
				     hs.run_trials(niter=10)
			
 
				 
			
--- a/cdplib/ml_validation/CVComposer.py
+++ b/cdplib/ml_validation/CVComposer.py
@@ -0,0 +1,208 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Wed Dec  9 10:27:39 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+"""
			
 
				+
			
 
				+from typing import Union, Iterable, Tuple, List, NewType
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from itertools import accumulate, repeat, takewhile, chain
			
 
				+
			
 
				+from cdplib.log import Log
			
 
				+
			
 
				+
			
 
				+CVType = NewType("CVType", Iterable[Tuple[List]])
			
 
				+
			
 
				+DataSetType = NewType("DataSetType",
			
 
				+                      Union[pd.DataFrame, pd.Sereis, np.ndarray, List])
			
 
				+
			
 
				+
			
 
				+class CVComposer:
			
 
				+    """
			
 
				+    Groups methods for composing cv objects
			
 
				+    that follow standards from sklearn,
			
 
				+    these cv objects can be passed to algorithms like gridsearch, etc
			
 
				+    """
			
 
				+    def __init__(self):
			
 
				+        """
			
 
				+        """
			
 
				+        self._logger = Log("CVComposer: ")
			
 
				+
			
 
				+    def dummy_cv(
			
 
				+            self,
			
 
				+            train_set_size: Union[int, None] = None,
			
 
				+            train_index: Union[pd.Series, np.ndarray, None] = None,
			
 
				+            test_set_size: Union[int, None] = None,
			
 
				+            test_index: DataSetType = None) -> CVType:
			
 
				+        """
			
 
				+        """
			
 
				+        assert((train_index is None) != (train_set_size is None)),\
			
 
				+            "Set train_index or train_set_size"
			
 
				+
			
 
				+        assert((test_index is None) != (test_set_size is None)),\
			
 
				+            "Set train_index or train_set_size"
			
 
				+
			
 
				+        train_index = train_index if (train_index is not None)\
			
 
				+            else list(range(train_set_size))
			
 
				+
			
 
				+        test_index = test_index if (test_index is not None)\
			
 
				+            else list(range(train_set_size, train_set_size + test_set_size))
			
 
				+
			
 
				+        return [(train_index, test_index)]
			
 
				+
			
 
				+    def dummy_cv_and_concatenated_data_set(
			
 
				+            self,
			
 
				+            X_train: DataSetType,
			
 
				+            y_train: Union[DataSetType, None] = None,
			
 
				+            X_test: DataSetType,
			
 
				+            y_test: Union[DataSetType, None] = None)\
			
 
				+            -> Tuple[DataSetType, DataSetType, CVType]:
			
 
				+        """
			
 
				+        """
			
 
				+        assert((y_test is None) == (y_train is None))
			
 
				+
			
 
				+        use_index = (isinstance(X_train, pd.DataFrame) and
			
 
				+                     isinstance(X_test, pd.DataFrame) and
			
 
				+                     (len(set(X_train.index) and set(X_test.index)) == 0))
			
 
				+
			
 
				+        if use_index:
			
 
				+
			
 
				+            cv = self.dummy_cv(train_index=X_train.index,
			
 
				+                               test_index=X_test.index)
			
 
				+
			
 
				+            X = pd.concat([X_train, X_test], ignore_index=False, axis=0)
			
 
				+
			
 
				+        else:
			
 
				+            cv = self.dummy_cv(train_size=len(X_train),
			
 
				+                               test_size=len(X_test))
			
 
				+
			
 
				+            X = np.concatenate([X_train, X_test])
			
 
				+
			
 
				+        use_target_index = use_index and (
			
 
				+                    isinstance(y_train, pd.Series) and
			
 
				+                    isinstance(y_test, pd.Series) and
			
 
				+                    (X_train.index.equals(y_train.index)) and
			
 
				+                    (X_test.index.equals(y_test.index)))
			
 
				+
			
 
				+        if use_target_index:
			
 
				+
			
 
				+            y = pd.concat([y_train, y_test], ignore_index=False, axis=0)
			
 
				+
			
 
				+        else:
			
 
				+
			
 
				+            y = np.concatenate([y_train, y_test]) if (y_train is not None)\
			
 
				+                else None
			
 
				+
			
 
				+        result_to_np = (
			
 
				+            (isinstance(X_train, pd.DataFrame) !=
			
 
				+             isinstance(X_test, pd.DataFrame)) or
			
 
				+            (isinstance(X_train, pd.DataFrame)) and
			
 
				+            (len(set(X_train.index) and set(X_test.index)) != 0))
			
 
				+
			
 
				+        if result_to_np:
			
 
				+            self._logger.log_and_throw_warning(
			
 
				+                    "The concatenated dataframe is converted to numpy")
			
 
				+
			
 
				+        return cv, X, y
			
 
				+
			
 
				+    def expanding_cv(self, test_proportion: float,
			
 
				+                     start_train_proportion: float,
			
 
				+                     step_proportion: float = None,
			
 
				+                     expanding_test_size: bool = False,
			
 
				+                     data_set_size: Union[float, None] = None,
			
 
				+                     index: Union[pd.Series, np.ndarray, list, None] = None)\
			
 
				+            -> Union[Iterable[Tuple[List]], None]:
			
 
				+        """
			
 
				+        """
			
 
				+        try:
			
 
				+            assert((index is None) != (data_set_size is None)),\
			
 
				+                "Set index or data_set_size"
			
 
				+
			
 
				+            index = pd.Series(index) if (index is not None)\
			
 
				+                else pd.Series(range(data_set_size))
			
 
				+
			
 
				+            data_set_size = data_set_size or len(index)
			
 
				+
			
 
				+            start_train_size = int(start_train_proportion * data_set_size)
			
 
				+            step_size = int(step_proportion * data_set_size)
			
 
				+
			
 
				+            test_size = int(test_proportion * data_set_size)
			
 
				+
			
 
				+            train_inds_set = (list(range(train_size))
			
 
				+                              for train_size in
			
 
				+                              takewhile(
			
 
				+                                      lambda x: x <= data_set_size - test_size,
			
 
				+                                      accumulate(repeat(start_train_size),
			
 
				+                                                 lambda x, _: x + step_size)))
			
 
				+
			
 
				+            for train_inds in train_inds_set:
			
 
				+
			
 
				+                if expanding_test_size:
			
 
				+
			
 
				+                    yield (index[train_inds],
			
 
				+                           index[train_inds[-1] + 1:
			
 
				+                                 train_inds[-1] + 1
			
 
				+                                 + int(test_proportion*len(train_inds))])
			
 
				+
			
 
				+                else:
			
 
				+
			
 
				+                    yield (index[train_inds],
			
 
				+                           index[train_inds[-1] + 1:
			
 
				+                                 train_inds[-1] + 1 + test_size])
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            self._logger.log_and_raise_error(("Failed to make expanding cv. "
			
 
				+                                              "Exit with error: {}".format(e)))
			
 
				+
			
 
				+    def sliding_window_cv(
			
 
				+        self,
			
 
				+        test_proportion: float,
			
 
				+        train_proportion: float,
			
 
				+        step_proportion: float = None,
			
 
				+        data_set_size: Union[float, None] = None,
			
 
				+        index: Union[pd.Series, np.ndarray, list, None] = None)\
			
 
				+            -> Union[Iterable[Tuple[List]], None]:
			
 
				+        """
			
 
				+        """
			
 
				+        try:
			
 
				+            assert((index is None) != (data_set_size is None)),\
			
 
				+                "Set index or data_set_size"
			
 
				+
			
 
				+            index = pd.Series(index) if (index is not None)\
			
 
				+                else pd.Series(range(data_set_size))
			
 
				+
			
 
				+            data_set_size = data_set_size or len(index)
			
 
				+
			
 
				+            train_size = int(train_proportion * data_set_size)
			
 
				+            test_size = int(test_proportion * data_set_size)
			
 
				+            step_size = int(step_proportion * data_set_size)
			
 
				+
			
 
				+            train_sizes = takewhile(lambda x: x <= data_set_size - test_size,
			
 
				+                                    accumulate(repeat(train_size),
			
 
				+                                               lambda x, _: x + step_size))
			
 
				+
			
 
				+            train_starts = takewhile(lambda x: x <= data_set_size
			
 
				+                                     - train_size - test_size,
			
 
				+                                     accumulate(repeat(step_size),
			
 
				+                                                lambda x, _: x + step_size))
			
 
				+
			
 
				+            train_starts = chain([0], train_starts)
			
 
				+
			
 
				+            train_inds_set = list(range(train_start, train_size)
			
 
				+                                  for train_start, train_size in
			
 
				+                                  zip(train_starts, train_sizes))
			
 
				+
			
 
				+            cv = ((index[train_inds], index[train_inds[-1] + 1:
			
 
				+                                            train_inds[-1] + 1 + test_size])
			
 
				+                  for train_inds in train_inds_set)
			
 
				+
			
 
				+            return cv
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            self._logger.log_and_raise_error(
			
 
				+                    ("Failed to make sliding window cv. "
			
 
				+                     "Exit with error: {}".format(e)))
			
 
				+
			
--- a/cdplib/ml_validation/__init__.py
+++ b/cdplib/ml_validation/__init__.py
--- a/cdplib/ml_validation/cross_validate_with_fine_tuning.py
+++ b/cdplib/ml_validation/cross_validate_with_fine_tuning.py
@@ -0,0 +1,491 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Thu Oct 29 13:58:23 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+
			
 
				+
			
 
				+@description:
			
 
				+
			
 
				+* Input:
			
 
				+    - pipeline/hyperparameter space
			
 
				+    - data_train
			
 
				+    - cv
			
 
				+    - cv_folds
			
 
				+
			
 
				+* For each pipeline:
			
 
				+
			
 
				+    -> Split data_train into folds according to cv
			
 
				+
			
 
				+     -> For each fold:
			
 
				+
			
 
				+         => get data_train_fold, data_test_fold, cv_fold
			
 
				+
			
 
				+         => split data_train_fold into subfolds according to cv_fold
			
 
				+
			
 
				+         => For each subfold:
			
 
				+
			
 
				+             ==> get data_train_subfold, data_test_subfold
			
 
				+
			
 
				+             ==> train pipeline on data_train_subfold
			
 
				+
			
 
				+             ==> find best_threshold_subfold on data_test_subfold
			
 
				+
			
 
				+        => Find averaged_threshold_fold averaged over best_threshold_subfold
			
 
				+
			
 
				+        => train pipeline on data_train_fold
			
 
				+
			
 
				+        => find score_fold on data_test_fold with proba_threshold_fold
			
 
				+
			
 
				+        => find best_threshold_fold on data_test_fold
			
 
				+
			
 
				+    -> find score averaged over score_fold
			
 
				+
			
 
				+    -> find averaged_threshold averaged over best_threshold_fold
			
 
				+
			
 
				+* choose (pipeline/hyperparameters, threshold) in the space with best score
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from itertools import zip_longest
			
 
				+from typing import Union, Callable, Dict, Iterable, Tuple, List
			
 
				+from copy import deepcopy
			
 
				+from itertools import accumulate, repeat, takewhile, chain
			
 
				+
			
 
				+from sklearn.model_selection import StratifiedKFold
			
 
				+
			
 
				+from cdplib.log import Log
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+aa = make_sliding_window_cv(data_set_size=50,
			
 
				+                            test_proportion=0.1,
			
 
				+                            train_proportion=0.6,
			
 
				+                            step_proportion=0.1)
			
 
				+
			
 
				+aa = list(aa)
			
 
				+
			
 
				+aa = make_sliding_window_cv(test_proportion=0.1,
			
 
				+                            train_proportion=0.6,
			
 
				+                            step_proportion=0.05,
			
 
				+                            index=pd.date_range(start=pd.to_datetime("2020-01-01"), periods=50))
			
 
				+
			
 
				+aa = list(aa)
			
 
				+
			
 
				+
			
 
				+# TODO: write with yield !!!!
			
 
				+
			
 
				+def make_nested_expanding_cv(
			
 
				+        test_proportion: float,
			
 
				+        start_train_proportion: float,
			
 
				+        step_proportion: float = None,
			
 
				+        expanding_test_size: bool = False,
			
 
				+        data_set_size: Union[float, None] = None,
			
 
				+        index: Union[pd.Series, np.ndarray, list, None] = None)\
			
 
				+        -> Iterable[Tuple[List]]:
			
 
				+    """
			
 
				+    """
			
 
				+    logger = Log("make_nested_expanding_cv:")
			
 
				+
			
 
				+    try:
			
 
				+        cv = make_expanding_cv(test_proportion=test_proportion,
			
 
				+                               start_train_proportion=start_train_proportion,
			
 
				+                               step_proportion=step_proportion,
			
 
				+                               expanding_test_size=expanding_test_size,
			
 
				+                               data_set_size=data_set_size,
			
 
				+                               index=index)
			
 
				+
			
 
				+        nested_cv = []
			
 
				+
			
 
				+        for train_inds, test_inds in cv:
			
 
				+
			
 
				+            fold_index = train_inds if index is not None\
			
 
				+                else None
			
 
				+
			
 
				+            fold_size = len(train_inds) if index is None else None
			
 
				+
			
 
				+            fold_cv = make_expanding_cv(
			
 
				+                    test_proportion=test_proportion,
			
 
				+                    start_train_proportion=start_train_proportion,
			
 
				+                    step_proportion=step_proportion,
			
 
				+                    expanding_test_size=expanding_test_size,
			
 
				+                    data_set_size=fold_size,
			
 
				+                    index=fold_index)
			
 
				+
			
 
				+            nested_cv.append(list(fold_cv))
			
 
				+
			
 
				+        return nested_cv
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.log_and_raise_error(("Failed to make nested expanding cv. "
			
 
				+                                    "Exit with error: {}".format(e)))
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+for train_inds, test_inds in aa:
			
 
				+    print(len(test_inds)/(len(train_inds) + len(test_inds)))
			
 
				+    print(len(test_inds)/50)
			
 
				+
			
 
				+aaa = list(aaa)
			
 
				+
			
 
				+for aaa_cv in aaa:
			
 
				+    for train_inds, test_inds in aaa_cv:
			
 
				+        print(len(test_inds)/(len(train_inds) + len(test_inds)))
			
 
				+        print(len(test_inds)/50)
			
 
				+
			
 
				+aaa = make_nested_expanding_cv(#data_set_size=50,
			
 
				+                               test_proportion=0.1,
			
 
				+                               start_train_proportion=0.6,
			
 
				+                               step_proportion=0.1,
			
 
				+                               index=pd.date_range(start=pd.to_datetime("2020-01-01"), periods=50))
			
 
				+
			
 
				+aaa = list(aaa)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def cv_slice_dataset(X, y, train_inds, test_inds)\
			
 
				+        -> Tuple[Union[pd.DataFrame, np.ndarray],
			
 
				+                 Union[pd.Series, np.ndarray]]:
			
 
				+    """
			
 
				+    """
			
 
				+    if isinstance(X, pd.DataFrame):
			
 
				+        X_train = X.loc[train_inds]
			
 
				+        X_val = X.loc[test_inds]
			
 
				+    else:
			
 
				+        X_train = X[train_inds]
			
 
				+        X_val = X[test_inds]
			
 
				+
			
 
				+    if y is not None:
			
 
				+        y_train = y[train_inds]
			
 
				+        y_val = y[test_inds]
			
 
				+
			
 
				+    return X_train, X_val, y_train, y_val
			
 
				+
			
 
				+
			
 
				+def get_optimal_proba_threshold(score_func: Callable,
			
 
				+                                y_true: Union[pd.Series, np.ndarray],
			
 
				+                                proba: Union[pd.Series, np.ndarray],
			
 
				+                                threshold_set: Union[Iterable, None] = None):
			
 
				+    """
			
 
				+    """
			
 
				+    scores = {}
			
 
				+
			
 
				+    if threshold_set is None:
			
 
				+        threshold_set = np.arange(0, 1, 0.1)
			
 
				+
			
 
				+    for threshold in threshold_set:
			
 
				+
			
 
				+        y_pred = (proba >= threshold).astype(int)
			
 
				+
			
 
				+        scores[threshold] = score_func(y_true, y_pred)
			
 
				+
			
 
				+    return max(scores, key=scores.get)
			
 
				+
			
 
				+
			
 
				+def cross_validate_with_optimal_threshold(
			
 
				+        estimator: object,
			
 
				+        score_func: Callable,
			
 
				+        X_train: Union[pd.DataFrame, np.ndarray],
			
 
				+        y_train: Union[pd.Series, np.ndarray, None] = None,
			
 
				+        X_val: Union[pd.DataFrame, np.ndarray, None] = None,
			
 
				+        y_val: Union[pd.Series, np.ndarray, None] = None,
			
 
				+        X_val_threshold: Union[pd.DataFrame, np.ndarray, None] = None,
			
 
				+        y_val_threshold: Union[pd.Series, np.ndarray, None] = None,
			
 
				+        cv: Union[Iterable, int, None] = None,
			
 
				+        cv_threshold: Union[Iterable, int, None] = None,
			
 
				+        additional_metrics: Union[Dict[str, Callable], None] = None,
			
 
				+        threshold_set: Union[Iterable, None] = None,
			
 
				+        scores: Dict = None)\
			
 
				+            -> Dict:
			
 
				+    """
			
 
				+    """
			
 
				+    logger = Log("cross_validate_with_optimal_threshold:")
			
 
				+
			
 
				+    X_train = deepcopy(X_train)
			
 
				+    y_train = deepcopy(y_train)
			
 
				+    X_val = deepcopy(X_val)
			
 
				+    y_val = deepcopy(y_val)
			
 
				+    X_val_threshold = deepcopy(X_val_threshold)
			
 
				+    y_val_threshold = deepcopy(y_val_threshold)
			
 
				+
			
 
				+    scores = scores or {"test_threshold": [],
			
 
				+                        "test_score": [],
			
 
				+                        "train_score": []}
			
 
				+
			
 
				+    additional_metrics = additional_metrics or {}
			
 
				+
			
 
				+    for metric_name, metric in additional_metrics.items():
			
 
				+        if "test_" + metric_name not in scores:
			
 
				+            scores["test_" + metric_name] = []
			
 
				+            scores["train_" + metric_name] = []
			
 
				+
			
 
				+    if cv is None:
			
 
				+
			
 
				+        # test score is calculated on X_vals
			
 
				+
			
 
				+        assert((X_val is not None) and (y_val is not None)),\
			
 
				+            "Validation set must be set"
			
 
				+
			
 
				+        if cv_threshold is None:
			
 
				+
			
 
				+            refit = (X_val_threshold is not None)
			
 
				+
			
 
				+            # if a validation set for proba threshold tuning is not given,
			
 
				+            # we use the validation set on which we calculate the test score
			
 
				+            # (this might lead to overfitting)
			
 
				+
			
 
				+            X_val_threshold = X_val_threshold if refit else deepcopy(X_val)
			
 
				+            y_val_threshold = y_val_threshold if refit else deepcopy(y_val)
			
 
				+
			
 
				+            cv_threshold, X_train, y_train = make_dummy_cv(
			
 
				+                    X_train=X_train,
			
 
				+                    y_train=y_train,
			
 
				+                    X_val=X_val_threshold,
			
 
				+                    y_val=y_val_threshold)
			
 
				+        else:
			
 
				+
			
 
				+            # if cv_threshold is given, we find the optimal threshold
			
 
				+            # on each fold and output the average value for the threshold
			
 
				+
			
 
				+            if (X_val_threshold is not None):
			
 
				+                logger.log_and_throw_warning((
			
 
				+                        "X_val_threshold is set "
			
 
				+                        "but cv_threshold will be used"))
			
 
				+
			
 
				+            if isinstance(cv_threshold, int):
			
 
				+                cv_threshold = StratifiedKFold(n_splits=cv_threshold)\
			
 
				+                    .split(X=X_train, y=y_train)
			
 
				+
			
 
				+            refit = True
			
 
				+
			
 
				+        thresholds = []
			
 
				+
			
 
				+        for train_inds, val_inds in cv_threshold:
			
 
				+
			
 
				+            print("----- In cv threshold fold")
			
 
				+
			
 
				+            X_train_fold, X_val_fold, y_train_fold, y_val_fold =\
			
 
				+                cv_slice_dataset(X=X_train,
			
 
				+                                 y=y_train,
			
 
				+                                 train_inds=train_inds,
			
 
				+                                 test_inds=val_inds)
			
 
				+
			
 
				+            estimator.fit(X_train_fold, y_train_fold)
			
 
				+
			
 
				+            proba_val = estimator.predict_proba(X_val_fold)[:, 1]
			
 
				+
			
 
				+            threshold = get_optimal_proba_threshold(score_func=score_func,
			
 
				+                                                    y_true=y_val_fold,
			
 
				+                                                    proba=proba_val)
			
 
				+
			
 
				+            thresholds.append(threshold)
			
 
				+
			
 
				+            print("----- Threshold:", threshold)
			
 
				+
			
 
				+        scores["test_threshold"].append(np.mean(thresholds))
			
 
				+
			
 
				+        if refit:
			
 
				+
			
 
				+            estimator.fit(X_train, y_train)
			
 
				+
			
 
				+            proba_val = estimator.predict_proba(X_val)[:, 1]
			
 
				+
			
 
				+        proba_train = estimator.predict_proba(X_train)[:, 1]
			
 
				+
			
 
				+        pred_train = (proba_train >= threshold)
			
 
				+        pred_val = (proba_val >= threshold)
			
 
				+
			
 
				+        train_score = score_func(y_train, pred_train)
			
 
				+        test_score = score_func(y_val, pred_val)
			
 
				+
			
 
				+        for metric_name, metric in additional_metrics.items():
			
 
				+            scores["train_" + metric_name].append(metric(y_train, pred_train))
			
 
				+            scores["test_" + metric_name].append(metric(y_val, pred_val))
			
 
				+
			
 
				+        scores["train_score"].append(train_score)
			
 
				+        scores["test_score"].append(test_score)
			
 
				+
			
 
				+        return scores
			
 
				+
			
 
				+    else:
			
 
				+
			
 
				+        if isinstance(cv, int):
			
 
				+            cv = StratifiedKFold(n_splits=cv).split(X=X_train, y=y_train)
			
 
				+
			
 
				+        cv_threshold = cv_threshold or []
			
 
				+
			
 
				+        for (train_inds, val_inds), cv_fold in zip_longest(cv, cv_threshold):
			
 
				+
			
 
				+            print("=== In cv fold")
			
 
				+
			
 
				+            X_train_fold, X_val_fold, y_train_fold, y_val_fold =\
			
 
				+                cv_slice_dataset(X=X_train,
			
 
				+                                 y=y_train,
			
 
				+                                 train_inds=train_inds,
			
 
				+                                 test_inds=val_inds)
			
 
				+
			
 
				+            scores = cross_validate_with_optimal_threshold(
			
 
				+                    estimator=estimator,
			
 
				+                    score_func=score_func,
			
 
				+                    X_train=X_train_fold,
			
 
				+                    y_train=y_train_fold,
			
 
				+                    X_val=X_val_fold,
			
 
				+                    y_val=y_val_fold,
			
 
				+                    cv_threshold=cv_fold,
			
 
				+                    additional_metrics=additional_metrics,
			
 
				+                    threshold_set=threshold_set,
			
 
				+                    scores=scores)
			
 
				+
			
 
				+            print("=== scores:", scores)
			
 
				+
			
 
				+        return scores
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+
			
 
				+    from sklearn.metrics import accuracy_score, precision_score
			
 
				+    from sklearn.datasets import load_breast_cancer
			
 
				+    from xgboost import XGBRFClassifier
			
 
				+    from sklearn.model_selection import train_test_split
			
 
				+
			
 
				+    data_loader = load_breast_cancer()
			
 
				+
			
 
				+    X = data_loader["data"]
			
 
				+    y = data_loader["target"]
			
 
				+
			
 
				+    X_train, X_val, y_train, y_val = train_test_split(X, y)
			
 
				+
			
 
				+    estimator = XGBRFClassifier()
			
 
				+
			
 
				+    score_func = accuracy_score
			
 
				+
			
 
				+    additional_metrics = {"precision": precision_score}
			
 
				+
			
 
				+    averaged_scores = []
			
 
				+    averaged_thresholds = []
			
 
				+
			
 
				+    print("\nTesting cv=None, cv_threshold=None, X_val_threshold=None\n")
			
 
				+
			
 
				+    scores = cross_validate_with_optimal_threshold(
			
 
				+            estimator=estimator,
			
 
				+            score_func=accuracy_score,
			
 
				+            X_train=X_train,
			
 
				+            y_train=y_train,
			
 
				+            X_val=X_val,
			
 
				+            y_val=y_val,
			
 
				+            X_val_threshold=None,
			
 
				+            y_val_threshold=None,
			
 
				+            cv=None,
			
 
				+            cv_threshold=None,
			
 
				+            additional_metrics=additional_metrics)
			
 
				+
			
 
				+    print("\nScores:", scores)
			
 
				+
			
 
				+    averaged_scores.append(np.mean(scores["test_score"]))
			
 
				+    averaged_thresholds.append(np.mean(scores["test_threshold"]))
			
 
				+
			
 
				+    print("\n ########################################################## \n")
			
 
				+
			
 
				+    X_train, X_val_threshold, y_train, y_val_threshold =\
			
 
				+        train_test_split(X_train, y_train)
			
 
				+
			
 
				+    print("\nTesting cv=None, cv_threshold=None, X_val_threshold\n")
			
 
				+
			
 
				+    scores = cross_validate_with_optimal_threshold(
			
 
				+            estimator=estimator,
			
 
				+            score_func=accuracy_score,
			
 
				+            X_train=X_train,
			
 
				+            y_train=y_train,
			
 
				+            X_val=X_val,
			
 
				+            y_val=y_val,
			
 
				+            X_val_threshold=X_val_threshold,
			
 
				+            y_val_threshold=y_val_threshold,
			
 
				+            cv=None,
			
 
				+            cv_threshold=None,
			
 
				+            additional_metrics=additional_metrics)
			
 
				+
			
 
				+    print("\nScores:", scores)
			
 
				+
			
 
				+    averaged_scores.append(np.mean(scores["test_score"]))
			
 
				+    averaged_thresholds.append(np.mean(scores["test_threshold"]))
			
 
				+
			
 
				+    print("\n ########################################################## \n")
			
 
				+
			
 
				+    print("\nTesting cv=None, cv_threshold=3 \n")
			
 
				+
			
 
				+    scores = cross_validate_with_optimal_threshold(
			
 
				+            estimator=estimator,
			
 
				+            score_func=accuracy_score,
			
 
				+            X_train=X_train,
			
 
				+            y_train=y_train,
			
 
				+            X_val=X_val,
			
 
				+            y_val=y_val,
			
 
				+            X_val_threshold=X_val_threshold,
			
 
				+            y_val_threshold=y_val_threshold,
			
 
				+            cv=None,
			
 
				+            cv_threshold=3,
			
 
				+            additional_metrics=additional_metrics)
			
 
				+
			
 
				+    print("\nScores:", scores)
			
 
				+
			
 
				+    averaged_scores.append(np.mean(scores["test_score"]))
			
 
				+    averaged_thresholds.append(np.mean(scores["test_threshold"]))
			
 
				+
			
 
				+    print("\n ########################################################## \n")
			
 
				+
			
 
				+    print("\nTesting cv=3, cv_threshold=None \n")
			
 
				+
			
 
				+    scores = cross_validate_with_optimal_threshold(
			
 
				+            estimator=estimator,
			
 
				+            score_func=accuracy_score,
			
 
				+            X_train=X_train,
			
 
				+            y_train=y_train,
			
 
				+            X_val=X_val,
			
 
				+            y_val=y_val,
			
 
				+            X_val_threshold=X_val_threshold,
			
 
				+            y_val_threshold=y_val_threshold,
			
 
				+            cv=3,
			
 
				+            cv_threshold=None,
			
 
				+            additional_metrics=additional_metrics)
			
 
				+
			
 
				+    print("\nScores:", scores)
			
 
				+
			
 
				+    print("\n ########################################################## \n")
			
 
				+
			
 
				+    print("\nTesting cv=3, cv_threshold=[3, 3, 3] \n")
			
 
				+
			
 
				+    scores = cross_validate_with_optimal_threshold(
			
 
				+            estimator=estimator,
			
 
				+            score_func=accuracy_score,
			
 
				+            X_train=X_train,
			
 
				+            y_train=y_train,
			
 
				+            X_val=X_val,
			
 
				+            y_val=y_val,
			
 
				+            X_val_threshold=X_val_threshold,
			
 
				+            y_val_threshold=y_val_threshold,
			
 
				+            cv=3,
			
 
				+            cv_threshold=[3, 3, 3],
			
 
				+            additional_metrics=additional_metrics)
			
 
				+
			
 
				+    print("\nScores:", scores)
			
 
				+
			
 
				+    averaged_scores.append(np.mean(scores["test_score"]))
			
 
				+    averaged_thresholds.append(np.mean(scores["test_threshold"]))
			
 
				+
			
 
				+    print("\n ########################################################## \n")
			
 
				+
			
 
				+    # TODO: check overwriting X_train,
			
 
				+    # additional metrics append instead of overwrite
			
 
				+    # check the length of cv_threshold
			
 
				+    # test custom cv, cv_threshold
			
 
				+
			
 
				+    print("\n Averaged test score:", averaged_scores)
			
 
				+    print("\n Averaged threshold:", averaged_thresholds)
			
--- a/cdplib/ml_validation/expanding_cv.py
+++ b/cdplib/ml_validation/expanding_cv.py
@@ -0,0 +1,97 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Wed Dec  9 09:55:52 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+"""
			
 
				+
			
 
				+from typing import Union, Iterable, Tuple, List
			
 
				+import pandas as pd
			
 
				+import numpy as np
			
 
				+from itertools import accumulate, repeat, takewhile
			
 
				+
			
 
				+from cdplib.log import Log
			
 
				+
			
 
				+
			
 
				+def make_expanding_cv(test_proportion: float,
			
 
				+                      start_train_proportion: float,
			
 
				+                      step_proportion: float = None,
			
 
				+                      expanding_test_size: bool = False,
			
 
				+                      data_set_size: Union[float, None] = None,
			
 
				+                      index: Union[pd.Series, np.ndarray, list, None] = None)\
			
 
				+        -> Union[Iterable[Tuple[List]], None]:
			
 
				+    """
			
 
				+
			
 
				+    """
			
 
				+    logger = Log("make_expanding_cv:")
			
 
				+
			
 
				+    try:
			
 
				+        assert((index is None) != (data_set_size is None)),\
			
 
				+            "Set index or data_set_size"
			
 
				+
			
 
				+        index = index if (index is not None)\
			
 
				+            else pd.Series(range(data_set_size))
			
 
				+
			
 
				+        data_set_size = data_set_size or len(index)
			
 
				+
			
 
				+        start_train_size = int(start_train_proportion * data_set_size)
			
 
				+        step_size = int(step_proportion * data_set_size)
			
 
				+
			
 
				+        test_size = int(test_proportion * data_set_size)
			
 
				+
			
 
				+        train_inds_set = (list(range(train_size))
			
 
				+                          for train_size in
			
 
				+                          takewhile(
			
 
				+                                  lambda x: x <= data_set_size - test_size,
			
 
				+                                  accumulate(repeat(start_train_size),
			
 
				+                                             lambda x, _: x + step_size)))
			
 
				+
			
 
				+        for train_inds in train_inds_set:
			
 
				+
			
 
				+            if expanding_test_size:
			
 
				+
			
 
				+                yield (index[train_inds],
			
 
				+                       index[train_inds[-1] + 1:
			
 
				+                             train_inds[-1] + 1
			
 
				+                             + int(test_proportion*len(train_inds))])
			
 
				+
			
 
				+            else:
			
 
				+
			
 
				+                yield (index[train_inds],
			
 
				+                       index[train_inds[-1] + 1:
			
 
				+                             train_inds[-1] + 1 + test_size])
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.log_and_raise_error(("Failed to make expanding cv. "
			
 
				+                                    "Exit with error: {}".format(e)))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+
			
 
				+    logger = Log("Test_expanding_cv: ")
			
 
				+
			
 
				+    logger.info("Start Testing")
			
 
				+
			
 
				+    logger.info("Testing expanding cv: ")
			
 
				+
			
 
				+    cv = make_expanding_cv(data_set_size=50,
			
 
				+                           test_proportion=0.1,
			
 
				+                           start_train_proportion=0.6,
			
 
				+                           step_proportion=0.1,
			
 
				+                           expanding_test_size=True)
			
 
				+
			
 
				+    cv = list(cv)
			
 
				+
			
 
				+    logger.info("Testing expanding cv with datetime index")
			
 
				+
			
 
				+    cv = make_expanding_cv(
			
 
				+            test_proportion=0.1,
			
 
				+            start_train_proportion=0.6,
			
 
				+            step_proportion=0.1,
			
 
				+            index=pd.date_range(start=pd.to_datetime("2020-01-01"),
			
 
				+                                periods=50))
			
 
				+
			
 
				+    cv = list(cv)
			
 
				+
			
 
				+    logger.info("Finish testing")
			
--- a/cdplib/pipeline_selector/PipelineSelector.py
+++ b/cdplib/pipeline_selector/PipelineSelector.py
@@ -24,8 +24,10 @@ import time
 
				 import datetime
			
 
				 import numpy as np
			
 
				 import pandas as pd
			
 
				+from copy import deepcopy
			
 
				 from abc import ABC, abstractmethod, abstractproperty
			
 
				-from typing import Callable
			
 
				+from typing import Callable, Optional, TypedDict,\
			
 
				+    Literal, Dict, Iterable, List, Tuple, Union
			
 
				 import functools
			
 
				 from sklearn.pipeline import Pipeline
			
 
				 from sklearn.model_selection import cross_validate as sklearn_cross_validation
			
@@ -34,10 +36,17 @@ from hyperopt import STATUS_OK, STATUS_FAIL
 
				 from cdplib.log import Log
			
 
				 from cdplib.utils import ExceptionsHandler
			
 
				 from cdplib.utils import LoadingUtils
			
 
				+from cdplib.ml_validation import CVComposer
			
 
				 
			
 
				 sys.path.append(os.getcwd())
			
 
				 
			
 
				 
			
 
class SpaceElementType(TypedDict):
    """
    Typed dictionary with the keys "name", "pipeline" and "params".

    NOTE(review): presumably the shape of a single element of the
    (pipeline/hyperparameter) search space - confirm against the
    concrete selector implementations.
    """
    # name of the space element
    name: str
    # sklearn Pipeline object belonging to the element
    pipeline: Pipeline
    # parameters of the element
    # (presumably the hyperparameters set on the pipeline - TODO confirm)
    params: dict
			
 
				+
			
 
				+
			
 
				 class PipelineSelector(ABC):
			
 
				     """
			
 
				     An abstract class for selecting a machine learning
			
@@ -53,16 +62,20 @@ class PipelineSelector(ABC):
 
				     Children classes: hyperopt and custom gridsearch.
			
 
				     """
			
 
				     def __init__(self,
			
 
				-                 cost_func: (Callable, str),
			
 
				+                 cost_func: Union[Callable, str],
			
 
				                  greater_is_better: bool,
			
 
				                  trials_path: str,
			
 
				-                 backup_trials_freq: int = None,
			
 
				-                 cross_val_averaging_func: Callable = None,
			
 
				-                 additional_metrics: dict = None,
			
 
				-                 strategy_name: str = None,
			
 
				-                 stdout_log_level: str = "INFO"):
			
 
				+                 backup_trials_freq: Optional[int] = None,
			
 
				+                 cross_val_averaging_func: Callable = np.mean,
			
 
				+                 additional_metrics: Optional[Dict[str, Callable]] = None,
			
 
				+                 additional_averaging_funcs:
			
 
				+                     Optional[Dict[str, Callable]] = None,
			
 
				+                 strategy_name: Optional[str] = None,
			
 
				+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
			
 
				+                 = "INFO"):
			
 
				         """
			
 
				         :param Callable cost_func: function to minimize or maximize
			
 
				+            over the elements of a given (pipeline/hyperparameter) space
			
 
				 
			
 
				         :param bool greater_is_better: when True
			
 
				             cost_func is maximized, else minimized.
			
@@ -72,62 +85,42 @@ class PipelineSelector(ABC):
 
				             select information about the obtained scores, score variations,
			
 
				             and pipelines, and parameters tried out so far. If a trials object
			
 
				             already exists at the given path, it is loaded and the
			
 
				-            search is continued, else, the search is started from
			
 
				-            the beginning.
			
 
				+            search is continued, else, the search is started from scratch.
			
 
				 
			
 
				         :param backup_trials_freq: frequecy in interations (trials)
			
 
				             of saving the trials object at the trials_path.
			
 
				             if None, the trials object is backed up avery time
			
 
				             the score improves.
			
 
				 
			
 
				-        :param str log_path: Optional, when not provided logs to stdout.
			
 
				+        :param Callable cross_val_averaging_func: Function to aggregate
			
 
				+            the cross-validation scores of the cost_func.
			
 
				+            Example different from the mean: mean - c*var.
			
 
				+
			
 
				+        :param additional_metics: dict of additional metrics to keep track of
			
 
				+            in the trials of the form {"metric_name": metric}.
			
 
				 
			
 
				-        :param Callable cross_val_averaging_func: optional,
			
 
				-            when not provided set to mean. Function
			
 
				-            to aggregate the cross-validated values of the cost function.
			
 
				-            Classic situation is to take the mean,
			
 
				-            another example is, for example mean() - c*var().
			
 
				+        :param additional_averaging_funcs: functions used to aggregate
			
 
				+            the output of the cross_validate function.
			
 
				+            The output always contains the scores of the cost_func,
			
 
				+            additional_metrics (if it is not empty),
			
 
				+            but it can also contain additional information
			
 
				+            (like probability threshold for example)
			
 
				+            if different from cross_val_averaging_func.
			
 
				+            Of the form {"metric_name": averaging_func}
			
 
				 
			
 
				-        :param additional_metics: dict of additional metrics to save
			
 
				-            of the form {"metric_name": metric} where metric is a Callable.
			
 
				+            Remark:
			
 
				 
			
 
				-        :param str strategy_name: a name might be asigned to the trials,
			
 
				-            a strategy is defined by the data set, cv object, cost function.
			
 
				-            When the strategy changes, one should start with new trials.
			
 
				+        :param str strategy_name:
			
 
				+            a strategy is defined by the data set (columns/features and rows),
			
 
				+            cv object, cost function.
			
 
				+            When the strategy changes, one must start with new trials.
			
 
				 
			
 
				         :param str stdout_log_level: can be INFO, WARNING, ERROR
			
 
				         """
			
 
				-        try:
			
 
				+        self._logger = Log("PipelineSelector: ",
			
 
				+                           stdout_log_level=stdout_log_level)
			
 
				 
			
 
				-            self._logger = Log("PipelineSelector: ",
			
 
				-                               stdout_log_level=stdout_log_level)
			
 
				-
			
 
				-            input_errors = [
			
 
				-                    (cost_func, Callable,
			
 
				-                     "Parameter 'cost_func' must be a Callable"),
			
 
				-                    (greater_is_better, bool,
			
 
				-                     "Parameter 'greater_is_better' must be bool type"),
			
 
				-                    (trials_path, str,
			
 
				-                     "Parameter 'trials_path' must be of string type"),
			
 
				-                    (cross_val_averaging_func, (Callable, None.__class__),
			
 
				-                     ("Parameter 'cross_val_averaging_func'"
			
 
				-                      "must be a Callable")),
			
 
				-                    (backup_trials_freq, (int, None.__class__),
			
 
				-                     "Parameter backup_trials_freq must be an int"),
			
 
				-                    (additional_metrics, (dict, None.__class__),
			
 
				-                     "Parameter additional_metrics must be a dict"),
			
 
				-                    (strategy_name, (str, None.__class__),
			
 
				-                     "Parameter strategy_name must be a str"),
			
 
				-                    (stdout_log_level, str,
			
 
				-                     "Parameter stdout_log_level must be a str")]
			
 
				-
			
 
				-            for p, t, err in input_errors:
			
 
				-                assert((isinstance(p, t))), err
			
 
				-
			
 
				-            assert((additional_metrics is None) or
			
 
				-                   all([isinstance(metric, Callable)
			
 
				-                        for metric in additional_metrics.values()])),\
			
 
				-                "Metrics in additional_metrics must be Callables"
			
 
				+        try:
			
 
				 
			
 
				             ExceptionsHandler(self._logger)\
			
 
				                 .assert_is_directory(path=trials_path)
			
@@ -143,18 +136,14 @@ class PipelineSelector(ABC):
 
				             self._score_factor = (not greater_is_better) - greater_is_better
			
 
				             self.trials_path = trials_path
			
 
				             self._backup_trials_freq = backup_trials_freq
			
 
				-            self._cross_val_averaging_func = cross_val_averaging_func\
			
 
				-                or np.mean
			
 
				-            self._additional_metrics = additional_metrics or {}
			
 
				             self._strategy_name = strategy_name
			
 
				             self._data_path = None
			
 
				             self._cv_path = None
			
 
				 
			
 
				-            # best_score can be also read from trials
			
 
				-            # but is kept explicitely in order not to
			
 
				-            # search through the trials object every time
			
 
				-            # loss is the opposite of score
			
 
				-            self.best_score = np.nan
			
 
				+            self._X = None
			
 
				+            self._y = None
			
 
				+            self._cv = None
			
 
				+            self._space = None
			
 
				 
			
 
				             # if cross-valition is not configured,
			
 
				             # sklearn cross-validation method is taken by default
			
@@ -164,23 +153,17 @@ class PipelineSelector(ABC):
 
				             # it is loaded and the search is continued. Else,
			
 
				             # the search is started from the beginning.
			
 
				             if os.path.isfile(self.trials_path):
			
 
				-                try:
			
 
				-                    with open(self.trials_path, "rb") as f:
			
 
				-                        self._trials = pickle.load(f)
			
 
				 
			
 
				-                    self._start_iteration = self.number_of_trials
			
 
				+                with open(self.trials_path, "rb") as f:
			
 
				+                    self._trials = pickle.load(f)
			
 
				 
			
 
				-                    self.best_score = self.best_trial_score
			
 
				+                self._start_iteration = self.number_of_trials
			
 
				 
			
 
				-                    self._logger.info(("Loaded an existing trials object"
			
 
				-                                       "Consisting of {} trials")
			
 
				-                                      .format(self._start_iteration))
			
 
				+                self.best_score = self.best_trial_score
			
 
				 
			
 
				-                except Exception as e:
			
 
				-                    err = ("Trials object could not be loaded. "
			
 
				-                           "Exit with error {}").format(e)
			
 
				-                    self._logger.log_and_raise_error(err)
			
 
				-                    self._trials = None
			
 
				+                self._logger.info(("Loaded an existing trials object"
			
 
				+                                   "Consisting of {} trials")
			
 
				+                                  .format(self._start_iteration))
			
 
				 
			
 
				             else:
			
 
				                 self._logger.warning(("No existing trials object was found, "
			
@@ -188,6 +171,7 @@ class PipelineSelector(ABC):
 
				 
			
 
				                 self._trials = None
			
 
				                 self._start_iteration = 0
			
 
				+                self.best_score = np.nan
			
 
				 
			
 
				             # keeping track of the current search iteration
			
 
				             self._iteration = self._start_iteration
			
@@ -203,10 +187,9 @@ class PipelineSelector(ABC):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _backup_trials(self):
			
 
				+    def _backup_trials(self) -> None:
			
 
				         '''
			
 
				-        Pickles (Saves) the trials object.
			
 
				-        Used in a scheduler.
			
 
				+        Pickles (Saves) the trials object in binary format.
			
 
				         '''
			
 
				         try:
			
 
				             with open(self.trials_path, "wb") as f:
			
@@ -218,30 +201,21 @@ class PipelineSelector(ABC):
 
				 
			
 
				     def configure_cross_validation(self,
			
 
				                                    cross_validation: Callable,
			
 
				-                                   kwargs: dict = None):
			
 
				+                                   kwargs: dict = None) -> None:
			
 
				         """
			
 
				         Method for attaching a custom cross-validation function
			
 
				+
			
 
				         :param cross_validation: a function that has the same
			
 
				              signature as sklearn.model_selection.cross_validate
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(cross_validation, Callable)),\
			
 
				-                "Parameter cross_validation must be a function"
			
 
				-
			
 
				             kwargs = kwargs or {}
			
 
				 
			
 
				-            assert(isinstance(kwargs, dict)),\
			
 
				-                "Paramter kwargs must be a dict"
			
 
				-
			
 
				             self._cross_validation = functools.partial(
			
 
				                     self._cross_validation, **kwargs)
			
 
				 
			
 
				             self.configured_cross_validation = True
			
 
				 
			
 
				-            if hasattr(cross_validation, "__name__"):
			
 
				-                self.best_result["cross_validation"] =\
			
 
				-                    cross_validation.__name__
			
 
				-
			
 
				             self._logger.info("Configured cross validation")
			
 
				 
			
 
				         except Exception as e:
			
@@ -252,8 +226,12 @@ class PipelineSelector(ABC):
 
				 
			
 
				     def configure_cross_validation_from_module(self,
			
 
				                                                module_path: str,
			
 
				-                                               name: str):
			
 
				+                                               name: str) -> None:
			
 
				         """
			
 
				+        Attaches a cross-validation funciton defined in
			
 
				+        a different python model. This function must have
			
 
				+        the same signature as sklearn.model_seclection.cross_validate
			
 
				+
			
 
				         :param str module_path: path to python module
			
 
				             where the cross_validation function is defined.
			
 
				 
			
@@ -261,18 +239,12 @@ class PipelineSelector(ABC):
 
				             loaded froma python module.
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(module_path, str) and
			
 
				-                   isinstance(name, str)),\
			
 
				-                   "Parameters module_path and name must be of str type"
			
 
				-
			
 
				             self._cross_validation = \
			
 
				                 LoadingUtils().load_from_module(
			
 
				                         module_path=module_path, name=name)
			
 
				 
			
 
				             self.configured_cross_validation = True
			
 
				 
			
 
				-            self.best_result["cross_validation"] = name
			
 
				-
			
 
				             self._logger.info("Configured cross validation")
			
 
				 
			
 
				         except Exception as e:
			
@@ -281,8 +253,11 @@ class PipelineSelector(ABC):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def attach_space(self, space):
			
 
				+    def attach_space(self, space) -> None:
			
 
				         """
			
 
				+        Method for attaching the pipeline/hyperparameter space
			
 
				+        over which the score_func is optimized.
			
 
				+
			
 
				         :param space: space where
			
 
				             the search is performed. A space might be either
			
 
				             a list of dictionaries or a hyperopt space object
			
@@ -291,17 +266,21 @@ class PipelineSelector(ABC):
 
				         """
			
 
				         try:
			
 
				             self._space = space
			
 
				-            self._logger.info("Attached parameter distribution space")
			
 
				+
			
 
				             self.attached_space = True
			
 
				 
			
 
				+            self._logger.info("Attached parameter distribution space")
			
 
				+
			
 
				         except Exception as e:
			
 
				             err = ("Failed to attach space. "
			
 
				                    "Exit with error: {}".format(e))
			
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def attach_space_from_module(self, module_path: str, name: str):
			
 
				+    def attach_space_from_module(self, module_path: str, name: str) -> None:
			
 
				         """
			
 
				+        Attaches a space defined in a different python module.
			
 
				+
			
 
				         :param str module_path: path to python module
			
 
				             where the space is defined.
			
 
				 
			
@@ -309,34 +288,34 @@ class PipelineSelector(ABC):
 
				             a python module.
			
 
				         """
			
 
				         try:
			
 
				-            assert(isinstance(module_path, str) and
			
 
				-                   isinstance(name, str)),\
			
 
				-                   "Parameters module_path and name must be of str type"
			
 
				-
			
 
				             self._space = LoadingUtils().load_from_module(
			
 
				                     module_path=module_path, name=name)
			
 
				 
			
 
				-            self._logger.info("Attached parameter distribution space")
			
 
				-
			
 
				             self.attached_space = True
			
 
				 
			
 
				+            self._logger.info("Attached parameter distribution space")
			
 
				+
			
 
				         except Exception as e:
			
 
				             err = ("Failed to attach space from module. "
			
 
				                    "Exit with error {}".format(e))
			
 
				 
			
 
				             self._logger.loger_and_raise_error(err)
			
 
				 
			
 
				-    def attach_data(self, X_train: (pd.DataFrame, np.ndarray),
			
 
				-                    y_train: (pd.DataFrame, pd.Series, np.ndarray) = None,
			
 
				-                    X_val: (pd.DataFrame, np.ndarray) = None,
			
 
				-                    y_val: (pd.DataFrame, pd.Series, np.ndarray) = None,
			
 
				-                    cv: (list, int) = None):
			
 
				+    def attach_data(self, X_train: Union[pd.DataFrame, np.ndarray],
			
 
				+                    y_train: Optional[pd.DataFrame, pd.Series, np.ndarray]
			
 
				+                    = None,
			
 
				+                    X_val: Optional[pd.DataFrame, np.ndarray]
			
 
				+                    = None,
			
 
				+                    y_val: Optional[pd.DataFrame, pd.Series, np.ndarray]
			
 
				+                    = None,
			
 
				+                    cv: Optional[Iterable[Tuple[List[int], List[int]]]]
			
 
				+                    = None) -> None:
			
 
				         '''
			
 
				         :param array X_train: data on which
			
 
				             machine learning pipelines are trained
			
 
				 
			
 
				         :param array y_train: optional, vector with targets,
			
 
				-            (not all algorithms require a targets)
			
 
				+            (None in case of unsupervided learning)
			
 
				 
			
 
				         :param array X_val: optional, validation data.
			
 
				             When not provided, cross-validated value
			
@@ -344,53 +323,49 @@ class PipelineSelector(ABC):
 
				 
			
 
				         :param array y_val: optional, validation targets
			
 
				 
			
 
				-        :param list cv: list of tuples containing
			
 
				+        :param list cv: iterabe of tuples containing
			
 
				             train and validation indices or an integer representing
			
 
				             the number of folds for a random split of data
			
 
				             during cross-validation
			
 
				             example: [([0,1,2], [3,4]), ([1,2,3], [4,5])]
			
 
				         '''
			
 
				         try:
			
 
				-            NoneType = None.__class__
			
 
				-
			
 
				-            input_err = "Non-valid combination of train and val data types"
			
 
				+            assert((cv is None) == (X_val is not None)),\
			
 
				+                "Either cv or X_val must be provided"
			
 
				 
			
 
				             if cv is None:
			
 
				-                assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
			
 
				-                       isinstance(X_val, (pd.DataFrame, np.ndarray)) and
			
 
				-                       isinstance(y_train, (pd.Series, np.ndarray,
			
 
				-                                            pd.DataFrame, NoneType)) and
			
 
				-                       isinstance(y_val, (pd.Series, np.ndarray)) and
			
 
				-                       (y_val is None) == (y_train is None)), input_err
			
 
				-
			
 
				-                # cost is evaluated with a cross validation function
			
 
				-                # that accepts an array and a cv object with
			
 
				-                # indices of the fold splits.
			
 
				+
			
 
				+                assert((y_val is None) == (y_train is None)),\
			
 
				+                    "y_train and y_val must be simultanious"
			
 
				+
			
 
				                 # Here we create a trivial cv object
			
 
				                 # with one validation split.
			
 
				+                cv = CVComposer.dummy_cv()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				 
			
 
				                 train_inds = list(range(len(X_train)))
			
 
				                 val_inds = list(range(len(X_train),
			
 
				                                       len(X_train) + len(X_val)))
			
 
				 
			
 
				                 self._cv = [(train_inds, val_inds)]
			
 
				+
			
 
				                 self._X = np.concatenate([X_train, X_val])
			
 
				                 self._y = None if y_train is None\
			
 
				                     else np.concatenate([y_train, y_val])
			
 
				 
			
 
				             else:
			
 
				-                assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
			
 
				-                       isinstance(y_train, (pd.Series, np.ndarray,
			
 
				-                                            pd.DataFrame, NoneType)) and
			
 
				-                       (X_val is None) and (y_val is None)), input_err
			
 
				 
			
 
				                 self._cv = cv
			
 
				                 self._X = X_train
			
 
				                 self._y = y_train
			
 
				 
			
 
				-            self._logger.info("Attached data")
			
 
				             self.attached_data = True
			
 
				 
			
 
				+            self._logger.info("Attached data")
			
 
				+
			
 
				         except Exception as e:
			
 
				             err = ("Failed to attach data. "
			
 
				                    "Exit with error: {}".format(e))
			
@@ -399,17 +374,23 @@ class PipelineSelector(ABC):
 
				 
			
 
				     def attach_data_from_hdf5(self,
			
 
				                               data_hdf5_store_path: str,
			
 
				-                              cv_pickle_path: str = None):
			
 
				-        """
			
 
				-        Method for attaching data from a hdf5 store.
			
 
				-             The hdf5 store is a binary file,
			
 
				-             after loading it, it is a dictionary with keys
			
 
				-             X_train (y_train, X_val, y_val). The cv is loaded
			
 
				-             from a pickle file. The reason to separate the data
			
 
				-             store from the cv store, is the hdf5 is optimized to
			
 
				-             store large dataframes (especially with simple types) and
			
 
				-             a a small list of lists like a cv-object is better
			
 
				-             to be stored as a pickle file.
			
 
				+                              cv_pickle_path: str = None) -> None:
			
 
				+        """
			
 
				+        Method for attaching data from a hdf5 store
			
 
				+         and a cv object from a pickled file.
			
 
				+
			
 
				+         The hdf5 store is a binary file,
			
 
				+         after loading it, it is a dictionary with keys
			
 
				+         X_train (y_train, X_val, y_val).
			
 
				+
			
 
				+         The cv is loaded from a pickle file.
			
 
				+
			
 
				+         The reason to separate the data
			
 
				+         store from the cv store, is the hdf5 is optimized to
			
 
				+         store large dataframes (especially with simple types) and
			
 
				+         a a small list of lists like a cv-object is better
			
 
				+         to be stored as a pickle file.
			
 
				+
			
 
				         :param str data_hdf5_store_path: path to the hdf5 store
			
 
				             with train and validation data
			
 
				         :param str cv_pickle_path: path to the pickle file with
			
@@ -423,19 +404,16 @@ class PipelineSelector(ABC):
 
				 
			
 
				             self._data_path = data_hdf5_store_path
			
 
				 
			
 
				-            data_input = {}
			
 
				-
			
 
				-            for key in ["/X_train", "/y_train", "/X_val", "/y_val"]:
			
 
				-                if key not in store.keys():
			
 
				-                    data_input[key.replace("/", "")] = None
			
 
				-                else:
			
 
				-                    data_input[key.replace("/", "")] = store[key]
			
 
				+            data_input = {key: store["key"] if key in store else None
			
 
				+                          for key in ["X_train", "y_train", "X_val", "y_val"]}
			
 
				 
			
 
				             if cv_pickle_path is not None:
			
 
				+
			
 
				                 assert(os.path.isfile(cv_pickle_path)),\
			
 
				                     "Parameter cv_pickle_path is not a file"
			
 
				 
			
 
				                 data_input["cv"] = pickle.load(open(cv_pickle_path, "rb"))
			
 
				+
			
 
				                 self._cv_path = cv_pickle_path
			
 
				 
			
 
				             else:
			
@@ -449,21 +427,60 @@ class PipelineSelector(ABC):
 
				             err = "Failed to attach data. Exit with error: {}".format(e)
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
    @property
    def default_summary(self) -> dict:
        """
        Basic description of the current state of the search.

        The returned dictionary holds, in this order:
        the strategy name (if set), the cost function (the string itself
        or the __name__ of the callable, if it has one), the path to
        the trials, the paths to the data and to the cv object
        (each only if set), the start time of the tuning and
        the current iteration.

        NOTE(review): according to the original author's description
        the score and the tested space element are added to this summary
        elsewhere, before it is stored - confirm against the caller.
        """
        report = {}

        if self._strategy_name is not None:
            report["strategy_name"] = self._strategy_name

        cost = self._cost_func

        # a cost function without a usable name is left out
        if isinstance(cost, str):
            report["cost_func"] = cost
        elif hasattr(cost, "__name__"):
            report["cost_func"] = cost.__name__

        report.update(trials_path=self.trials_path)

        # the paths are only known when the data came from files
        optional_paths = (("data_path", self._data_path),
                          ("cv_path", self._cv_path))

        report.update({key: path for key, path in optional_paths
                       if path is not None})

        report.update(start_tuning_time=self.start_tuning_time,
                      iteration=self._iteration)

        return report
			
 
				+
			
 
				     def configer_summary_saving(self,
			
 
				-                                save_method: Callable = None,
			
 
				-                                kwargs: dict = None):
			
 
				-        """
			
 
				-        Attaching a method for saving information about
			
 
				-             the trials/space/strategy and the result of
			
 
				-             the current best pipeline. This method can
			
 
				-             save the result in a txt or a json file,
			
 
				-             or in a database for example. Arguments like
			
 
				-             file path or the table name can be specified in kwargs.
			
 
				+                                save_method: Callable
			
 
				+                                = functools.partial(
			
 
				+                                        pd.DataFrame.to_excel,
			
 
				+                                        **{"path_or_buf": "result.csv"}),
			
 
				+                                kwargs: Optional[dict] = None) -> None:
			
 
				+        """
			
 
				+        When the score calculated by _objective function improves,
			
 
				+        the default summary is updated with information about the
			
 
				+        current score and pipeline/hyperparameters
			
 
				+        and can be saved to a file or database, depending
			
 
				+        on the configured save_method.
			
 
				+
			
 
				         :param Callable save_method: method for saving the result
			
 
				             of the pipeline selection. The method must accept
			
 
				-            a pandas DataFrame as argument. See self._save_result
			
 
				-            method for the format of the argument being saved.
			
 
				-            By default, saving to a csv file.
			
 
				+            a pandas DataFrame as argument.
			
 
				+            By default, saving to an excel file.
			
 
				+
			
 
				             Examples:
			
 
				                 functools.partial(pd.DataFrame.to_csv,
			
 
				                                   **{"path_or_buf": <PATH>})
			
@@ -476,13 +493,11 @@ class PipelineSelector(ABC):
 
				                                   **{"collection_name": <NAME>})
			
 
				 
			
 
				             using functools can be avoided by providing the kwarg argument
			
 
				+
			
 
				         :param dict kwargs: a dictionary with keyword arguments
			
 
				             (like tablename) to provide to the save_method
			
 
				         """
			
 
				         try:
			
 
				-            save_method = save_method or functools.partial(
			
 
				-                    pd.DataFrame.to_excel, **{"path_or_buf": "result.csv"})
			
 
				-
			
 
				             kwargs = kwargs or {}
			
 
				 
			
 
				             self._save_method = functools.partial(save_method, **kwargs)
			
@@ -494,10 +509,16 @@ class PipelineSelector(ABC):
 
				         except Exception as e:
			
 
				             err = ("Failed to configure the summary saving. "
			
 
				                    "Exit with error {}".format(e))
			
 
				+
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _save_summary(self, summary: dict):
			
 
				+    def _save_summary(self, summary: dict) -> None:
			
 
				         """
			
 
				+        When the score calculated by _objective function improves,
			
 
				+        the default summary is updated with information about the
			
 
				+        current score and pipeline/hyperparameters
			
 
				+        and can be saved to a file or database, depending
			
 
				+        on the configured save_method.
			
 
				         """
			
 
				         try:
			
 
				             assert(self.configured_summary_saving),\
			
@@ -511,29 +532,40 @@ class PipelineSelector(ABC):
 
				 
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _evaluate(self, pipeline: Pipeline,
			
 
				-                  scoring: Callable = None,
			
 
				-                  cross_validation: Callable = None) -> dict:
			
 
				+    def _evaluate(self, pipeline: Pipeline) -> Union[Dict[str, float], None]:
			
 
				         """
			
 
				-        This method is called in _objective.
			
 
				+        Calculates the averaged cross-validated score and score variance,
			
 
				+        as well as the averaged values and variances of the additional metrics.
			
 
				+
			
 
				+        This method is called in the _objective function that is
			
 
				+        passed to the hyperopt optimizer.
			
 
				 
			
 
				-        Calculates the cost on the attached data.
			
 
				         This function can be overridden, when the cost
			
 
				         needs to be calculated differently,
			
 
				         for example with a tensorflow model.
			
 
				 
			
 
				         :param Pipeline pipeline: machine learning pipeline
			
 
				             that will be evaluated with cross-validation
			
 
				-        :param cross_validation: a function that has the same
			
 
				-             signature as sklearn.model_selection.cross_validate
			
 
				 
			
 
				         :return: dictionary with the aggregated
			
 
				-            cross-validation score and
			
 
				-            the score variance.
			
 
				+            cross-validation scores and
			
 
				+            the score variances for the scores in the output
			
 
				+            of the cross-validation function.
			
 
				+
			
 
				+            form of the output:
			
 
				+                {"score": 10, #score used in optimization,
			
 
				+                 "score_variance": 0.5
			
 
				+                 "additional_metric1": 5,
			
 
				+                 "additional_metric1_variance": 7}
			
 
				+
			
 
				+            a custom cross-validation function can also include for
			
 
				+            example probability threshold for each fold, then
			
 
				+            the output of this function will include the average
			
 
				+            value and the variance of the probability threshold
			
 
				+            over the folds.
			
 
				         """
			
 
				         try:
			
 
				-
			
 
				-            scoring = {"score": make_scorer(self._cost_func)}
			
 
				+            scoring = {"score": make_scorer(self.cost_func)}
			
 
				 
			
 
				             scoring.update({metric_name: make_scorer(metric)
			
 
				                             for metric_name, metric
			
@@ -543,13 +575,19 @@ class PipelineSelector(ABC):
 
				                     estimator=pipeline,
			
 
				                     X=self._X,
			
 
				                     y=self._y,
			
 
				-                    cv=self._cv or 5,
			
 
				-                    scoring=scoring,
			
 
				+                    cv=self._cv,
			
 
				+                    scoring=self._scoring,
			
 
				                     error_score=np.nan)
			
 
				 
			
 
				+            averaging_funcs = {
			
 
				+                    metric_name: self._additional_averaging_funcs[metric_name]
			
 
				+                    if metric_name in self._additional_averaging_funcs
			
 
				+                    else self._cross_val_averaging_func
			
 
				+                    for metric_name in scores}
			
 
				+
			
 
				             scores_average = {
			
 
				                     metric_name.replace("test_", ""):
			
 
				-                    self._cross_val_averaging_func(scores[metric_name])
			
 
				+                    averaging_funcs[metric_name](scores[metric_name])
			
 
				                     for metric_name in scores
			
 
				                     if metric_name.startswith("test")}
			
 
				 
			
@@ -563,12 +601,13 @@ class PipelineSelector(ABC):
 
				 
			
 
				         except Exception as e:
			
 
				             err = "Failed to evaluate pipeline. Exit with error: {}".format(e)
			
 
				+
			
 
				             self._logger.log_and_raise_error(err)
			
 
				 
			
 
				-    def _objective(self, space_element: dict) -> dict:
			
 
				+    def _objective(self, space_element: SpaceElementType) -> dict:
			
 
				         '''
			
 
				-        This method is called in search_for_best_pipeline
			
 
				-        inside the hyperopt fmin method.
			
 
				+        This method is called in run_trials method
			
 
				+        that is using the hyperopt fmin optimizer.
			
 
				 
			
 
				         Uses _evaluate method.
			
 
				 
			
@@ -581,12 +620,10 @@ class PipelineSelector(ABC):
 
				 
			
 
				         :Warning: fmin minimizes the loss,
			
 
				         when _evaluate returns a value to be maximized,
			
 
				-        it should be multiplied by -1 to obtain loss.
			
 
				+        it is multiplied by -1 to obtain loss.
			
 
				 
			
 
				-        :param dict space_element: must contain keys
			
 
				-            name (with the name of the pipeline),
			
 
				-            pipeline (Pipeline object),
			
 
				-            params (dict of pipeline params)
			
 
				+        :param SpaceElementType space_element: element
			
 
				+            of the space over which the optimization is done
			
 
				 
			
 
				         :output: dictionary with keys
			
 
				             loss (minimized value),
			
@@ -596,18 +633,9 @@ class PipelineSelector(ABC):
 
				             score_variance,
			
 
				             timestamp (end of execution),
			
 
				             train_time: execution time
			
 
				+            and other keys given in self.default_summary
			
 
				         '''
			
 
				         try:
			
 
				-            assert(isinstance(space_element, dict) and
			
 
				-                   set(['name', 'pipeline', 'params'])
			
 
				-                   <= space_element.keys()),\
			
 
				-                 "Space elements are of wrong form"
			
 
				-
			
 
				-            assert(isinstance(space_element['name'], str) and
			
 
				-                   isinstance(space_element['pipeline'], Pipeline) and
			
 
				-                   isinstance(space_element['params'], dict)),\
			
 
				-                "Space elements are of wrong form"
			
 
				-
			
 
				             start_time = time.time()
			
 
				 
			
 
				             assert(self.attached_data),\
			
@@ -615,32 +643,14 @@ class PipelineSelector(ABC):
 
				                  "in order to effectuate the best"
			
 
				                  "pipeline search")
			
 
				 
			
 
				-            summary = {}
			
 
				-
			
 
				-            if self._strategy_name is not None:
			
 
				-                summary["strategy_name"] = self._strategy_name
			
 
				+            summary = deepcopy(self.default_summary)
			
 
				 
			
 
				-            if isinstance(self._cost_func, str):
			
 
				-                summary["cost_func"] = self._cost_func
			
 
				-
			
 
				-            elif hasattr(self._cost_func, "__name__"):
			
 
				-                summary["cost_func"] = self._cost_func.__name__
			
 
				-
			
 
				-            summary["trials_path"] = self.trials_path
			
 
				-
			
 
				-            if self._data_path is not None:
			
 
				-                summary["data_path"] = self._data_path
			
 
				-
			
 
				-            if self._cv_path is not None:
			
 
				-                summary["cv_path"] = self._cv_path
			
 
				-
			
 
				-            summary["start_tuning_time"] = self.start_tuning_time
			
 
				-
			
 
				-            summary["iteration"] = self._iteration
			
 
				-
			
 
				-            backup_cond = (self._backup_trials_freq is not None) and\
			
 
				-                ((self._iteration - self._start_iteration - 1) %
			
 
				-                 self._backup_trials_freq == 0) or\
			
 
				+            # backup the current trials if the score improved
			
 
				+            # at previous iteration or every ith iteration
			
 
				+            # if the backup_trials_freq is set
			
 
				+            backup_cond = ((self._backup_trials_freq is not None) and
			
 
				+                           ((self._iteration - self._start_iteration - 1) %
			
 
				+                            self._backup_trials_freq == 0)) or\
			
 
				                 self._score_improved
			
 
				 
			
 
				             if backup_cond:
			
@@ -666,9 +676,6 @@ class PipelineSelector(ABC):
 
				 
			
 
				             end_time = time.time()
			
 
				 
			
 
				-            assert(not np.isnan(result["score"])),\
			
 
				-                "Score value is not in the output of the _evaluate method"
			
 
				-
			
 
				             summary['status'] = STATUS_OK
			
 
				             summary.update(result)
			
 
				             summary['loss'] = self._score_factor * summary['score']
			
@@ -695,6 +702,7 @@ class PipelineSelector(ABC):
 
				 
			
 
				             self._logger.warning("Trial failed with error {}".format(e))
			
 
				 
			
 
				+            summary = {}
			
 
				             summary['status'] = STATUS_FAIL
			
 
				             summary['timestamp'] = datetime.datetime.today()
			
 
				             summary['error'] = e
			
@@ -725,11 +733,10 @@ class PipelineSelector(ABC):
 
				     def best_trial(self) -> dict:
			
 
				         """
			
 
				         Best trial so far.
			
 
				-         Should contain the best pipeline,
			
 
				-         best hyperparameters,
			
 
				-         as well as an output of the self._objective method,
			
 
				-         but the exact form of the output depends on the implementation
			
 
				-         of the Trials object.
			
 
				+         Should contain the status, pipeline,
			
 
				+         hyperparameters, and the score (loss).
			
 
				+         Other information is optional and is defined
			
 
				+         by self.default_summary
			
 
				         """
			
 
				         pass
			
 
				 
			
@@ -743,6 +750,7 @@ class PipelineSelector(ABC):
 
				     @abstractproperty
			
 
				     def best_trial_score_variance(self) -> float:
			
 
				         """
			
 
				+        Variance of the cross-validation score of the best pipeline
			
 
				         """
			
 
				         pass
			
 
				 
			
@@ -771,12 +779,11 @@ class PipelineSelector(ABC):
 
				         pass
			
 
				 
			
 
				     @abstractmethod
			
 
				-    def trials_to_excel(self, path: str):
			
 
				+    def trials_to_excel(self, path: str) -> None:
			
 
				         """
			
 
				         Trials object in the shape of table written to excel,
			
 
				         should contain the iteration, pipeline (as str),
			
 
				         hyperparameters (as str), self.best_result (see self._objective method)
			
 
				-        as well as additional information configured
			
 
				-        through self.save_result method.
			
 
				+        as well as additional information defined by self.default_summary
			
 
				         """
			
 
				         pass
			
--- a/cdplib/utils/CleaningUtils.py
+++ b/cdplib/utils/CleaningUtils.py
@@ -8,13 +8,16 @@ Created on Fri Sep 27 16:20:03 2019
 
				 
			
 
				 import pandas as pd
			
 
				 import numpy as np
			
 
				+from typing import Union, Any, List
			
 
				 
			
 
				 
			
 
				 class CleaningUtils:
			
 
				     '''
			
 
				     Unites different methods for data cleaning
			
 
				     '''
			
 
				-    def convert_dates(series: pd.Series, formats: (str, list)) -> pd.Series:
			
 
				+    def convert_dates(self,
			
 
				+                      series: pd.Series[Any],
			
 
				+                      formats: Union[str, List[str]]) -> pd.Series:
			
 
				         '''
			
 
				         Converts values from string to date in a pandas Series
			
 
				          where possibly multiple date formats are mixed
			
@@ -29,8 +32,7 @@ class CleaningUtils:
 
				 
			
 
				                 series = series.astype(str)
			
 
				 
			
 
				-                series.loc[missing_leading_zero] = "0" +\
			
 
				-                    series.loc[missing_leading_zero]
			
 
				+                series.loc[missing_leading_zero] += "0"
			
 
				 
			
 
				             converted_this_format = pd.to_datetime(series,
			
 
				                                                    format=formt,
			
@@ -71,21 +73,28 @@ class CleaningUtils:
 
				 
			
 
				         return s
			
 
				 
			
 
				-    def melt_duplicated_columns(self, df: pd.DataFrame, suffix: str = "", prefix: str = "") -> pd.DataFrame:
			
 
				+    def melt_duplicated_columns(self, df: pd.DataFrame,
			
 
				+                                suffix: str = "",
			
 
				+                                prefix: str = "") -> pd.DataFrame:
			
 
				         '''
			
 
				         If a dataframe has multiple columns with the same name
			
 
				          (up to a prefix or a suffix),
			
 
				          melts the columns together in one
			
 
				 
			
 
				-        :parame suffix: string or regex up to which we consider names as duplicated
			
 
				-        :parame prefix: string or regex up to which we consider names as duplicated
			
 
				+        :param suffix: string or regex up
			
 
				+            to which we consider names as duplicated
			
 
				+        :param prefix: string or regex
			
 
				+            up to which we consider names as duplicated
			
 
				         '''
			
 
				         from collections import Counter
			
 
				 
			
 
				         import re
			
 
				 
			
 
				-        # remove the suffix and the prefix from the column names (now the duplicates are truely duplicates)
			
 
				-        df.columns = [re.sub(re.compile(prefix), "", re.sub(re.compile(suffix), "", c)) for c in df.columns]
			
 
				+        # remove the suffix and the prefix from the column names
			
 
				+        # (now the duplicates are truly duplicates)
			
 
				+        df.columns = [re.sub(re.compile(prefix), "",
			
 
				+                             re.sub(re.compile(suffix), "", c))
			
 
				+                      for c in df.columns]
			
 
				 
			
 
				         column_counter = Counter(df.columns)
			
 
				 
			
@@ -100,10 +109,12 @@ class CleaningUtils:
 
				             df_melted = []
			
 
				 
			
 
				             for dup_var in dup_vars:
			
 
				-                dup_var_melted = pd.melt(frame=df, id_vars=id_vars, value_vars=[dup_var], value_name=dup_var)\
			
 
				+                dup_var_melted = pd.melt(frame=df,
			
 
				+                                         id_vars=id_vars,
			
 
				+                                         value_vars=[dup_var],
			
 
				+                                         value_name=dup_var)\
			
 
				                                    .set_index(id_vars)[dup_var]
			
 
				 
			
 
				                 df_melted.append(dup_var_melted)
			
 
				 
			
 
				             return pd.concat(df_melted, axis=1, sort=False).reset_index()
			
 
				-
			
--- a/cdplib/utils/TypeConverter.py
+++ b/cdplib/utils/TypeConverter.py
@@ -0,0 +1,36 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Created on Fri Apr 24 09:06:13 2020
			
 
				+
			
 
				+@author: tanya
			
 
				+"""
			
 
				+
			
 
				+import numpy as np
			
 
				+import pandas as pd
			
 
				+
			
 
				+class TypeConverter:
			
 
				+    """
			
 
				+    Library for methods to manage python types
			
 
				+    """
			
 
				+    def __init__(self):
			
 
				+        """
			
 
				+        """
			
 
				+        from cdplib.log import Log
			
 
				+
			
 
				+        self._logger = Log("TypeConverter")
			
 
				+
			
 
				+    def convert_to_ndarray(self, x: (pd.DataFrame, np.ndarray)) -> np.ndarray:
			
 
				+        '''
			
 
				+        Converts a pandas DataFrame or Series to a numpy array.
			
 
				+        '''
			
 
				+        if isinstance(x, np.ndarray):
			
 
				+            return x
			
 
				+
			
 
				+        elif (isinstance(x, pd.core.frame.DataFrame))\
			
 
				+                or (isinstance(x, pd.core.series.Series)):
			
 
				+            return x.values
			
 
				+
			
 
				+        else:
			
 
				+            self._logger.log_and_raise_error_stack_info(
			
 
				+                    'The argument must be a numpy array or a pandas DataFrame')