tanja 3 years ago
parent
commit
04ec30f765

+ 3 - 0
Pipfile

@@ -16,6 +16,9 @@ jsonref = "*"
 simplejson = "*"
 simplejson = "*"
 mysql = "*"
 mysql = "*"
 hyperopt = "*"
 hyperopt = "*"
+mypy = "*"
+data-science-types = "*"
+pytype = "*"
 
 
 [requires]
 [requires]
 python_version = "3"
 python_version = "3"

+ 435 - 231
Pipfile.lock

@@ -1,7 +1,7 @@
 {
 {
     "_meta": {
     "_meta": {
         "hash": {
         "hash": {
-            "sha256": "5ae0ad9df8502aead1689e37517dd3bb8d75ac1c9554b865563d395fb9c1f60a"
+            "sha256": "aaf6cb558761e9ff6ccf0035a08008b15fb12bceb916e49f27a47c406b4e0d2f"
         },
         },
         "pipfile-spec": 6,
         "pipfile-spec": 6,
         "requires": {
         "requires": {
@@ -16,24 +16,41 @@
         ]
         ]
     },
     },
     "default": {
     "default": {
+        "attrs": {
+            "hashes": [
+                "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6",
+                "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"
+            ],
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+            "version": "==20.3.0"
+        },
         "boltons": {
         "boltons": {
             "hashes": [
             "hashes": [
-                "sha256:6e890b173c5f2dcb4ec62320b3799342ecb1a6a0b2253014455387665d62c213",
-                "sha256:b3fc2b711f50cd975e726324d98e0bd5a324dd7e3b81d5e6a1b03c542d0c66c4"
+                "sha256:3dd8a8e3c1886e7f7ba3422b50f55a66e1700161bf01b919d098e7d96dd2d9b6",
+                "sha256:dd362291a460cc1e0c2e91cc6a60da3036ced77099b623112e8f833e6734bdc5"
             ],
             ],
-            "version": "==20.1.0"
+            "version": "==20.2.1"
         },
         },
         "cdplib": {
         "cdplib": {
             "editable": true,
             "editable": true,
             "git": "https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git",
             "git": "https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git",
-            "ref": "36c286e8f5ff2d441504e2286b2c3408d9756c75"
+            "ref": "2eacfa61358654a7e3e9150ae13aed8de9de1dc3"
         },
         },
         "cloudpickle": {
         "cloudpickle": {
             "hashes": [
             "hashes": [
-                "sha256:38af54d0e7705d87a287bdefe1df00f936aadb1f629dca383e825cca927fa753",
-                "sha256:8664761f810efc07dbb301459e413c99b68fcc6d8703912bd39d86618ac631e3"
+                "sha256:3a32d0eb0bc6f4d0c57fbc4f3e3780f7a81e6fee0fa935072884d58ae8e1cc7c",
+                "sha256:9bc994f9e9447593bd0a45371f0e7ac7333710fcf64a4eb9834bf149f4ef2f32"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==1.6.0"
+        },
+        "data-science-types": {
+            "hashes": [
+                "sha256:20ddbaaac3f3299e2091a64e74f78e64f4899f4ab5644bfd97e4694bd7b62ef4",
+                "sha256:86218af525896f84f3a39eef254449d795644311a64df78fba5eaf76aa610d6d"
             ],
             ],
-            "version": "==1.3.0"
+            "index": "pypi",
+            "version": "==0.2.19"
         },
         },
         "decorator": {
         "decorator": {
             "hashes": [
             "hashes": [
@@ -51,19 +68,26 @@
         },
         },
         "hyperopt": {
         "hyperopt": {
             "hashes": [
             "hashes": [
-                "sha256:52f4534e101f139b074ae626e0b7dc8410854b9410475d3e7f10c429393bb1a2",
-                "sha256:8caf0094fe824502932d949ee57bd3c92fe512dbbd93b7b7a78cd0761fa1a78f",
-                "sha256:df450eadfc9541086921bf863a5842e7009faef472b08630fd2cab13cdcfe0e6"
+                "sha256:bc6047d50f956ae64eebcb34b1fd40f186a93e214957f20e87af2f10195295cc",
+                "sha256:dc5c7cceaf33c125b727cf92709e70035d94dd507831dae66406ac762a18a253"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==0.2.3"
+            "version": "==0.2.5"
+        },
+        "importlab": {
+            "hashes": [
+                "sha256:d855350d19dc10a17aabd2fe6f4b428ff1a936071f692fbf686a73694d26a51c"
+            ],
+            "markers": "python_full_version >= '2.7.0'",
+            "version": "==0.5.1"
         },
         },
         "joblib": {
         "joblib": {
             "hashes": [
             "hashes": [
-                "sha256:0630eea4f5664c463f23fbf5dcfc54a2bc6168902719fa8e19daf033022786c8",
-                "sha256:bdb4fd9b72915ffb49fde2229ce482dd7ae79d842ed8c2b4c932441495af1403"
+                "sha256:698c311779f347cf6b7e6b8a39bb682277b8ee4aba8cf9507bc0cf4cd4737b72",
+                "sha256:9e284edd6be6b71883a63c9b7f124738a3c16195513ad940eae7e3438de885d5"
             ],
             ],
-            "version": "==0.14.1"
+            "markers": "python_version >= '3.6'",
+            "version": "==0.17.0"
         },
         },
         "jsonref": {
         "jsonref": {
             "hashes": [
             "hashes": [
@@ -73,6 +97,33 @@
             "index": "pypi",
             "index": "pypi",
             "version": "==0.2"
             "version": "==0.2"
         },
         },
+        "mypy": {
+            "hashes": [
+                "sha256:0a0d102247c16ce93c97066443d11e2d36e6cc2a32d8ccc1f705268970479324",
+                "sha256:0d34d6b122597d48a36d6c59e35341f410d4abfa771d96d04ae2c468dd201abc",
+                "sha256:2170492030f6faa537647d29945786d297e4862765f0b4ac5930ff62e300d802",
+                "sha256:2842d4fbd1b12ab422346376aad03ff5d0805b706102e475e962370f874a5122",
+                "sha256:2b21ba45ad9ef2e2eb88ce4aeadd0112d0f5026418324176fd494a6824b74975",
+                "sha256:72060bf64f290fb629bd4a67c707a66fd88ca26e413a91384b18db3876e57ed7",
+                "sha256:af4e9ff1834e565f1baa74ccf7ae2564ae38c8df2a85b057af1dbbc958eb6666",
+                "sha256:bd03b3cf666bff8d710d633d1c56ab7facbdc204d567715cb3b9f85c6e94f669",
+                "sha256:c614194e01c85bb2e551c421397e49afb2872c88b5830e3554f0519f9fb1c178",
+                "sha256:cf4e7bf7f1214826cf7333627cb2547c0db7e3078723227820d0a2490f117a01",
+                "sha256:da56dedcd7cd502ccd3c5dddc656cb36113dd793ad466e894574125945653cea",
+                "sha256:e86bdace26c5fe9cf8cb735e7cedfe7850ad92b327ac5d797c656717d2ca66de",
+                "sha256:e97e9c13d67fbe524be17e4d8025d51a7dca38f90de2e462243ab8ed8a9178d1",
+                "sha256:eea260feb1830a627fb526d22fbb426b750d9f5a47b624e8d5e7e004359b219c"
+            ],
+            "index": "pypi",
+            "version": "==0.790"
+        },
+        "mypy-extensions": {
+            "hashes": [
+                "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
+                "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
+            ],
+            "version": "==0.4.3"
+        },
         "mysql": {
         "mysql": {
             "hashes": [
             "hashes": [
                 "sha256:55e66b5e7b3823b1da5fb2a063e95a628fb850b2a0b76bdcd884faac5d2daa7d"
                 "sha256:55e66b5e7b3823b1da5fb2a063e95a628fb850b2a0b76bdcd884faac5d2daa7d"
@@ -82,137 +133,175 @@
         },
         },
         "mysqlclient": {
         "mysqlclient": {
             "hashes": [
             "hashes": [
-                "sha256:4c82187dd6ab3607150fbb1fa5ef4643118f3da122b8ba31c3149ddd9cf0cb39",
-                "sha256:9e6080a7aee4cc6a06b58b59239f20f1d259c1d2fddf68ddeed242d2311c7087",
-                "sha256:f3fdaa9a38752a3b214a6fe79d7cae3653731a53e577821f9187e67cbecb2e16",
-                "sha256:f646f8d17d02be0872291f258cce3813497bc7888cd4712a577fd1e719b2f213"
+                "sha256:3f39855a4ad22805361e782cc4d1010ac74796225fa2d1c03cc16673ccdc983a",
+                "sha256:a6b5648f648b16335e3b1aaec93dc3fcc81a9a661180e306936437cc522c810b",
+                "sha256:edd42ccaa444b00702d5374b2f5f7585c9d0ce201917f15339f1c3cf91c1b1ed",
+                "sha256:fb2f75aea14722390d2d8ddf384ad99da708c707a96656210a7be8af20a2c5e5"
             ],
             ],
-            "version": "==1.4.6"
+            "markers": "python_version >= '3.5'",
+            "version": "==2.0.1"
         },
         },
         "networkx": {
         "networkx": {
             "hashes": [
             "hashes": [
-                "sha256:45e56f7ab6fe81652fb4bc9f44faddb0e9025f469f602df14e3b2551c2ea5c8b"
+                "sha256:7978955423fbc9639c10498878be59caf99b44dc304c2286162fd24b458c1602",
+                "sha256:8c5812e9f798d37c50570d15c4a69d5710a18d77bafc903ee9c5fba7454c616c"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==2.5"
+        },
+        "ninja": {
+            "hashes": [
+                "sha256:06a72090f5c5516e57f12699644179504a77585bed6d5f8be9e67219a398ec80",
+                "sha256:16fc1bea52a36a91a0e80c3b221d2c1bc9bcf04d0564da9344e349b8c5efd5c6",
+                "sha256:1d9ed3b5fdeb646516f54bec92453dcb3000d6771c2fea56451444c988a23e29",
+                "sha256:24acc95359308d11243386cf9f076bdc95f438ef6a4e0e357e7c122c5e02816d",
+                "sha256:4252ce532304841e47478bb61710fcf9940cf2c91731303490762b6e4f23fd2b",
+                "sha256:5c3a8cb54aaaf5d4f692d65121ef47b3e43dea123a6563153d9d97631c0adf4f",
+                "sha256:621fd73513a9bef0cb82e8c531a29ef96580b4d6e797f833cce167054ad812f8",
+                "sha256:99c6102ae9a8981afe4d06f92508dbeab1e28ec89783fb703411166f4e13c9ee",
+                "sha256:a1a9d9455623a3f45557fff6eb5abb3e70910dde28cfb9239e3ca14249149f55",
+                "sha256:c6059bd04ad235e2326b39bc71bb7989de8d565084b5f269557704747b2910fa",
+                "sha256:fb1ae96811a9b73773014b8a21d710b89d7d5f765427a5e2541e7fb9d530fdd5"
             ],
             ],
-            "version": "==2.2"
+            "version": "==1.10.0.post2"
         },
         },
         "numpy": {
         "numpy": {
             "hashes": [
             "hashes": [
-                "sha256:1598a6de323508cfeed6b7cd6c4efb43324f4692e20d1f76e1feec7f59013448",
-                "sha256:1b0ece94018ae21163d1f651b527156e1f03943b986188dd81bc7e066eae9d1c",
-                "sha256:2e40be731ad618cb4974d5ba60d373cdf4f1b8dcbf1dcf4d9dff5e212baf69c5",
-                "sha256:4ba59db1fcc27ea31368af524dcf874d9277f21fd2e1f7f1e2e0c75ee61419ed",
-                "sha256:59ca9c6592da581a03d42cc4e270732552243dc45e87248aa8d636d53812f6a5",
-                "sha256:5e0feb76849ca3e83dd396254e47c7dba65b3fa9ed3df67c2556293ae3e16de3",
-                "sha256:6d205249a0293e62bbb3898c4c2e1ff8a22f98375a34775a259a0523111a8f6c",
-                "sha256:6fcc5a3990e269f86d388f165a089259893851437b904f422d301cdce4ff25c8",
-                "sha256:82847f2765835c8e5308f136bc34018d09b49037ec23ecc42b246424c767056b",
-                "sha256:87902e5c03355335fc5992a74ba0247a70d937f326d852fc613b7f53516c0963",
-                "sha256:9ab21d1cb156a620d3999dd92f7d1c86824c622873841d6b080ca5495fa10fef",
-                "sha256:a1baa1dc8ecd88fb2d2a651671a84b9938461e8a8eed13e2f0a812a94084d1fa",
-                "sha256:a244f7af80dacf21054386539699ce29bcc64796ed9850c99a34b41305630286",
-                "sha256:a35af656a7ba1d3decdd4fae5322b87277de8ac98b7d9da657d9e212ece76a61",
-                "sha256:b1fe1a6f3a6f355f6c29789b5927f8bd4f134a4bd9a781099a7c4f66af8850f5",
-                "sha256:b5ad0adb51b2dee7d0ee75a69e9871e2ddfb061c73ea8bc439376298141f77f5",
-                "sha256:ba3c7a2814ec8a176bb71f91478293d633c08582119e713a0c5351c0f77698da",
-                "sha256:cd77d58fb2acf57c1d1ee2835567cd70e6f1835e32090538f17f8a3a99e5e34b",
-                "sha256:cdb3a70285e8220875e4d2bc394e49b4988bdb1298ffa4e0bd81b2f613be397c",
-                "sha256:deb529c40c3f1e38d53d5ae6cd077c21f1d49e13afc7936f7f868455e16b64a0",
-                "sha256:e7894793e6e8540dbeac77c87b489e331947813511108ae097f1715c018b8f3d"
+                "sha256:08308c38e44cc926bdfce99498b21eec1f848d24c302519e64203a8da99a97db",
+                "sha256:09c12096d843b90eafd01ea1b3307e78ddd47a55855ad402b157b6c4862197ce",
+                "sha256:13d166f77d6dc02c0a73c1101dd87fdf01339febec1030bd810dcd53fff3b0f1",
+                "sha256:141ec3a3300ab89c7f2b0775289954d193cc8edb621ea05f99db9cb181530512",
+                "sha256:16c1b388cc31a9baa06d91a19366fb99ddbe1c7b205293ed072211ee5bac1ed2",
+                "sha256:18bed2bcb39e3f758296584337966e68d2d5ba6aab7e038688ad53c8f889f757",
+                "sha256:1aeef46a13e51931c0b1cf8ae1168b4a55ecd282e6688fdb0a948cc5a1d5afb9",
+                "sha256:27d3f3b9e3406579a8af3a9f262f5339005dd25e0ecf3cf1559ff8a49ed5cbf2",
+                "sha256:2a2740aa9733d2e5b2dfb33639d98a64c3b0f24765fed86b0fd2aec07f6a0a08",
+                "sha256:4377e10b874e653fe96985c05feed2225c912e328c8a26541f7fc600fb9c637b",
+                "sha256:448ebb1b3bf64c0267d6b09a7cba26b5ae61b6d2dbabff7c91b660c7eccf2bdb",
+                "sha256:50e86c076611212ca62e5a59f518edafe0c0730f7d9195fec718da1a5c2bb1fc",
+                "sha256:5734bdc0342aba9dfc6f04920988140fb41234db42381cf7ccba64169f9fe7ac",
+                "sha256:64324f64f90a9e4ef732be0928be853eee378fd6a01be21a0a8469c4f2682c83",
+                "sha256:6ae6c680f3ebf1cf7ad1d7748868b39d9f900836df774c453c11c5440bc15b36",
+                "sha256:6d7593a705d662be5bfe24111af14763016765f43cb6923ed86223f965f52387",
+                "sha256:8cac8790a6b1ddf88640a9267ee67b1aee7a57dfa2d2dd33999d080bc8ee3a0f",
+                "sha256:8ece138c3a16db8c1ad38f52eb32be6086cc72f403150a79336eb2045723a1ad",
+                "sha256:9eeb7d1d04b117ac0d38719915ae169aa6b61fca227b0b7d198d43728f0c879c",
+                "sha256:a09f98011236a419ee3f49cedc9ef27d7a1651df07810ae430a6b06576e0b414",
+                "sha256:a5d897c14513590a85774180be713f692df6fa8ecf6483e561a6d47309566f37",
+                "sha256:ad6f2ff5b1989a4899bf89800a671d71b1612e5ff40866d1f4d8bcf48d4e5764",
+                "sha256:c42c4b73121caf0ed6cd795512c9c09c52a7287b04d105d112068c1736d7c753",
+                "sha256:cb1017eec5257e9ac6209ac172058c430e834d5d2bc21961dceeb79d111e5909",
+                "sha256:d6c7bb82883680e168b55b49c70af29b84b84abb161cbac2800e8fcb6f2109b6",
+                "sha256:e452dc66e08a4ce642a961f134814258a082832c78c90351b75c41ad16f79f63",
+                "sha256:e5b6ed0f0b42317050c88022349d994fe72bfe35f5908617512cd8c8ef9da2a9",
+                "sha256:e9b30d4bd69498fc0c3fe9db5f62fffbb06b8eb9321f92cc970f2969be5e3949",
+                "sha256:ec149b90019852266fec2341ce1db513b843e496d5a8e8cdb5ced1923a92faab",
+                "sha256:edb01671b3caae1ca00881686003d16c2209e07b7ef8b7639f1867852b948f7c",
+                "sha256:f0d3929fe88ee1c155129ecd82f981b8856c5d97bcb0d5f23e9b4242e79d1de3",
+                "sha256:f29454410db6ef8126c83bd3c968d143304633d45dc57b51252afbd79d700893",
+                "sha256:fe45becb4c2f72a0907c1d0246ea6449fe7a9e2293bb0e11c4e9a32bb0930a15",
+                "sha256:fedbd128668ead37f33917820b704784aff695e0019309ad446a6d0b065b57e4"
             ],
             ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.18.2"
+            "markers": "python_version >= '3.6'",
+            "version": "==1.19.4"
         },
         },
         "pandas": {
         "pandas": {
             "hashes": [
             "hashes": [
-                "sha256:07c1b58936b80eafdfe694ce964ac21567b80a48d972879a359b3ebb2ea76835",
-                "sha256:0ebe327fb088df4d06145227a4aa0998e4f80a9e6aed4b61c1f303bdfdf7c722",
-                "sha256:11c7cb654cd3a0e9c54d81761b5920cdc86b373510d829461d8f2ed6d5905266",
-                "sha256:12f492dd840e9db1688126216706aa2d1fcd3f4df68a195f9479272d50054645",
-                "sha256:167a1315367cea6ec6a5e11e791d9604f8e03f95b57ad227409de35cf850c9c5",
-                "sha256:1a7c56f1df8d5ad8571fa251b864231f26b47b59cbe41aa5c0983d17dbb7a8e4",
-                "sha256:1fa4bae1a6784aa550a1c9e168422798104a85bf9c77a1063ea77ee6f8452e3a",
-                "sha256:32f42e322fb903d0e189a4c10b75ba70d90958cc4f66a1781ed027f1a1d14586",
-                "sha256:387dc7b3c0424327fe3218f81e05fc27832772a5dffbed385013161be58df90b",
-                "sha256:6597df07ea361231e60c00692d8a8099b519ed741c04e65821e632bc9ccb924c",
-                "sha256:743bba36e99d4440403beb45a6f4f3a667c090c00394c176092b0b910666189b",
-                "sha256:858a0d890d957ae62338624e4aeaf1de436dba2c2c0772570a686eaca8b4fc85",
-                "sha256:863c3e4b7ae550749a0bb77fa22e601a36df9d2905afef34a6965bed092ba9e5",
-                "sha256:a210c91a02ec5ff05617a298ad6f137b9f6f5771bf31f2d6b6367d7f71486639",
-                "sha256:ca84a44cf727f211752e91eab2d1c6c1ab0f0540d5636a8382a3af428542826e",
-                "sha256:d234bcf669e8b4d6cbcd99e3ce7a8918414520aeb113e2a81aeb02d0a533d7f7"
+                "sha256:09e0503758ad61afe81c9069505f8cb8c1e36ea8cc1e6826a95823ef5b327daf",
+                "sha256:0a11a6290ef3667575cbd4785a1b62d658c25a2fd70a5adedba32e156a8f1773",
+                "sha256:0d9a38a59242a2f6298fff45d09768b78b6eb0c52af5919ea9e45965d7ba56d9",
+                "sha256:112c5ba0f9ea0f60b2cc38c25f87ca1d5ca10f71efbee8e0f1bee9cf584ed5d5",
+                "sha256:185cf8c8f38b169dbf7001e1a88c511f653fbb9dfa3e048f5e19c38049e991dc",
+                "sha256:3aa8e10768c730cc1b610aca688f588831fa70b65a26cb549fbb9f35049a05e0",
+                "sha256:41746d520f2b50409dffdba29a15c42caa7babae15616bcf80800d8cfcae3d3e",
+                "sha256:43cea38cbcadb900829858884f49745eb1f42f92609d368cabcc674b03e90efc",
+                "sha256:5378f58172bd63d8c16dd5d008d7dcdd55bf803fcdbe7da2dcb65dbbf322f05b",
+                "sha256:54404abb1cd3f89d01f1fb5350607815326790efb4789be60508f458cdd5ccbf",
+                "sha256:5dac3aeaac5feb1016e94bde851eb2012d1733a222b8afa788202b836c97dad5",
+                "sha256:5fdb2a61e477ce58d3f1fdf2470ee142d9f0dde4969032edaf0b8f1a9dafeaa2",
+                "sha256:6613c7815ee0b20222178ad32ec144061cb07e6a746970c9160af1ebe3ad43b4",
+                "sha256:6d2b5b58e7df46b2c010ec78d7fb9ab20abf1d306d0614d3432e7478993fbdb0",
+                "sha256:8a5d7e57b9df2c0a9a202840b2881bb1f7a648eba12dd2d919ac07a33a36a97f",
+                "sha256:8b4c2055ebd6e497e5ecc06efa5b8aa76f59d15233356eb10dad22a03b757805",
+                "sha256:a15653480e5b92ee376f8458197a58cca89a6e95d12cccb4c2d933df5cecc63f",
+                "sha256:a7d2547b601ecc9a53fd41561de49a43d2231728ad65c7713d6b616cd02ddbed",
+                "sha256:a979d0404b135c63954dea79e6246c45dd45371a88631cdbb4877d844e6de3b6",
+                "sha256:b1f8111635700de7ac350b639e7e452b06fc541a328cf6193cf8fc638804bab8",
+                "sha256:c5a3597880a7a29a31ebd39b73b2c824316ae63a05c3c8a5ce2aea3fc68afe35",
+                "sha256:c681e8fcc47a767bf868341d8f0d76923733cbdcabd6ec3a3560695c69f14a1e",
+                "sha256:cf135a08f306ebbcfea6da8bf775217613917be23e5074c69215b91e180caab4",
+                "sha256:e2b8557fe6d0a18db4d61c028c6af61bfed44ef90e419ed6fadbdc079eba141e"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==1.0.3"
+            "version": "==1.1.4"
         },
         },
         "pymongo": {
         "pymongo": {
             "hashes": [
             "hashes": [
-                "sha256:01b4e10027aef5bb9ecefbc26f5df3368ce34aef81df43850f701e716e3fe16d",
-                "sha256:0fc5aa1b1acf7f61af46fe0414e6a4d0c234b339db4c03a63da48599acf1cbfc",
-                "sha256:1396eb7151e0558b1f817e4b9d7697d5599e5c40d839a9f7270bd90af994ad82",
-                "sha256:18e84a3ec5e73adcb4187b8e5541b2ad61d716026ed9863267e650300d8bea33",
-                "sha256:19adf2848b80cb349b9891cc854581bbf24c338be9a3260e73159bdeb2264464",
-                "sha256:20ee0475aa2ba437b0a14806f125d696f90a8433d820fb558fdd6f052acde103",
-                "sha256:26798795097bdeb571f13942beef7e0b60125397811c75b7aa9214d89880dd1d",
-                "sha256:26e707a4eb851ec27bb969b5f1413b9b2eac28fe34271fa72329100317ea7c73",
-                "sha256:2a3c7ad01553b27ec553688a1e6445e7f40355fb37d925c11fcb50b504e367f8",
-                "sha256:2f07b27dbf303ea53f4147a7922ce91a26b34a0011131471d8aaf73151fdee9a",
-                "sha256:316f0cf543013d0c085e15a2c8abe0db70f93c9722c0f99b6f3318ff69477d70",
-                "sha256:31d11a600eea0c60de22c8bdcb58cda63c762891facdcb74248c36713240987f",
-                "sha256:334ef3ffd0df87ea83a0054454336159f8ad9c1b389e19c0032d9cb8410660e6",
-                "sha256:358ba4693c01022d507b96a980ded855a32dbdccc3c9331d0667be5e967f30ed",
-                "sha256:3a6568bc53103df260f5c7d2da36dffc5202b9a36c85540bba1836a774943794",
-                "sha256:444bf2f44264578c4085bb04493bfed0e5c1b4fe7c2704504d769f955cc78fe4",
-                "sha256:47a00b22c52ee59dffc2aad02d0bbfb20c26ec5b8de8900492bf13ad6901cf35",
-                "sha256:4c067db43b331fc709080d441cb2e157114fec60749667d12186cc3fc8e7a951",
-                "sha256:4c092310f804a5d45a1bcaa4191d6d016c457b6ed3982a622c35f729ff1c7f6b",
-                "sha256:53b711b33134e292ef8499835a3df10909c58df53a2a0308f598c432e9a62892",
-                "sha256:568d6bee70652d8a5af1cd3eec48b4ca1696fb1773b80719ebbd2925b72cb8f6",
-                "sha256:56fa55032782b7f8e0bf6956420d11e2d4e9860598dfe9c504edec53af0fc372",
-                "sha256:5a2c492680c61b440272341294172fa3b3751797b1ab983533a770e4fb0a67ac",
-                "sha256:61235cc39b5b2f593086d1d38f3fc130b2d125bd8fc8621d35bc5b6bdeb92bd2",
-                "sha256:619ac9aaf681434b4d4718d1b31aa2f0fce64f2b3f8435688fcbdc0c818b6c54",
-                "sha256:6238ac1f483494011abde5286282afdfacd8926659e222ba9b74c67008d3a58c",
-                "sha256:63752a72ca4d4e1386278bd43d14232f51718b409e7ac86bcf8810826b531113",
-                "sha256:6fdc5ccb43864065d40dd838437952e9e3da9821b7eac605ba46ada77f846bdf",
-                "sha256:7abc3a6825a346fa4621a6f63e3b662bbb9e0f6ffc32d30a459d695f20fb1a8b",
-                "sha256:7aef381bb9ae8a3821abd7f9d4d93978dbd99072b48522e181baeffcd95b56ae",
-                "sha256:80df3caf251fe61a3f0c9614adc6e2bfcffd1cd3345280896766712fb4b4d6d7",
-                "sha256:95f970f34b59987dee6f360d2e7d30e181d58957b85dff929eee4423739bd151",
-                "sha256:993257f6ca3cde55332af1f62af3e04ca89ce63c08b56a387cdd46136c72f2fa",
-                "sha256:9c0a57390549affc2b5dda24a38de03a5c7cbc58750cd161ff5d106c3c6eec80",
-                "sha256:a0794e987d55d2f719cc95fcf980fc62d12b80e287e6a761c4be14c60bd9fecc",
-                "sha256:a3b98121e68bf370dd8ea09df67e916f93ea95b52fc010902312168c4d1aff5d",
-                "sha256:a60756d55f0887023b3899e6c2923ba5f0042fb11b1d17810b4e07395404f33e",
-                "sha256:a676bd2fbc2309092b9bbb0083d35718b5420af3a42135ebb1e4c3633f56604d",
-                "sha256:a732838c78554c1257ff2492f5c8c4c7312d0aecd7f732149e255f3749edd5ee",
-                "sha256:ad3dc88dfe61f0f1f9b99c6bc833ea2f45203a937a18f0d2faa57c6952656012",
-                "sha256:ae65d65fde4135ef423a2608587c9ef585a3551fc2e4e431e7c7e527047581be",
-                "sha256:b070a4f064a9edb70f921bfdc270725cff7a78c22036dd37a767c51393fb956f",
-                "sha256:b6da85949aa91e9f8c521681344bd2e163de894a5492337fba8b05c409225a4f",
-                "sha256:bbf47110765b2a999803a7de457567389253f8670f7daafb98e059c899ce9764",
-                "sha256:bd9c1e6f92b4888ae3ef7ae23262c513b962f09f3fb3b48581dde5df7d7a860a",
-                "sha256:c06b3f998d2d7160db58db69adfb807d2ec307e883e2f17f6b87a1ef6c723f11",
-                "sha256:c318fb70542be16d3d4063cde6010b1e4d328993a793529c15a619251f517c39",
-                "sha256:c4aef42e5fa4c9d5a99f751fb79caa880dac7eaf8a65121549318b984676a1b7",
-                "sha256:c9ca545e93a9c2a3bdaa2e6e21f7a43267ff0813e8055adf2b591c13164c0c57",
-                "sha256:da2c3220eb55c4239dd8b982e213da0b79023cac59fe54ca09365f2bc7e4ad32",
-                "sha256:dd8055da300535eefd446b30995c0813cc4394873c9509323762a93e97c04c03",
-                "sha256:e2b46e092ea54b732d98c476720386ff2ccd126de1e52076b470b117bff7e409",
-                "sha256:e334c4f39a2863a239d38b5829e442a87f241a92da9941861ee6ec5d6380b7fe",
-                "sha256:e5c54f04ca42bbb5153aec5d4f2e3d9f81e316945220ac318abd4083308143f5",
-                "sha256:f4d06764a06b137e48db6d569dc95614d9d225c89842c885669ee8abc9f28c7a",
-                "sha256:f96333f9d2517c752c20a35ff95de5fc2763ac8cdb1653df0f6f45d281620606"
+                "sha256:03dc64a9aa7a5d405aea5c56db95835f6a2fa31b3502c5af1760e0e99210be30",
+                "sha256:05fcc6f9c60e6efe5219fbb5a30258adb3d3e5cbd317068f3d73c09727f2abb6",
+                "sha256:076a7f2f7c251635cf6116ac8e45eefac77758ee5a77ab7bd2f63999e957613b",
+                "sha256:137e6fa718c7eff270dbd2fc4b90d94b1a69c9e9eb3f3de9e850a7fd33c822dc",
+                "sha256:1f865b1d1c191d785106f54df9abdc7d2f45a946b45fd1ea0a641b4f982a2a77",
+                "sha256:213c445fe7e654621c6309e874627c35354b46ef3ee807f5a1927dc4b30e1a67",
+                "sha256:25e617daf47d8dfd4e152c880cd0741cbdb48e51f54b8de9ddbfe74ecd87dd16",
+                "sha256:3d9bb1ba935a90ec4809a8031efd988bdb13cdba05d9e9a3e9bf151bf759ecde",
+                "sha256:40696a9a53faa7d85aaa6fd7bef1cae08f7882640bad08c350fb59dee7ad069b",
+                "sha256:421aa1b92c291c429668bd8d8d8ec2bd00f183483a756928e3afbf2b6f941f00",
+                "sha256:4437300eb3a5e9cc1a73b07d22c77302f872f339caca97e9bf8cf45eca8fa0d2",
+                "sha256:455f4deb00158d5ec8b1d3092df6abb681b225774ab8a59b3510293b4c8530e3",
+                "sha256:475a34a0745c456ceffaec4ce86b7e0983478f1b6140890dff7b161e7bcd895b",
+                "sha256:4797c0080f41eba90404335e5ded3aa66731d303293a675ff097ce4ea3025bb9",
+                "sha256:4ae23fbbe9eadf61279a26eba866bbf161a6f7e2ffad14a42cf20e9cb8e94166",
+                "sha256:4b32744901ee9990aa8cd488ec85634f443526def1e5190a407dc107148249d7",
+                "sha256:50127b13b38e8e586d5e97d342689405edbd74ad0bd891d97ee126a8c7b6e45f",
+                "sha256:50531caa7b4be1c4ed5e2d5793a4e51cc9bd62a919a6fd3299ef7c902e206eab",
+                "sha256:63a5387e496a98170ffe638b435c0832c0f2011a6f4ff7a2880f17669fff8c03",
+                "sha256:68220b81850de8e966d4667d5c325a96c6ac0d6adb3d18935d6e3d325d441f48",
+                "sha256:689142dc0c150e9cb7c012d84cac2c346d40beb891323afb6caf18ec4caafae0",
+                "sha256:6a15e2bee5c4188369a87ed6f02de804651152634a46cca91966a11c8abd2550",
+                "sha256:7122ffe597b531fb065d3314e704a6fe152b81820ca5f38543e70ffcc95ecfd4",
+                "sha256:7307024b18266b302f4265da84bb1effb5d18999ef35b30d17592959568d5c0a",
+                "sha256:7a4a6f5b818988a3917ec4baa91d1143242bdfece8d38305020463955961266a",
+                "sha256:83c5a3ecd96a9f3f11cfe6dfcbcec7323265340eb24cc996acaecea129865a3a",
+                "sha256:890b0f1e18dbd898aeb0ab9eae1ab159c6bcbe87f0abb065b0044581d8614062",
+                "sha256:8deda1f7b4c03242f2a8037706d9584e703f3d8c74d6d9cac5833db36fe16c42",
+                "sha256:8ea13d0348b4c96b437d944d7068d59ed4a6c98aaa6c40d8537a2981313f1c66",
+                "sha256:91e96bf85b7c07c827d339a386e8a3cf2e90ef098c42595227f729922d0851df",
+                "sha256:96782ebb3c9e91e174c333208b272ea144ed2a684413afb1038e3b3342230d72",
+                "sha256:9755c726aa6788f076114dfdc03b92b03ff8860316cca00902cce88bcdb5fedd",
+                "sha256:9dbab90c348c512e03f146e93a5e2610acec76df391043ecd46b6b775d5397e6",
+                "sha256:9ee0eef254e340cc11c379f797af3977992a7f2c176f1a658740c94bf677e13c",
+                "sha256:9fc17fdac8f1973850d42e51e8ba6149d93b1993ed6768a24f352f926dd3d587",
+                "sha256:a2787319dc69854acdfd6452e6a8ba8f929aeb20843c7f090e04159fc18e6245",
+                "sha256:b7c522292407fa04d8195032493aac937e253ad9ae524aab43b9d9d242571f03",
+                "sha256:bd312794f51e37dcf77f013d40650fe4fbb211dd55ef2863839c37480bd44369",
+                "sha256:c0d660a186e36c526366edf8a64391874fe53cf8b7039224137aee0163c046df",
+                "sha256:c4869141e20769b65d2d72686e7a7eb141ce9f3168106bed3e7dcced54eb2422",
+                "sha256:cc4057f692ac35bbe82a0a908d42ce3a281c9e913290fac37d7fa3bd01307dfb",
+                "sha256:cccf1e7806f12300e3a3b48f219e111000c2538483e85c869c35c1ae591e6ce9",
+                "sha256:ce208f80f398522e49d9db789065c8ad2cd37b21bd6b23d30053474b7416af11",
+                "sha256:d0565481dc196986c484a7fb13214fc6402201f7fb55c65fd215b3324962fe6c",
+                "sha256:d1b3366329c45a474b3bbc9b9c95d4c686e03f35da7fd12bc144626d1f2a7c04",
+                "sha256:d226e0d4b9192d95079a9a29c04dd81816b1ce8903b8c174a39224fe978547cb",
+                "sha256:d38b35f6eef4237b1d0d8e845fc1546dad85c55eba447e28c211da8c7ef9697c",
+                "sha256:d64c98277ea80e4484f1332ab107e8dfd173a7dcf1bdbf10a9cccc97aaab145f",
+                "sha256:d9de8427a5601799784eb0e7fa1b031aa64086ce04de29df775a8ca37eedac41",
+                "sha256:e6a15cf8f887d9f578dd49c6fb3a99d53e1d922fdd67a245a67488d77bf56eb2",
+                "sha256:e8c446882cbb3774cd78c738c9f58220606b702b7c1655f1423357dc51674054",
+                "sha256:e8d188ee39bd0ffe76603da887706e4e7b471f613625899ddf1e27867dc6a0d3",
+                "sha256:ef76535776c0708a85258f6dc51d36a2df12633c735f6d197ed7dfcaa7449b99",
+                "sha256:f6efca006a81e1197b925a7d7b16b8f61980697bb6746587aad8842865233218"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==3.10.1"
+            "version": "==3.11.0"
         },
         },
         "pymysql": {
         "pymysql": {
             "hashes": [
             "hashes": [
-                "sha256:3943fbbbc1e902f41daf7f9165519f140c4451c179380677e6a848587042561a",
-                "sha256:d8c059dcd81dedb85a9f034d5e22dcb4442c0b201908bede99e306d65ea7c8e7"
+                "sha256:263040d2779a3b84930f7ac9da5132be0fefcd6f453a885756656103f8ee1fdd",
+                "sha256:44f47128dda8676e021c8d2dbb49a82be9e4ab158b9f03e897152a3a287c69ea"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==0.9.3"
+            "version": "==0.10.1"
         },
         },
         "python-dateutil": {
         "python-dateutil": {
             "hashes": [
             "hashes": [
@@ -222,108 +311,153 @@
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "version": "==2.8.1"
             "version": "==2.8.1"
         },
         },
+        "pytype": {
+            "hashes": [
+                "sha256:01c2dc3664b550e5c571c432035eda85c5b1ba0bc2675f50bd24f226fda25fc2",
+                "sha256:1b63bfccdd68a8f8a80358fccf09c2a52b2e8d0e079e7ae9c034ba5df4356418",
+                "sha256:409ff5f52e767ec957014d1c5c1abf2e246446896d333c25f8f2a19de150f85e",
+                "sha256:6353e37f0df5037a1f18d0692b9b0b2d71ed0bb1e3b1d6d8d29458ef1a18cb81",
+                "sha256:926dea04b6fc9e396b69281679dbbe982f3825d8a3590ba63e671460d58ff192",
+                "sha256:e2ea11478665f7496f2e6f9b38956a01e47ab18462961ae5acfeb99c937dcef0",
+                "sha256:e97ff9dea170897e35fd1bf5934863176c7d97fbf533d2020ff0ab751dc2e389"
+            ],
+            "index": "pypi",
+            "version": "==2020.11.3"
+        },
         "pytz": {
         "pytz": {
             "hashes": [
             "hashes": [
-                "sha256:1c557d7d0e871de1f5ccd5833f60fb2550652da6be2693c1e02300743d21500d",
-                "sha256:b02c06db6cf09c12dd25137e563b31700d3b80fcc4ad23abb7a315f2789819be"
+                "sha256:3e6b7dd2d1e0a59084bcee14a17af60c5c562cdc16d828e8eba2e683d3a7e268",
+                "sha256:5c55e189b682d420be27c6995ba6edce0c0a77dd67bfbe2ae6607134d5851ffd"
             ],
             ],
-            "version": "==2019.3"
+            "version": "==2020.4"
+        },
+        "pyyaml": {
+            "hashes": [
+                "sha256:06a0d7ba600ce0b2d2fe2e78453a470b5a6e000a985dd4a4e54e436cc36b0e97",
+                "sha256:240097ff019d7c70a4922b6869d8a86407758333f02203e0fc6ff79c5dcede76",
+                "sha256:4f4b913ca1a7319b33cfb1369e91e50354d6f07a135f3b901aca02aa95940bd2",
+                "sha256:69f00dca373f240f842b2931fb2c7e14ddbacd1397d57157a9b005a6a9942648",
+                "sha256:73f099454b799e05e5ab51423c7bcf361c58d3206fa7b0d555426b1f4d9a3eaf",
+                "sha256:74809a57b329d6cc0fdccee6318f44b9b8649961fa73144a98735b0aaf029f1f",
+                "sha256:7739fc0fa8205b3ee8808aea45e968bc90082c10aef6ea95e855e10abf4a37b2",
+                "sha256:95f71d2af0ff4227885f7a6605c37fd53d3a106fcab511b8860ecca9fcf400ee",
+                "sha256:b8eac752c5e14d3eca0e6dd9199cd627518cb5ec06add0de9d32baeee6fe645d",
+                "sha256:cc8955cfbfc7a115fa81d85284ee61147059a753344bc51098f3ccd69b0d7e0c",
+                "sha256:d13155f591e6fcc1ec3b30685d50bf0711574e2c0dfffd7644babf8b5102ca1a"
+            ],
+            "version": "==5.3.1"
         },
         },
         "scikit-learn": {
         "scikit-learn": {
             "hashes": [
             "hashes": [
-                "sha256:1bf45e62799b6938357cfce19f72e3751448c4b27010e4f98553da669b5bbd86",
-                "sha256:267ad874b54c67b479c3b45eb132ef4a56ab2b27963410624a413a4e2a3fc388",
-                "sha256:2d1bb83d6c51a81193d8a6b5f31930e2959c0e1019d49bdd03f54163735dae4b",
-                "sha256:349ba3d837fb3f7cb2b91486c43713e4b7de17f9e852f165049b1b7ac2f81478",
-                "sha256:3f4d8eea3531d3eaf613fa33f711113dfff6021d57a49c9d319af4afb46f72f0",
-                "sha256:4990f0e166292d2a0f0ee528233723bcfd238bfdb3ec2512a9e27f5695362f35",
-                "sha256:57538d138ba54407d21e27c306735cbd42a6aae0df6a5a30c7a6edde46b0017d",
-                "sha256:5b722e8bb708f254af028dc2da86d23df5371cba57e24f889b672e7b15423caa",
-                "sha256:6043e2c4ccfc68328c331b0fc19691be8fb02bd76d694704843a23ad651de902",
-                "sha256:672ea38eb59b739a8907ec063642b486bcb5a2073dda5b72b7983eeaf1fd67c1",
-                "sha256:73207dca6e70f8f611f28add185cf3a793c8232a1722f21d82259560dc35cd50",
-                "sha256:83fc104a799cb340054e485c25dfeee712b36f5638fb374eba45a9db490f16ff",
-                "sha256:8416150ab505f1813da02cdbdd9f367b05bfc75cf251235015bb09f8674358a0",
-                "sha256:84e759a766c315deb5c85139ff879edbb0aabcddb9358acf499564ed1c21e337",
-                "sha256:8ed66ab27b3d68e57bb1f315fc35e595a5c4a1f108c3420943de4d18fc40e615",
-                "sha256:a7f8aa93f61aaad080b29a9018db93ded0586692c03ddf2122e47dd1d3a14e1b",
-                "sha256:ddd3bf82977908ff69303115dd5697606e669d8a7eafd7d83bb153ef9e11bd5e",
-                "sha256:de9933297f8659ee3bb330eafdd80d74cd73d5dab39a9026b65a4156bc479063",
-                "sha256:ea91a70a992ada395efc3d510cf011dc2d99dc9037bb38cd1cb00e14745005f5",
-                "sha256:eb4c9f0019abb374a2e55150f070a333c8f990b850d1eb4dfc2765fc317ffc7c",
-                "sha256:ffce8abfdcd459e72e5b91727b247b401b22253cbd18d251f842a60e26262d6f"
+                "sha256:0a127cc70990d4c15b1019680bfedc7fec6c23d14d3719fdf9b64b22d37cdeca",
+                "sha256:0d39748e7c9669ba648acf40fb3ce96b8a07b240db6888563a7cb76e05e0d9cc",
+                "sha256:1b8a391de95f6285a2f9adffb7db0892718950954b7149a70c783dc848f104ea",
+                "sha256:20766f515e6cd6f954554387dfae705d93c7b544ec0e6c6a5d8e006f6f7ef480",
+                "sha256:2aa95c2f17d2f80534156215c87bee72b6aa314a7f8b8fe92a2d71f47280570d",
+                "sha256:5ce7a8021c9defc2b75620571b350acc4a7d9763c25b7593621ef50f3bd019a2",
+                "sha256:6c28a1d00aae7c3c9568f61aafeaad813f0f01c729bee4fd9479e2132b215c1d",
+                "sha256:7671bbeddd7f4f9a6968f3b5442dac5f22bf1ba06709ef888cc9132ad354a9ab",
+                "sha256:914ac2b45a058d3f1338d7736200f7f3b094857758895f8667be8a81ff443b5b",
+                "sha256:98508723f44c61896a4e15894b2016762a55555fbf09365a0bb1870ecbd442de",
+                "sha256:a64817b050efd50f9abcfd311870073e500ae11b299683a519fbb52d85e08d25",
+                "sha256:cb3e76380312e1f86abd20340ab1d5b3cc46a26f6593d3c33c9ea3e4c7134028",
+                "sha256:d0dcaa54263307075cb93d0bee3ceb02821093b1b3d25f66021987d305d01dce",
+                "sha256:d9a1ce5f099f29c7c33181cc4386660e0ba891b21a60dc036bf369e3a3ee3aec",
+                "sha256:da8e7c302003dd765d92a5616678e591f347460ac7b53e53d667be7dfe6d1b10",
+                "sha256:daf276c465c38ef736a79bd79fc80a249f746bcbcae50c40945428f7ece074f8"
             ],
             ],
-            "markers": "python_version >= '3.5'",
-            "version": "==0.22.2.post1"
+            "markers": "python_version >= '3.6'",
+            "version": "==0.23.2"
         },
         },
         "scipy": {
         "scipy": {
             "hashes": [
             "hashes": [
-                "sha256:00af72998a46c25bdb5824d2b729e7dabec0c765f9deb0b504f928591f5ff9d4",
-                "sha256:0902a620a381f101e184a958459b36d3ee50f5effd186db76e131cbefcbb96f7",
-                "sha256:1e3190466d669d658233e8a583b854f6386dd62d655539b77b3fa25bfb2abb70",
-                "sha256:2cce3f9847a1a51019e8c5b47620da93950e58ebc611f13e0d11f4980ca5fecb",
-                "sha256:3092857f36b690a321a662fe5496cb816a7f4eecd875e1d36793d92d3f884073",
-                "sha256:386086e2972ed2db17cebf88610aab7d7f6e2c0ca30042dc9a89cf18dcc363fa",
-                "sha256:71eb180f22c49066f25d6df16f8709f215723317cc951d99e54dc88020ea57be",
-                "sha256:770254a280d741dd3436919d47e35712fb081a6ff8bafc0f319382b954b77802",
-                "sha256:787cc50cab3020a865640aba3485e9fbd161d4d3b0d03a967df1a2881320512d",
-                "sha256:8a07760d5c7f3a92e440ad3aedcc98891e915ce857664282ae3c0220f3301eb6",
-                "sha256:8d3bc3993b8e4be7eade6dcc6fd59a412d96d3a33fa42b0fa45dc9e24495ede9",
-                "sha256:9508a7c628a165c2c835f2497837bf6ac80eb25291055f56c129df3c943cbaf8",
-                "sha256:a144811318853a23d32a07bc7fd5561ff0cac5da643d96ed94a4ffe967d89672",
-                "sha256:a1aae70d52d0b074d8121333bc807a485f9f1e6a69742010b33780df2e60cfe0",
-                "sha256:a2d6df9eb074af7f08866598e4ef068a2b310d98f87dc23bd1b90ec7bdcec802",
-                "sha256:bb517872058a1f087c4528e7429b4a44533a902644987e7b2fe35ecc223bc408",
-                "sha256:c5cac0c0387272ee0e789e94a570ac51deb01c796b37fb2aad1fb13f85e2f97d",
-                "sha256:cc971a82ea1170e677443108703a2ec9ff0f70752258d0e9f5433d00dda01f59",
-                "sha256:dba8306f6da99e37ea08c08fef6e274b5bf8567bb094d1dbe86a20e532aca088",
-                "sha256:dc60bb302f48acf6da8ca4444cfa17d52c63c5415302a9ee77b3b21618090521",
-                "sha256:dee1bbf3a6c8f73b6b218cb28eed8dd13347ea2f87d572ce19b289d6fd3fbc59"
+                "sha256:168c45c0c32e23f613db7c9e4e780bc61982d71dcd406ead746c7c7c2f2004ce",
+                "sha256:213bc59191da2f479984ad4ec39406bf949a99aba70e9237b916ce7547b6ef42",
+                "sha256:25b241034215247481f53355e05f9e25462682b13bd9191359075682adcd9554",
+                "sha256:2c872de0c69ed20fb1a9b9cf6f77298b04a26f0b8720a5457be08be254366c6e",
+                "sha256:3397c129b479846d7eaa18f999369a24322d008fac0782e7828fa567358c36ce",
+                "sha256:368c0f69f93186309e1b4beb8e26d51dd6f5010b79264c0f1e9ca00cd92ea8c9",
+                "sha256:3d5db5d815370c28d938cf9b0809dade4acf7aba57eaf7ef733bfedc9b2474c4",
+                "sha256:4598cf03136067000855d6b44d7a1f4f46994164bcd450fb2c3d481afc25dd06",
+                "sha256:4a453d5e5689de62e5d38edf40af3f17560bfd63c9c5bd228c18c1f99afa155b",
+                "sha256:4f12d13ffbc16e988fa40809cbbd7a8b45bc05ff6ea0ba8e3e41f6f4db3a9e47",
+                "sha256:634568a3018bc16a83cda28d4f7aed0d803dd5618facb36e977e53b2df868443",
+                "sha256:65923bc3809524e46fb7eb4d6346552cbb6a1ffc41be748535aa502a2e3d3389",
+                "sha256:6b0ceb23560f46dd236a8ad4378fc40bad1783e997604ba845e131d6c680963e",
+                "sha256:8c8d6ca19c8497344b810b0b0344f8375af5f6bb9c98bd42e33f747417ab3f57",
+                "sha256:9ad4fcddcbf5dc67619379782e6aeef41218a79e17979aaed01ed099876c0e62",
+                "sha256:a254b98dbcc744c723a838c03b74a8a34c0558c9ac5c86d5561703362231107d",
+                "sha256:b03c4338d6d3d299e8ca494194c0ae4f611548da59e3c038813f1a43976cb437",
+                "sha256:cc1f78ebc982cd0602c9a7615d878396bec94908db67d4ecddca864d049112f2",
+                "sha256:d6d25c41a009e3c6b7e757338948d0076ee1dd1770d1c09ec131f11946883c54",
+                "sha256:d84cadd7d7998433334c99fa55bcba0d8b4aeff0edb123b2a1dfcface538e474",
+                "sha256:e360cb2299028d0b0d0f65a5c5e51fc16a335f1603aa2357c25766c8dab56938",
+                "sha256:e98d49a5717369d8241d6cf33ecb0ca72deee392414118198a8e5b4c35c56340",
+                "sha256:ed572470af2438b526ea574ff8f05e7f39b44ac37f712105e57fc4d53a6fb660",
+                "sha256:f87b39f4d69cf7d7529d7b1098cb712033b17ea7714aed831b95628f483fd012",
+                "sha256:fa789583fc94a7689b45834453fec095245c7e69c58561dc159b5d5277057e4c"
             ],
             ],
-            "markers": "python_version >= '3.5'",
-            "version": "==1.4.1"
+            "markers": "python_version >= '3.6'",
+            "version": "==1.5.4"
         },
         },
         "simplejson": {
         "simplejson": {
             "hashes": [
             "hashes": [
-                "sha256:0fe3994207485efb63d8f10a833ff31236ed27e3b23dadd0bf51c9900313f8f2",
-                "sha256:17163e643dbf125bb552de17c826b0161c68c970335d270e174363d19e7ea882",
-                "sha256:1d1e929cdd15151f3c0b2efe953b3281b2fd5ad5f234f77aca725f28486466f6",
-                "sha256:1d346c2c1d7dd79c118f0cc7ec5a1c4127e0c8ffc83e7b13fc5709ff78c9bb84",
-                "sha256:1ea59f570b9d4916ae5540a9181f9c978e16863383738b69a70363bc5e63c4cb",
-                "sha256:1fbba86098bbfc1f85c5b69dc9a6d009055104354e0d9880bb00b692e30e0078",
-                "sha256:229edb079d5dd81bf12da952d4d825bd68d1241381b37d3acf961b384c9934de",
-                "sha256:22a7acb81968a7c64eba7526af2cf566e7e2ded1cb5c83f0906b17ff1540f866",
-                "sha256:2b4b2b738b3b99819a17feaf118265d0753d5536049ea570b3c43b51c4701e81",
-                "sha256:4cf91aab51b02b3327c9d51897960c554f00891f9b31abd8a2f50fd4a0071ce8",
-                "sha256:4fd5f79590694ebff8dc980708e1c182d41ce1fda599a12189f0ca96bf41ad70",
-                "sha256:5cfd495527f8b85ce21db806567de52d98f5078a8e9427b18e251c68bd573a26",
-                "sha256:60aad424e47c5803276e332b2a861ed7a0d46560e8af53790c4c4fb3420c26c2",
-                "sha256:7739940d68b200877a15a5ff5149e1599737d6dd55e302625650629350466418",
-                "sha256:7cce4bac7e0d66f3a080b80212c2238e063211fe327f98d764c6acbc214497fc",
-                "sha256:8027bd5f1e633eb61b8239994e6fc3aba0346e76294beac22a892eb8faa92ba1",
-                "sha256:86afc5b5cbd42d706efd33f280fec7bd7e2772ef54e3f34cf6b30777cd19a614",
-                "sha256:87d349517b572964350cc1adc5a31b493bbcee284505e81637d0174b2758ba17",
-                "sha256:8de378d589eccbc75941e480b4d5b4db66f22e4232f87543b136b1f093fff342",
-                "sha256:926bcbef9eb60e798eabda9cd0bbcb0fca70d2779aa0aa56845749d973eb7ad5",
-                "sha256:9a126c3a91df5b1403e965ba63b304a50b53d8efc908a8c71545ed72535374a3",
-                "sha256:ad8dd3454d0c65c0f92945ac86f7b9efb67fa2040ba1b0189540e984df904378",
-                "sha256:d140e9376e7f73c1f9e0a8e3836caf5eec57bbafd99259d56979da05a6356388",
-                "sha256:da00675e5e483ead345429d4f1374ab8b949fba4429d60e71ee9d030ced64037",
-                "sha256:daaf4d11db982791be74b23ff4729af2c7da79316de0bebf880fa2d60bcc8c5a",
-                "sha256:f4b64a1031acf33e281fd9052336d6dad4d35eee3404c95431c8c6bc7a9c0588",
-                "sha256:fc046afda0ed8f5295212068266c92991ab1f4a50c6a7144b69364bdee4a0159",
-                "sha256:fc9051d249dd5512e541f20330a74592f7a65b2d62e18122ca89bf71f94db748"
+                "sha256:034550078a11664d77bc1a8364c90bb7eef0e44c2dbb1fd0a4d92e3997088667",
+                "sha256:05b43d568300c1cd43f95ff4bfcff984bc658aa001be91efb3bb21df9d6288d3",
+                "sha256:0dd9d9c738cb008bfc0862c9b8fa6743495c03a0ed543884bf92fb7d30f8d043",
+                "sha256:10fc250c3edea4abc15d930d77274ddb8df4803453dde7ad50c2f5565a18a4bb",
+                "sha256:2862beabfb9097a745a961426fe7daf66e1714151da8bb9a0c430dde3d59c7c0",
+                "sha256:292c2e3f53be314cc59853bd20a35bf1f965f3bc121e007ab6fd526ed412a85d",
+                "sha256:2d3eab2c3fe52007d703a26f71cf649a8c771fcdd949a3ae73041ba6797cfcf8",
+                "sha256:2e7b57c2c146f8e4dadf84977a83f7ee50da17c8861fd7faf694d55e3274784f",
+                "sha256:311f5dc2af07361725033b13cc3d0351de3da8bede3397d45650784c3f21fbcf",
+                "sha256:344e2d920a7f27b4023c087ab539877a1e39ce8e3e90b867e0bfa97829824748",
+                "sha256:3fabde09af43e0cbdee407555383063f8b45bfb52c361bc5da83fcffdb4fd278",
+                "sha256:42b8b8dd0799f78e067e2aaae97e60d58a8f63582939af60abce4c48631a0aa4",
+                "sha256:4b3442249d5e3893b90cb9f72c7d6ce4d2ea144d2c0d9f75b9ae1e5460f3121a",
+                "sha256:55d65f9cc1b733d85ef95ab11f559cce55c7649a2160da2ac7a078534da676c8",
+                "sha256:5c659a0efc80aaaba57fcd878855c8534ecb655a28ac8508885c50648e6e659d",
+                "sha256:72d8a3ffca19a901002d6b068cf746be85747571c6a7ba12cbcf427bfb4ed971",
+                "sha256:75ecc79f26d99222a084fbdd1ce5aad3ac3a8bd535cd9059528452da38b68841",
+                "sha256:76ac9605bf2f6d9b56abf6f9da9047a8782574ad3531c82eae774947ae99cc3f",
+                "sha256:7d276f69bfc8c7ba6c717ba8deaf28f9d3c8450ff0aa8713f5a3280e232be16b",
+                "sha256:7f10f8ba9c1b1430addc7dd385fc322e221559d3ae49b812aebf57470ce8de45",
+                "sha256:8042040af86a494a23c189b5aa0ea9433769cc029707833f261a79c98e3375f9",
+                "sha256:813846738277729d7db71b82176204abc7fdae2f566e2d9fcf874f9b6472e3e6",
+                "sha256:845a14f6deb124a3bcb98a62def067a67462a000e0508f256f9c18eff5847efc",
+                "sha256:869a183c8e44bc03be1b2bbcc9ec4338e37fa8557fc506bf6115887c1d3bb956",
+                "sha256:8acf76443cfb5c949b6e781c154278c059b09ac717d2757a830c869ba000cf8d",
+                "sha256:8f713ea65958ef40049b6c45c40c206ab363db9591ff5a49d89b448933fa5746",
+                "sha256:934115642c8ba9659b402c8bdbdedb48651fb94b576e3b3efd1ccb079609b04a",
+                "sha256:9551f23e09300a9a528f7af20e35c9f79686d46d646152a0c8fc41d2d074d9b0",
+                "sha256:9a2b7543559f8a1c9ed72724b549d8cc3515da7daf3e79813a15bdc4a769de25",
+                "sha256:a55c76254d7cf8d4494bc508e7abb993a82a192d0db4552421e5139235604625",
+                "sha256:ad8f41c2357b73bc9e8606d2fa226233bf4d55d85a8982ecdfd55823a6959995",
+                "sha256:af4868da7dd53296cd7630687161d53a7ebe2e63814234631445697bd7c29f46",
+                "sha256:afebfc3dd3520d37056f641969ce320b071bc7a0800639c71877b90d053e087f",
+                "sha256:b59aa298137ca74a744c1e6e22cfc0bf9dca3a2f41f51bc92eb05695155d905a",
+                "sha256:bc00d1210567a4cdd215ac6e17dc00cb9893ee521cee701adfd0fa43f7c73139",
+                "sha256:c1cb29b1fced01f97e6d5631c3edc2dadb424d1f4421dad079cb13fc97acb42f",
+                "sha256:c94dc64b1a389a416fc4218cd4799aa3756f25940cae33530a4f7f2f54f166da",
+                "sha256:ceaa28a5bce8a46a130cd223e895080e258a88d51bf6e8de2fc54a6ef7e38c34",
+                "sha256:cff6453e25204d3369c47b97dd34783ca820611bd334779d22192da23784194b",
+                "sha256:d0b64409df09edb4c365d95004775c988259efe9be39697d7315c42b7a5e7e94",
+                "sha256:d4813b30cb62d3b63ccc60dd12f2121780c7a3068db692daeb90f989877aaf04",
+                "sha256:da3c55cdc66cfc3fffb607db49a42448785ea2732f055ac1549b69dcb392663b",
+                "sha256:e058c7656c44fb494a11443191e381355388443d543f6fc1a245d5d238544396",
+                "sha256:fed0f22bf1313ff79c7fc318f7199d6c2f96d4de3234b2f12a1eab350e597c06",
+                "sha256:ffd4e4877a78c84d693e491b223385e0271278f5f4e1476a4962dca6824ecfeb"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==3.17.0"
+            "version": "==3.17.2"
         },
         },
         "six": {
         "six": {
             "hashes": [
             "hashes": [
-                "sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
-                "sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
+                "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
+                "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
             ],
             ],
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==1.14.0"
+            "version": "==1.15.0"
         },
         },
         "sklearn": {
         "sklearn": {
             "hashes": [
             "hashes": [
@@ -333,50 +467,120 @@
         },
         },
         "sqlalchemy": {
         "sqlalchemy": {
             "hashes": [
             "hashes": [
-                "sha256:083e383a1dca8384d0ea6378bd182d83c600ed4ff4ec8247d3b2442cf70db1ad",
-                "sha256:0a690a6486658d03cc6a73536d46e796b6570ac1f8a7ec133f9e28c448b69828",
-                "sha256:114b6ace30001f056e944cebd46daef38fdb41ebb98f5e5940241a03ed6cad43",
-                "sha256:128f6179325f7597a46403dde0bf148478f868df44841348dfc8d158e00db1f9",
-                "sha256:13d48cd8b925b6893a4e59b2dfb3e59a5204fd8c98289aad353af78bd214db49",
-                "sha256:211a1ce7e825f7142121144bac76f53ac28b12172716a710f4bf3eab477e730b",
-                "sha256:2dc57ee80b76813759cccd1a7affedf9c4dbe5b065a91fb6092c9d8151d66078",
-                "sha256:3e625e283eecc15aee5b1ef77203bfb542563fa4a9aa622c7643c7b55438ff49",
-                "sha256:43078c7ec0457387c79b8d52fff90a7ad352ca4c7aa841c366238c3e2cf52fdf",
-                "sha256:5b1bf3c2c2dca738235ce08079783ef04f1a7fc5b21cf24adaae77f2da4e73c3",
-                "sha256:6056b671aeda3fc451382e52ab8a753c0d5f66ef2a5ccc8fa5ba7abd20988b4d",
-                "sha256:68d78cf4a9dfade2e6cf57c4be19f7b82ed66e67dacf93b32bb390c9bed12749",
-                "sha256:7025c639ce7e170db845e94006cf5f404e243e6fc00d6c86fa19e8ad8d411880",
-                "sha256:7224e126c00b8178dfd227bc337ba5e754b197a3867d33b9f30dc0208f773d70",
-                "sha256:7d98e0785c4cd7ae30b4a451416db71f5724a1839025544b4edbd92e00b91f0f",
-                "sha256:8d8c21e9d4efef01351bf28513648ceb988031be4159745a7ad1b3e28c8ff68a",
-                "sha256:bbb545da054e6297242a1bb1ba88e7a8ffb679f518258d66798ec712b82e4e07",
-                "sha256:d00b393f05dbd4ecd65c989b7f5a81110eae4baea7a6a4cdd94c20a908d1456e",
-                "sha256:e18752cecaef61031252ca72031d4d6247b3212ebb84748fc5d1a0d2029c23ea"
+                "sha256:009e8388d4d551a2107632921320886650b46332f61dc935e70c8bcf37d8e0d6",
+                "sha256:0157c269701d88f5faf1fa0e4560e4d814f210c01a5b55df3cab95e9346a8bcc",
+                "sha256:0a92745bb1ebbcb3985ed7bda379b94627f0edbc6c82e9e4bac4fb5647ae609a",
+                "sha256:0cca1844ba870e81c03633a99aa3dc62256fb96323431a5dec7d4e503c26372d",
+                "sha256:166917a729b9226decff29416f212c516227c2eb8a9c9f920d69ced24e30109f",
+                "sha256:1f5f369202912be72fdf9a8f25067a5ece31a2b38507bb869306f173336348da",
+                "sha256:2909dffe5c9a615b7e6c92d1ac2d31e3026dc436440a4f750f4749d114d88ceb",
+                "sha256:2b5dafed97f778e9901b79cc01b88d39c605e0545b4541f2551a2fd785adc15b",
+                "sha256:2e9bd5b23bba8ae8ce4219c9333974ff5e103c857d9ff0e4b73dc4cb244c7d86",
+                "sha256:3aa6d45e149a16aa1f0c46816397e12313d5e37f22205c26e06975e150ffcf2a",
+                "sha256:4bdbdb8ca577c6c366d15791747c1de6ab14529115a2eb52774240c412a7b403",
+                "sha256:53fd857c6c8ffc0aa6a5a3a2619f6a74247e42ec9e46b836a8ffa4abe7aab327",
+                "sha256:5cdfe54c1e37279dc70d92815464b77cd8ee30725adc9350f06074f91dbfeed2",
+                "sha256:5d92c18458a4aa27497a986038d5d797b5279268a2de303cd00910658e8d149c",
+                "sha256:632b32183c0cb0053194a4085c304bc2320e5299f77e3024556fa2aa395c2a8b",
+                "sha256:7c735c7a6db8ee9554a3935e741cf288f7dcbe8706320251eb38c412e6a4281d",
+                "sha256:7cd40cb4bc50d9e87b3540b23df6e6b24821ba7e1f305c1492b0806c33dbdbec",
+                "sha256:84f0ac4a09971536b38cc5d515d6add7926a7e13baa25135a1dbb6afa351a376",
+                "sha256:8dcbf377529a9af167cbfc5b8acec0fadd7c2357fc282a1494c222d3abfc9629",
+                "sha256:950f0e17ffba7a7ceb0dd056567bc5ade22a11a75920b0e8298865dc28c0eff6",
+                "sha256:9e379674728f43a0cd95c423ac0e95262500f9bfd81d33b999daa8ea1756d162",
+                "sha256:b15002b9788ffe84e42baffc334739d3b68008a973d65fad0a410ca5d0531980",
+                "sha256:b6f036ecc017ec2e2cc2a40615b41850dc7aaaea6a932628c0afc73ab98ba3fb",
+                "sha256:bad73f9888d30f9e1d57ac8829f8a12091bdee4949b91db279569774a866a18e",
+                "sha256:bbc58fca72ce45a64bb02b87f73df58e29848b693869e58bd890b2ddbb42d83b",
+                "sha256:bca4d367a725694dae3dfdc86cf1d1622b9f414e70bd19651f5ac4fb3aa96d61",
+                "sha256:be41d5de7a8e241864189b7530ca4aaf56a5204332caa70555c2d96379e18079",
+                "sha256:bf53d8dddfc3e53a5bda65f7f4aa40fae306843641e3e8e701c18a5609471edf",
+                "sha256:c092fe282de83d48e64d306b4bce03114859cdbfe19bf8a978a78a0d44ddadb1",
+                "sha256:c3ab23ee9674336654bf9cac30eb75ac6acb9150dc4b1391bec533a7a4126471",
+                "sha256:ce64a44c867d128ab8e675f587aae7f61bd2db836a3c4ba522d884cd7c298a77",
+                "sha256:d05cef4a164b44ffda58200efcb22355350979e000828479971ebca49b82ddb1",
+                "sha256:d2f25c7f410338d31666d7ddedfa67570900e248b940d186b48461bd4e5569a1",
+                "sha256:d3b709d64b5cf064972b3763b47139e4a0dc4ae28a36437757f7663f67b99710",
+                "sha256:e32e3455db14602b6117f0f422f46bc297a3853ae2c322ecd1e2c4c04daf6ed5",
+                "sha256:ed53209b5f0f383acb49a927179fa51a6e2259878e164273ebc6815f3a752465",
+                "sha256:f605f348f4e6a2ba00acb3399c71d213b92f27f2383fc4abebf7a37368c12142",
+                "sha256:fcdb3755a7c355bc29df1b5e6fb8226d5c8b90551d202d69d0076a8a5649d68b"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==1.3.16"
+            "version": "==1.3.20"
         },
         },
         "sqlalchemy-utils": {
         "sqlalchemy-utils": {
             "hashes": [
             "hashes": [
-                "sha256:f268af5bc03597fe7690d60df3e5f1193254a83e07e4686f720f61587ec4493a"
+                "sha256:fb66e9956e41340011b70b80f898fde6064ec1817af77199ee21ace71d7d6ab0"
             ],
             ],
-            "version": "==0.36.3"
+            "version": "==0.36.8"
         },
         },
         "sqlparse": {
         "sqlparse": {
             "hashes": [
             "hashes": [
-                "sha256:022fb9c87b524d1f7862b3037e541f68597a730a8843245c349fc93e1643dc4e",
-                "sha256:e162203737712307dfe78860cc56c8da8a852ab2ee33750e33aeadf38d12c548"
+                "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
+                "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"
             ],
             ],
             "index": "pypi",
             "index": "pypi",
-            "version": "==0.3.1"
+            "version": "==0.4.1"
+        },
+        "threadpoolctl": {
+            "hashes": [
+                "sha256:38b74ca20ff3bb42caca8b00055111d74159ee95c4370882bbff2b93d24da725",
+                "sha256:ddc57c96a38beb63db45d6c159b5ab07b6bced12c45a1f07b2b92f272aebfa6b"
+            ],
+            "markers": "python_version >= '3.5'",
+            "version": "==2.1.0"
         },
         },
         "tqdm": {
         "tqdm": {
             "hashes": [
             "hashes": [
-                "sha256:00339634a22c10a7a22476ee946bbde2dbe48d042ded784e4d88e0236eca5d81",
-                "sha256:ea9e3fd6bd9a37e8783d75bfc4c1faf3c6813da6bd1c3e776488b41ec683af94"
+                "sha256:9ad44aaf0fc3697c06f6e05c7cf025dd66bc7bcb7613c66d85f4464c47ac8fad",
+                "sha256:ef54779f1c09f346b2b5a8e5c61f96fbcb639929e640e59f8cf810794f406432"
             ],
             ],
             "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
             "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'",
-            "version": "==4.45.0"
+            "version": "==4.51.0"
+        },
+        "typed-ast": {
+            "hashes": [
+                "sha256:0666aa36131496aed8f7be0410ff974562ab7eeac11ef351def9ea6fa28f6355",
+                "sha256:0c2c07682d61a629b68433afb159376e24e5b2fd4641d35424e462169c0a7919",
+                "sha256:0d8110d78a5736e16e26213114a38ca35cb15b6515d535413b090bd50951556d",
+                "sha256:249862707802d40f7f29f6e1aad8d84b5aa9e44552d2cc17384b209f091276aa",
+                "sha256:24995c843eb0ad11a4527b026b4dde3da70e1f2d8806c99b7b4a7cf491612652",
+                "sha256:269151951236b0f9a6f04015a9004084a5ab0d5f19b57de779f908621e7d8b75",
+                "sha256:3742b32cf1c6ef124d57f95be609c473d7ec4c14d0090e5a5e05a15269fb4d0c",
+                "sha256:4083861b0aa07990b619bd7ddc365eb7fa4b817e99cf5f8d9cf21a42780f6e01",
+                "sha256:498b0f36cc7054c1fead3d7fc59d2150f4d5c6c56ba7fb150c013fbc683a8d2d",
+                "sha256:4e3e5da80ccbebfff202a67bf900d081906c358ccc3d5e3c8aea42fdfdfd51c1",
+                "sha256:6daac9731f172c2a22ade6ed0c00197ee7cc1221aa84cfdf9c31defeb059a907",
+                "sha256:715ff2f2df46121071622063fc7543d9b1fd19ebfc4f5c8895af64a77a8c852c",
+                "sha256:73d785a950fc82dd2a25897d525d003f6378d1cb23ab305578394694202a58c3",
+                "sha256:7e4c9d7658aaa1fc80018593abdf8598bf91325af6af5cce4ce7c73bc45ea53d",
+                "sha256:8c8aaad94455178e3187ab22c8b01a3837f8ee50e09cf31f1ba129eb293ec30b",
+                "sha256:8ce678dbaf790dbdb3eba24056d5364fb45944f33553dd5869b7580cdbb83614",
+                "sha256:92c325624e304ebf0e025d1224b77dd4e6393f18aab8d829b5b7e04afe9b7a2c",
+                "sha256:aaee9905aee35ba5905cfb3c62f3e83b3bec7b39413f0a7f19be4e547ea01ebb",
+                "sha256:b52ccf7cfe4ce2a1064b18594381bccf4179c2ecf7f513134ec2f993dd4ab395",
+                "sha256:bcd3b13b56ea479b3650b82cabd6b5343a625b0ced5429e4ccad28a8973f301b",
+                "sha256:c9e348e02e4d2b4a8b2eedb48210430658df6951fa484e59de33ff773fbd4b41",
+                "sha256:d205b1b46085271b4e15f670058ce182bd1199e56b317bf2ec004b6a44f911f6",
+                "sha256:d43943ef777f9a1c42bf4e552ba23ac77a6351de620aa9acf64ad54933ad4d34",
+                "sha256:d5d33e9e7af3b34a40dc05f498939f0ebf187f07c385fd58d591c533ad8562fe",
+                "sha256:d648b8e3bf2fe648745c8ffcee3db3ff903d0817a01a12dd6a6ea7a8f4889072",
+                "sha256:f208eb7aff048f6bea9586e61af041ddf7f9ade7caed625742af423f6bae3298",
+                "sha256:fac11badff8313e23717f3dada86a15389d0708275bddf766cca67a84ead3e91",
+                "sha256:fc0fea399acb12edbf8a628ba8d2312f583bdbdb3335635db062fa98cf71fca4",
+                "sha256:fcf135e17cc74dbfbc05894ebca928ffeb23d9790b3167a674921db19082401f",
+                "sha256:fe460b922ec15dd205595c9b5b99e2f056fd98ae8f9f56b888e7a17dc2b757e7"
+            ],
+            "version": "==1.4.1"
+        },
+        "typing-extensions": {
+            "hashes": [
+                "sha256:7cb407020f00f7bfc3cb3e7881628838e69d8f3fcab2f64742a5e76b2f841918",
+                "sha256:99d4073b617d30288f569d3f13d2bd7548c3a7e4c8de87db09a9d29bb3a4a60c",
+                "sha256:dafc7639cde7f1b6e1acc0f457842a83e722ccca8eef5270af2d74792619a89f"
+            ],
+            "version": "==3.7.4.3"
         }
         }
     },
     },
     "develop": {}
     "develop": {}

+ 1 - 2
cdplib/db_handlers/SQLHandler.py

@@ -508,7 +508,6 @@ class SQLHandler:
         :rtype: DataFrame
         :rtype: DataFrame
         '''
         '''
         try:
         try:
-            
             connection = self._engine.connect()
             connection = self._engine.connect()
 
 
             data = pd.read_sql(sql=query,
             data = pd.read_sql(sql=query,
@@ -516,7 +515,7 @@ class SQLHandler:
                                **read_sql_kwargs)
                                **read_sql_kwargs)
 
 
             connection.close()
             connection.close()
-           
+
             return data
             return data
 
 
         except Exception as e:
         except Exception as e:

+ 173 - 0
cdplib/fine_tuning/FineTunedClassiferCV.py

@@ -0,0 +1,173 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Apr 23 08:51:53 2020
+
+@author: tanya
+
+@description: class for fine-tuning a sklearn classifier
+(optimizing the probability threshold)
+"""
+
+import pandas as pd
+import numpy as np
+
+from typing import Callable
+
+from sklearn.base import (BaseEstimator, ClassifierMixin,
+                          clone, MetaEstimatorMixin)
+
+from cdplib.log import Log
+
+from cdplib.utils.TyperConverter import TypeConverter
+
+
class FineTunedClassifierCV(BaseEstimator, ClassifierMixin,
                            MetaEstimatorMixin):
    """
    Probability threshold tuning for a given binary classifier.
    Overrides the method predict of the given sklearn classifier
    and returns predictions with the optimal value of
    the probability threshold (instead of the default 0.5).

    An object of this class can be passed to an sklearn Pipeline
    """
    def __init__(self, estimator, cost_func: Callable, greater_is_better: bool,
                 cv=None, threshold_step: float = 0.1):
        """
        :param estimator: binary sklearn classifier with a predict_proba method
        :param cost_func: score of the form cost_func(y_true, y_pred)
        :param greater_is_better: when True cost_func is maximized,
            else minimized
        :param cv: iterable of (train_indices, validation_indices) pairs;
            must be provided before calling fit
        :param threshold_step: granularity of the candidate thresholds in (0, 1)
        """
        self.estimator = estimator

        self.is_fitted = False

        self.greater_is_better = greater_is_better

        # BUGFIX: the original stored Ellipsis (...) when cv was None, which
        # crashed later when iterating the folds in fit; a missing cv is now
        # reported explicitly in fit instead.
        self.cv = cv

        self.cost_func = cost_func

        self.threshold_step = threshold_step

        # sklearn's default decision threshold, overwritten by fit
        self.optimal_threshold = 0.5

        # NOTE(review): tag differs from the class name — kept byte-identical
        # for log continuity
        self._logger = Log("FineTunedClassifyCV")

    def _get_best_threshold(self, y_val: (pd.DataFrame, np.ndarray),
                            proba_pred: (pd.DataFrame, np.ndarray)):
        '''
        Evaluate cost_func on the validation fold for every candidate
        threshold in (0, 1) and return the best-scoring one.
        '''
        costs = {}

        for t in np.arange(self.threshold_step, 1, self.threshold_step):
            costs[t] = self.cost_func(y_val, (proba_pred >= t).astype(int))

        if self.greater_is_better:
            return max(costs, key=costs.get)
        else:
            return min(costs, key=costs.get)

    def fit(self, X: (pd.DataFrame, np.ndarray),
            y: (pd.DataFrame, np.ndarray) = None,
            **fit_args):
        """
        Find the optimal probability threshold by cross-validation,
        then refit the wrapped estimator on the full data.

        :return: self (sklearn fit contract)
        """
        if self.cv is None:
            self._logger.log_and_raise_error(
                    "A cv object must be provided before calling fit")

        X = TypeConverter().convert_to_ndarray(X)
        if y is not None:
            # BUGFIX: the original converted X here a second time instead of y
            y = TypeConverter().convert_to_ndarray(y)

        optimal_thrs_per_fold = []

        for train_inds, val_inds in self.cv:
            X_train, X_val = X[train_inds], X[val_inds]

            if y is not None:
                y_train, y_val = y[train_inds], y[val_inds]
            else:
                y_train, y_val = None, None

            # BUGFIX: the original cloned the *global* fine_tuned_clf.estimator
            estimator = clone(self.estimator)

            estimator.fit(X_train, y_train, **fit_args)

            # BUGFIX: keep only the positive-class column; predict_proba
            # returns one column per class
            proba_pred = estimator.predict_proba(X_val)[:, 1]

            optimal_thr = self._get_best_threshold(y_val, proba_pred)

            optimal_thrs_per_fold.append(optimal_thr)

        self.optimal_threshold = np.mean(optimal_thrs_per_fold)

        # BUGFIX: the original refitted on the full data without the target y
        self.estimator.fit(X, y, **fit_args)

        # BUGFIX: the original never set this flag, so predict
        # always warned and returned None
        self.is_fitted = True

        return self

    def predict(self, X: (pd.DataFrame, np.ndarray)) -> np.ndarray:
        """
        Hard class predictions using the tuned probability threshold.
        Warns and returns None when the model has not been fitted.
        """
        if self.is_fitted:

            # BUGFIX: threshold the positive-class probability only,
            # not the whole (n_samples, n_classes) matrix
            proba_pred = self.estimator.predict_proba(X)[:, 1]

            return (proba_pred >= self.optimal_threshold).astype(int)

        else:
            self._logger.warn("You should fit first")

    def get_params(self, deep: bool = True) -> dict:
        """
        Parameters of the wrapped estimator plus the cv object and the
        cost function. The deep argument follows the sklearn
        get_params contract (backward-compatible addition).
        """
        params = self.estimator.get_params(deep=deep)

        params.update({"cv": self.cv, "cost_func": self.cost_func})

        return params

    def set_params(self, **params):
        """
        Set cv/cost_func on this wrapper and forward the remaining
        parameters to the wrapped estimator.
        """
        # BUGFIX: the original popped keys while iterating over the dict,
        # which raises RuntimeError in Python 3
        if "cv" in params:
            self.cv = params.pop("cv")

        if "cost_func" in params:
            self.cost_func = params.pop("cost_func")

        self.estimator.set_params(**params)
+
+
if __name__ == "__main__":
    # Smoke test: tune the threshold on the iris data reduced
    # to a binary (class-1 vs rest) problem.
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score
    import gc
    from xgboost import XGBRFClassifier

    data = load_iris()
    X, y = data["data"], data["target"]
    y = (y == 1).astype(int)
    del data
    gc.collect()

    # Custom expanding-window cv: each fold trains on the first i samples
    # and validates on the next val_len samples.
    val_len = len(X) // 10
    cv = [(list(range(i)), list(range(i, i + val_len)))
          for i in range(len(X) // 2, len(X), val_len)]

    clf = XGBRFClassifier()

    fine_tuned_clf = FineTunedClassifierCV(estimator=clf,
                                           cv=cv,
                                           greater_is_better=True,
                                           cost_func=accuracy_score)

    fine_tuned_clf.fit(X=X, y=y)
+

+ 47 - 51
cdplib/gridsearch/GridSearchPipelineSelector.py

@@ -14,16 +14,15 @@ Created on Wed Sep 30 14:15:17 2020
 """
 """
 
 
 import os
 import os
-import sys
 import datetime
 import datetime
+import numpy as np
 from itertools import product
 from itertools import product
 from collections import ChainMap
 from collections import ChainMap
 from sklearn.pipeline import Pipeline
 from sklearn.pipeline import Pipeline
+from typing import Callable, Optional, Literal, Dict, Union, List
 
 
 from cdplib.pipeline_selector.PipelineSelector import PipelineSelector
 from cdplib.pipeline_selector.PipelineSelector import PipelineSelector
 
 
-sys.path.append(os.getcwd())
-
 
 
 class GridSearchPipelineSelector(PipelineSelector):
 class GridSearchPipelineSelector(PipelineSelector):
     """
     """
@@ -36,17 +35,19 @@ class GridSearchPipelineSelector(PipelineSelector):
      if needed.
      if needed.
     """
     """
     def __init__(self,
     def __init__(self,
-                 cost_func,
+                 cost_func: Union[Callable, str],
                  greater_is_better: bool,
                  greater_is_better: bool,
                  trials_path: str,
                  trials_path: str,
-                 backup_trials_freq: int = 1,
-                 cross_val_averaging_func: callable = None,
-                 additional_metrics: dict = None,
-                 strategy_name: str = None,
-                 stdout_log_level: str = "INFO"
+                 backup_trials_freq: Optional[int] = None,
+                 cross_val_averaging_func: Callable = np.mean,
+                 additional_metrics: Optional[Dict[str, Callable]] = None,
+                 strategy_name: Optional[str] = None,
+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
+                 = "INFO"
                  ):
                  ):
         """
         """
-        :param callable cost_func: function to minimize or maximize
+        :param Callable cost_func: function to minimize or maximize
+            over the elements of a given (pipeline/hyperparameter) space
 
 
         :param bool greater_is_better: when True
         :param bool greater_is_better: when True
             cost_func is maximized, else minimized.
             cost_func is maximized, else minimized.
@@ -56,25 +57,24 @@ class GridSearchPipelineSelector(PipelineSelector):
             select information about the obtained scores, score variations,
             select information about the obtained scores, score variations,
             and pipelines, and parameters tried out so far. If a trials object
             and pipelines, and parameters tried out so far. If a trials object
             already exists at the given path, it is loaded and the
             already exists at the given path, it is loaded and the
-            search is continued, else, the search is started from
-            the beginning.
+            search is continued, else, the search is started from scratch.
 
 
         :param backup_trials_freq: frequecy in interations (trials)
         :param backup_trials_freq: frequecy in interations (trials)
             of saving the trials object at the trials_path.
             of saving the trials object at the trials_path.
+            if None, the trials object is backed up every time
+            the score improves.
 
 
-        :param str log_path: Optional, when not provided logs to stdout.
+        :param Callable cross_val_averaging_func: Function to aggregate
+            the cross-validation scores.
+            Example different from the mean: mean - c*var.
 
 
-        :param callable averaging_func: optional,
-            when not provided set to mean. Function
-            to aggregate the cross-validated values of the cost function.
-            Classic situation is to take the mean,
-            another example is, for example mean() - c*var().
         :param additional_metics: dict of additional metrics to save
         :param additional_metics: dict of additional metrics to save
             of the form {"metric_name": metric} where metric is a Callable.
             of the form {"metric_name": metric} where metric is a Callable.
 
 
-        :param str strategy_name: a name might be asigned to the trials,
-            a strategy is defined by the data set, cv object, cost function.
-            When the strategy changes, one should start with new trials.
+        :param str strategy_name:
+            a strategy is defined by the data set (columns/features and rows),
+            cv object, cost function.
+            When the strategy changes, one must start with new trials.
 
 
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
         """
@@ -99,7 +99,7 @@ class GridSearchPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def run_trials(self):
+    def run_trials(self) -> None:
         """
         """
         """
         """
         try:
         try:
@@ -115,22 +115,25 @@ class GridSearchPipelineSelector(PipelineSelector):
             # with all different combinations of
             # with all different combinations of
             # parameters for different pipelines
             # parameters for different pipelines
             # from the space definition.
             # from the space definition.
-            space_unfolded = ({"name": pipeline_dist["name"],
-                               "pipeline": pipeline_dist["pipeline"],
+            space_unfolded = ({"name": param_dist["name"],
+                               "pipeline": param_dist["pipeline"],
                                "params": param_set}
                                "params": param_set}
-                              for pipeline_dist in self._space
+                              for param_dist in self._space
                               for param_set in
                               for param_set in
                               (dict(ChainMap(*tup)) for tup in
                               (dict(ChainMap(*tup)) for tup in
                                product(*[[{k: v} for v in
                                product(*[[{k: v} for v in
-                                          pipeline_dist["params"][k]]
-                                         for k in pipeline_dist["params"]])))
+                                          param_dist["params"][k]]
+                                         for k in param_dist["params"]])))
 
 
             for space_element in space_unfolded:
             for space_element in space_unfolded:
 
 
+                # uniquely identifies the current space element
                 trial_id = {"name": space_element["name"],
                 trial_id = {"name": space_element["name"],
                             "params": space_element["params"],
                             "params": space_element["params"],
                             "status": 'ok'}
                             "status": 'ok'}
 
 
+                # verify if the current pipline/parameters
+                # were already tested before
                 if trial_id in done_trial_ids:
                 if trial_id in done_trial_ids:
                     continue
                     continue
 
 
@@ -159,15 +162,12 @@ class GridSearchPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def number_of_trials(self) -> int:
+    def number_of_trials(self) -> Union[int, None]:
         """
         """
         Number of trials already run in the current trials object
         Number of trials already run in the current trials object
         """
         """
         try:
         try:
-            if self._trials is None:
-                return 0
-            else:
-                return len(self._trials)
+            return len(self._trials)
 
 
         except Exception as e:
         except Exception as e:
             err = ("Failed to retrieve the number of trials. "
             err = ("Failed to retrieve the number of trials. "
@@ -176,11 +176,11 @@ class GridSearchPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial(self) -> dict:
+    def best_trial(self) -> Union[dict, None]:
         """
         """
         """
         """
         try:
         try:
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -193,11 +193,11 @@ class GridSearchPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_score(self) -> float:
+    def best_trial_score(self) -> Union[float, None]:
         '''
         '''
         '''
         '''
         try:
         try:
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -210,11 +210,11 @@ class GridSearchPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_score_variance(self) -> float:
+    def best_trial_score_variance(self) -> Union[float, None]:
         '''
         '''
         '''
         '''
         try:
         try:
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -227,11 +227,11 @@ class GridSearchPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_pipeline(self) -> Pipeline:
+    def best_trial_pipeline(self) -> Union[Pipeline, None]:
         '''
         '''
         '''
         '''
         try:
         try:
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -243,16 +243,14 @@ class GridSearchPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def get_n_best_trial_pipelines(self, n: int) -> list:
+    def get_n_best_trial_pipelines(self, n: int)\
+            -> Union[List[Pipeline], None]:
         """
         """
         N best pipelines with corresponding
         N best pipelines with corresponding
         best hyperparameters
         best hyperparameters
         """
         """
         try:
         try:
-            assert(isinstance(n, int)),\
-                "Parameter n must be an int"
-
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -266,17 +264,15 @@ class GridSearchPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def get_n_best_trial_pipelines_of_each_type(self, n: int) -> list:
+    def get_n_best_trial_pipelines_of_each_type(self, n: int)\
+            -> Union[Dict[str, List[Pipeline]], None]:
         """
         """
         If the hyperparameter search is done over multiple
         If the hyperparameter search is done over multiple
         pipelines, then returns n different pipeline-types
         pipelines, then returns n different pipeline-types
         with corresponding hyperparameters
         with corresponding hyperparameters
         """
         """
         try:
         try:
-            assert(isinstance(n, int)),\
-                "Parameter n must be an int"
-
-            assert(self._trials is not None),\
+            assert(len(self._trials) > 0),\
                 ("Trials object is empty. "
                 ("Trials object is empty. "
                  "Call run_trials method.")
                  "Call run_trials method.")
 
 
@@ -295,7 +291,7 @@ class GridSearchPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def trials_to_excel(self, path: str):
+    def trials_to_excel(self, path: str) -> None:
         """
         """
         Trials object in the shape of table written to excel,
         Trials object in the shape of table written to excel,
         should contain the run number, pipeline (as str),
         should contain the run number, pipeline (as str),

+ 2 - 4
cdplib/hyperopt/HyperoptPipelineSelection.py

@@ -480,8 +480,6 @@ class HyperoptPipelineSelection:
                         trials=self._trials,
                         trials=self._trials,
                         max_evals=len(self._trials.trials) + niter)
                         max_evals=len(self._trials.trials) + niter)
 
 
-            # print('AAAA', str(niter))
-
             self._logger.info(
             self._logger.info(
                     "Best score is {0} with variance {1}"
                     "Best score is {0} with variance {1}"
                     .format(
                     .format(
@@ -589,8 +587,8 @@ class HyperoptPipelineSelection:
                 losses = [self._ith_trial_loss(i)
                 losses = [self._ith_trial_loss(i)
                           for i in range(len(self._trials.trials))]
                           for i in range(len(self._trials.trials))]
 
 
-            best_n_indices = [losses.index(l)
-                              for l in sorted(list(set(losses)))[:n]]
+            best_n_indices = [losses.index(ll)
+                              for ll in sorted(list(set(losses)))[:n]]
 
 
             return [self._ith_trial_pipeline(i) for i in best_n_indices]
             return [self._ith_trial_pipeline(i) for i in best_n_indices]
         else:
         else:

+ 60 - 60
cdplib/hyperopt/HyperoptPipelineSelector.py

@@ -21,8 +21,6 @@ from copy import deepcopy
 
 
 import datetime
 import datetime
 
 
-from typing import Callable
-
 import pandas as pd
 import pandas as pd
 import numpy as np
 import numpy as np
 
 
@@ -30,7 +28,10 @@ from sklearn.pipeline import Pipeline
 
 
 from hyperopt import fmin, tpe, rand, Trials, space_eval
 from hyperopt import fmin, tpe, rand, Trials, space_eval
 
 
-from cdplib.pipeline_selector.PipelineSelector import PipelineSelector
+from cdplib.pipeline_selector.PipelineSelector import PipelineSelector,\
+     SpaceElementType
+
+from typing import Callable, Optional, Literal, Dict, Union, List
 
 
 
 
 class HyperoptPipelineSelector(PipelineSelector):
 class HyperoptPipelineSelector(PipelineSelector):
@@ -52,16 +53,18 @@ class HyperoptPipelineSelector(PipelineSelector):
     a better pipeline was found.
     a better pipeline was found.
     """
     """
     def __init__(self,
     def __init__(self,
-                 cost_func: (Callable, str),
+                 cost_func: Union[Callable, str],
                  greater_is_better: bool,
                  greater_is_better: bool,
                  trials_path: str,
                  trials_path: str,
-                 backup_trials_freq: int = None,
-                 cross_val_averaging_func: Callable = None,
-                 additional_metrics: dict = None,
-                 strategy_name: str = None,
-                 stdout_log_level: str = "INFO"):
+                 backup_trials_freq: Optional[int] = None,
+                 cross_val_averaging_func: Callable = np.mean,
+                 additional_metrics: Optional[Dict[str, Callable]] = None,
+                 strategy_name: Optional[str] = None,
+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
+                 = "INFO"):
         """
         """
-        :param callable cost_func: function to minimize or maximize
+        :param Callable cost_func: function to minimize or maximize
+            over the elements of a given (pipeline/hyperparameter) space
 
 
         :param bool greater_is_better: when True
         :param bool greater_is_better: when True
             cost_func is maximized, else minimized.
             cost_func is maximized, else minimized.
@@ -71,25 +74,24 @@ class HyperoptPipelineSelector(PipelineSelector):
             select information about the obtained scores, score variations,
             select information about the obtained scores, score variations,
             and pipelines, and parameters tried out so far. If a trials object
             and pipelines, and parameters tried out so far. If a trials object
             already exists at the given path, it is loaded and the
             already exists at the given path, it is loaded and the
-            search is continued, else, the search is started from
-            the beginning.
+            search is continued, else, the search is started from scratch.
 
 
         :param backup_trials_freq: frequecy in interations (trials)
         :param backup_trials_freq: frequecy in interations (trials)
             of saving the trials object at the trials_path.
             of saving the trials object at the trials_path.
+            if None, the trials object is backed up every time
+            the score improves.
 
 
-        :param str log_path: Optional, when not provided logs to stdout.
+        :param Callable cross_val_averaging_func: Function to aggregate
+            the cross-validation scores.
+            Example different from the mean: mean - c*var.
 
 
-        :param callable averaging_func: optional,
-            when not provided set to mean. Function
-            to aggregate the cross-validated values of the cost function.
-            Classic situation is to take the mean,
-            another example is, for example mean() - c*var().
         :param additional_metics: dict of additional metrics to save
         :param additional_metics: dict of additional metrics to save
             of the form {"metric_name": metric} where metric is a Callable.
             of the form {"metric_name": metric} where metric is a Callable.
 
 
-        :param str strategy_name: a name might be asigned to the trials,
-            a strategy is defined by the data set, cv object, cost function.
-            When the strategy changes, one should start with new trials.
+        :param str strategy_name:
+            a strategy is defined by the data set (columns/features and rows),
+            cv object, cost function.
+            When the strategy changes, one must start with new trials.
 
 
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
         """
@@ -116,30 +118,19 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
     def run_trials(self,
     def run_trials(self,
                    niter: int,
                    niter: int,
-                   algo: callable = tpe.suggest):
+                   algo: Callable = tpe.suggest)\
+            -> None:
         '''
         '''
         Method performing the search of the best pipeline in the given space.
         Method performing the search of the best pipeline in the given space.
         Calls fmin function from the hyperopt library to minimize the output of
         Calls fmin function from the hyperopt library to minimize the output of
         _objective.
         _objective.
 
 
         :params int niter: number of search iterations
         :params int niter: number of search iterations
-        :param callable algo: now can only take values tpe for a tree-based
-            random search or random for random search
+        :param algo: search algorithm; can only take values supported by
+            the hyperopt library. For now these are tpe.suggest for a
+            tree-based bayesian search or rand.suggest for randomized search
         '''
         '''
         try:
         try:
-            assert(self.attached_space),\
-                ("Space must be attach to be able to "
-                 "retrieve this information.")
-
-            assert(isinstance(niter, int)),\
-                "Parameter 'niter' must be of int type"
-
-            # right now only two algorithms are provided by hyperopt
-            assert(algo in [tpe.suggest, rand.suggest]),\
-                ("Parameter 'algo' can be now only tpe or random. "
-                 "If other algorithms have been developped by "
-                 "by hyperopt, plased add them to the list.")
-
             self._trials = self._trials or Trials()
             self._trials = self._trials or Trials()
 
 
             self._logger.info(("Starting {0} iterations of search "
             self._logger.info(("Starting {0} iterations of search "
@@ -171,11 +162,13 @@ class HyperoptPipelineSelector(PipelineSelector):
             self._backup_trials()
             self._backup_trials()
 
 
         except Exception as e:
         except Exception as e:
-            raise ValueError(("Failed to select best "
-                             "pipeline! Exit with error: {}").format(e))
+            err = ("Failed to select best "
+                   "pipeline! Exit with error: {}").format(e)
+
+            self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def number_of_trials(self) -> int:
+    def number_of_trials(self) -> Union[int, None]:
         """
         """
         :return: number of trials run so far
         :return: number of trials run so far
             with the given Trials object
             with the given Trials object
@@ -187,9 +180,11 @@ class HyperoptPipelineSelector(PipelineSelector):
         except Exception as e:
         except Exception as e:
             err = ("Failed to retrieve the number of trials. "
             err = ("Failed to retrieve the number of trials. "
                    "Exit with error {}".format(e))
                    "Exit with error {}".format(e))
+
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _get_space_element_from_trial(self, trial) -> dict:
+    def _get_space_element_from_trial(self, trial: Dict)\
+            -> Union[Dict[str, SpaceElementType], None]:
         """
         """
         Hyperopt trials object does not contain the space
         Hyperopt trials object does not contain the space
              elements that result in the corresponding trials.
              elements that result in the corresponding trials.
@@ -224,7 +219,8 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _get_space_element_from_index(self, i: int) -> dict:
+    def _get_space_element_from_index(self, i: int)\
+            -> Union[Dict[str, SpaceElementType], None]:
         """
         """
         Gets the space element of shape
         Gets the space element of shape
         {"name": NAME, "params": PARAMS, "pipeline": PIPELINE}
         {"name": NAME, "params": PARAMS, "pipeline": PIPELINE}
@@ -243,7 +239,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _get_pipeline_from_index(self, i: int) -> Pipeline:
+    def _get_pipeline_from_index(self, i: int) -> Union[Pipeline, None]:
         """
         """
         Gets a pipeline with set parameters from the trial number i
         Gets a pipeline with set parameters from the trial number i
         """
         """
@@ -259,16 +255,19 @@ class HyperoptPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial(self) -> dict:
+    def best_trial(self) -> Union[Dict, None]:
         """
         """
         :return: dictionary with the summary of the best trial
         :return: dictionary with the summary of the best trial
             and space element (name, pipeline, params)
             and space element (name, pipeline, params)
             resulting in the best trial
             resulting in the best trial
         """
         """
         if len(self._trials.trials) == 0:
         if len(self._trials.trials) == 0:
+
             self._logger.log_and_throw_warning("Trials object is empty")
             self._logger.log_and_throw_warning("Trials object is empty")
             return {}
             return {}
+
         else:
         else:
+
             try:
             try:
                 best_trial = deepcopy(self._trials.best_trial)
                 best_trial = deepcopy(self._trials.best_trial)
 
 
@@ -297,7 +296,7 @@ class HyperoptPipelineSelector(PipelineSelector):
                 self._logger.log_and_raise_error(err)
                 self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_score(self) -> float:
+    def best_trial_score(self) -> Union[float, None]:
         """
         """
         """
         """
         try:
         try:
@@ -313,7 +312,7 @@ class HyperoptPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_score_variance(self) -> float:
+    def best_trial_score_variance(self) -> Union[float, None]:
         """
         """
         """
         """
         try:
         try:
@@ -329,7 +328,7 @@ class HyperoptPipelineSelector(PipelineSelector):
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
     @property
     @property
-    def best_trial_pipeline(self) -> Pipeline:
+    def best_trial_pipeline(self) -> Union[Pipeline, None]:
         """
         """
         """
         """
         try:
         try:
@@ -344,15 +343,13 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def get_n_best_trial_pipelines(self, n: int) -> list:
+    def get_n_best_trial_pipelines(self, n: int)\
+            -> Union[List[Pipeline], None]:
         """
         """
         :return: the list of n best pipelines
         :return: the list of n best pipelines
         documented in trials
         documented in trials
         """
         """
         try:
         try:
-            assert(isinstance(n, int)),\
-                "Parameter n must be an int"
-
             if len(self._trials.trials) == 0:
             if len(self._trials.trials) == 0:
                 return []
                 return []
             else:
             else:
@@ -369,15 +366,13 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def get_n_best_trial_pipelines_of_each_type(self, n: int) -> dict:
+    def get_n_best_trial_pipelines_of_each_type(self, n: int)\
+            -> Union[Dict[str, List[Pipeline]], None]:
         """
         """
         :return: a dictionary where keys are pipeline names,
         :return: a dictionary where keys are pipeline names,
         and values are lists of best pipelines with this name
         and values are lists of best pipelines with this name
         """
         """
         try:
         try:
-            assert(isinstance(n, int)),\
-                "Parameter n must be an int"
-
             scores = [trial["result"]["score"]
             scores = [trial["result"]["score"]
                       for trial in self._trials.trials]
                       for trial in self._trials.trials]
 
 
@@ -401,7 +396,7 @@ class HyperoptPipelineSelector(PipelineSelector):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def trials_to_excel(self, path: str = None):
+    def trials_to_excel(self, path: str = None) -> None:
         """
         """
         Saves an excel file with pipeline names, scores,
         Saves an excel file with pipeline names, scores,
         parameters, and timestamps.
         parameters, and timestamps.
@@ -431,8 +426,8 @@ if __name__ == '__main__':
     from sklearn.datasets import load_breast_cancer
     from sklearn.datasets import load_breast_cancer
     from cdplib.log import Log
     from cdplib.log import Log
     from cdplib.db_handlers import MongodbHandler
     from cdplib.db_handlers import MongodbHandler
-    # from cdplib.hyperopt.space_sample import space
-    from cdplib.hyperopt.composed_space_sample import space
+    from cdplib.hyperopt.space_sample import space
+    # from cdplib.hyperopt.composed_space_sample import space
 
 
     trials_path = "hyperopt_trials_TEST.pkl"
     trials_path = "hyperopt_trials_TEST.pkl"
     additional_metrics = {"precision": precision_score}
     additional_metrics = {"precision": precision_score}
@@ -472,9 +467,14 @@ if __name__ == '__main__':
 
 
     try:
     try:
 
 
+        # TODO: this line causes a pytype to throw not-callable error
+        # works fine with pytype on other class methods.
         save_method = MongodbHandler().insert_data_into_collection
         save_method = MongodbHandler().insert_data_into_collection
         save_kwargs = {'collection_name': collection_name}
         save_kwargs = {'collection_name': collection_name}
 
 
+        # save_method = pd.DataFrame.to_excel()
+        # save_kwargs = {'excel_writer': "TEST.xlsx"}
+
         hs.configer_summary_saving(save_method=save_method,
         hs.configer_summary_saving(save_method=save_method,
                                    kwargs=save_kwargs)
                                    kwargs=save_kwargs)
 
 
@@ -482,8 +482,8 @@ if __name__ == '__main__':
 
 
     except Exception as e:
     except Exception as e:
 
 
-        logger.warn(("Could not configure summary saving in mongo. "
-                     "Exit with error: {}".format(e)))
+        logger.warning(("Could not configure summary saving in mongo. "
+                        "Exit with error: {}".format(e)))
 
 
     hs.run_trials(niter=10)
     hs.run_trials(niter=10)
 
 

+ 208 - 0
cdplib/ml_validation/CVComposer.py

@@ -0,0 +1,208 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec  9 10:27:39 2020
+
+@author: tanya
+"""
+
+from typing import Union, Iterable, Tuple, List, NewType
+import pandas as pd
+import numpy as np
+from itertools import accumulate, repeat, takewhile, chain
+
+from cdplib.log import Log
+
+
# Type alias for sklearn-style cv objects: an iterable of
# (train_indices, test_indices) pairs.
CVType = NewType("CVType", Iterable[Tuple[List]])

# Anything that can act as a data set in the methods below.
# BUG FIX: the original alias referenced ``pd.Sereis`` (typo), which raised
# AttributeError as soon as this module was imported.
DataSetType = NewType("DataSetType",
                      Union[pd.DataFrame, pd.Series, np.ndarray, List])


class CVComposer:
    """
    Groups methods for composing cv objects
    that follow standards from sklearn,
    these cv objects can be passed to algorithms like gridsearch, etc
    """
    def __init__(self):
        """
        Initialize the composer with its own logger instance.
        """
        self._logger = Log("CVComposer: ")

    def dummy_cv(
            self,
            train_set_size: Union[int, None] = None,
            train_index: Union[pd.Series, np.ndarray, None] = None,
            test_set_size: Union[int, None] = None,
            test_index: DataSetType = None) -> CVType:
        """
        Compose a single-fold cv object (one fixed train/test split).

        Exactly one of (train_set_size, train_index) and exactly one of
        (test_set_size, test_index) must be given.

        :return: list with a single (train_index, test_index) pair.
        """
        assert((train_index is None) != (train_set_size is None)),\
            "Set train_index or train_set_size"

        # BUG FIX: the original assert message was a copy-paste of the
        # train-side message ("Set train_index or train_set_size").
        assert((test_index is None) != (test_set_size is None)),\
            "Set test_index or test_set_size"

        if train_index is None:
            train_index = list(range(train_set_size))

        if test_index is None:
            # BUG FIX: the original offset the test range by train_set_size,
            # which is None when train_index was passed explicitly;
            # use the actual training length instead.
            offset = len(train_index)
            test_index = list(range(offset, offset + test_set_size))

        return [(train_index, test_index)]

    def dummy_cv_and_concatenated_data_set(
            self,
            X_train: DataSetType,
            X_test: DataSetType,
            y_train: Union[DataSetType, None] = None,
            y_test: Union[DataSetType, None] = None)\
            -> Tuple[CVType, DataSetType, Union[DataSetType, None]]:
        """
        Concatenate the train and test sets into one data set and build a
        single-fold cv that reproduces the original split.

        NOTE(review): the original signature placed the non-default
        ``X_test`` after the defaulted ``y_train`` — a SyntaxError; the
        parameters were reordered to (X_train, X_test, y_train, y_test).

        :return: (cv, X, y) where cv is a one-element list of
            (train_index, test_index); y is None when no targets are given.
        """
        assert((y_test is None) == (y_train is None))

        # The original dataframe indices can only be reused when they do
        # not collide between the two sets.
        # BUG FIX: the original used ``and`` between the two sets, which
        # evaluates to one of the operands instead of the intersection.
        use_index = (isinstance(X_train, pd.DataFrame) and
                     isinstance(X_test, pd.DataFrame) and
                     (len(set(X_train.index) & set(X_test.index)) == 0))

        if use_index:

            cv = self.dummy_cv(train_index=X_train.index,
                               test_index=X_test.index)

            X = pd.concat([X_train, X_test], ignore_index=False, axis=0)

        else:
            # BUG FIX: the original passed ``train_size``/``test_size``,
            # which are not parameters of dummy_cv
            # (train_set_size/test_set_size are).
            cv = self.dummy_cv(train_set_size=len(X_train),
                               test_set_size=len(X_test))

            X = np.concatenate([X_train, X_test])

        use_target_index = use_index and (
                    isinstance(y_train, pd.Series) and
                    isinstance(y_test, pd.Series) and
                    (X_train.index.equals(y_train.index)) and
                    (X_test.index.equals(y_test.index)))

        if use_target_index:

            y = pd.concat([y_train, y_test], ignore_index=False, axis=0)

        else:

            y = np.concatenate([y_train, y_test]) if (y_train is not None)\
                else None

        # Warn when pandas inputs had to be degraded to numpy
        # (mixed types, or colliding indices).
        result_to_np = (
            (isinstance(X_train, pd.DataFrame) !=
             isinstance(X_test, pd.DataFrame)) or
            (isinstance(X_train, pd.DataFrame)) and
            (len(set(X_train.index) & set(X_test.index)) != 0))

        if result_to_np:
            self._logger.log_and_throw_warning(
                    "The concatenated dataframe is converted to numpy")

        return cv, X, y

    def expanding_cv(self, test_proportion: float,
                     start_train_proportion: float,
                     step_proportion: float = None,
                     expanding_test_size: bool = False,
                     data_set_size: Union[float, None] = None,
                     index: Union[pd.Series, np.ndarray, list, None] = None)\
            -> Union[Iterable[Tuple[List]], None]:
        """
        Generate expanding-window splits: the train window always starts at
        the beginning of the data and grows by step_proportion each fold;
        the test window directly follows the train window.

        Exactly one of index / data_set_size must be given.
        NOTE(review): step_proportion is effectively required — a None
        default crashes in ``int(step_proportion * ...)``; confirm intent.
        """
        try:
            assert((index is None) != (data_set_size is None)),\
                "Set index or data_set_size"

            index = pd.Series(index) if (index is not None)\
                else pd.Series(range(data_set_size))

            data_set_size = data_set_size or len(index)

            start_train_size = int(start_train_proportion * data_set_size)
            step_size = int(step_proportion * data_set_size)

            test_size = int(test_proportion * data_set_size)

            # accumulate(repeat(...)) produces
            # start, start + step, start + 2*step, ... until the test
            # window would no longer fit.
            train_inds_set = (list(range(train_size))
                              for train_size in
                              takewhile(
                                      lambda x: x <= data_set_size - test_size,
                                      accumulate(repeat(start_train_size),
                                                 lambda x, _: x + step_size)))

            for train_inds in train_inds_set:

                if expanding_test_size:

                    # test window scales with the current train length
                    yield (index[train_inds],
                           index[train_inds[-1] + 1:
                                 train_inds[-1] + 1
                                 + int(test_proportion*len(train_inds))])

                else:

                    yield (index[train_inds],
                           index[train_inds[-1] + 1:
                                 train_inds[-1] + 1 + test_size])

        except Exception as e:
            self._logger.log_and_raise_error(("Failed to make expanding cv. "
                                              "Exit with error: {}".format(e)))

    def sliding_window_cv(
        self,
        test_proportion: float,
        train_proportion: float,
        step_proportion: float = None,
        data_set_size: Union[float, None] = None,
        index: Union[pd.Series, np.ndarray, list, None] = None)\
            -> Union[Iterable[Tuple[List]], None]:
        """
        Generate sliding-window splits: a fixed-length train window moves
        forward by step_proportion each fold, followed by a fixed-length
        test window.

        Exactly one of index / data_set_size must be given.
        """
        try:
            assert((index is None) != (data_set_size is None)),\
                "Set index or data_set_size"

            index = pd.Series(index) if (index is not None)\
                else pd.Series(range(data_set_size))

            data_set_size = data_set_size or len(index)

            train_size = int(train_proportion * data_set_size)
            test_size = int(test_proportion * data_set_size)
            step_size = int(step_proportion * data_set_size)

            # window end positions: train_size, train_size + step, ...
            train_sizes = takewhile(lambda x: x <= data_set_size - test_size,
                                    accumulate(repeat(train_size),
                                               lambda x, _: x + step_size))

            # window start positions: 0, step, 2*step, ...
            train_starts = takewhile(lambda x: x <= data_set_size
                                     - train_size - test_size,
                                     accumulate(repeat(step_size),
                                                lambda x, _: x + step_size))

            train_starts = chain([0], train_starts)

            train_inds_set = list(range(train_start, train_size)
                                  for train_start, train_size in
                                  zip(train_starts, train_sizes))

            cv = ((index[train_inds], index[train_inds[-1] + 1:
                                            train_inds[-1] + 1 + test_size])
                  for train_inds in train_inds_set)

            return cv

        except Exception as e:
            self._logger.log_and_raise_error(
                    ("Failed to make sliding window cv. "
                     "Exit with error: {}".format(e)))
+

+ 0 - 0
cdplib/ml_validation/__init__.py


+ 491 - 0
cdplib/ml_validation/cross_validate_with_fine_tuning.py

@@ -0,0 +1,491 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Thu Oct 29 13:58:23 2020
+
+@author: tanya
+
+
+@description:
+
+* Input:
+    - pipeline/hyperparameter space
+    - data_train
+    - cv
+    - cv_folds
+
+* For each pipeline:
+
+    -> Split data_train into folds according to cv
+
+     -> For each fold:
+
+         => get data_train_fold, data_test_fold, cv_fold
+
+         => split data_train_fold into subfolds according to cv_fold
+
+         => For each subfold:
+
+             ==> get data_train_subfold, data_test_subfold
+
+             ==> train pipeline on data_train_subfold
+
+             ==> find best_threshold_subfold on data_test_subfold
+
+        => Find averaged_threshold_fold averaged over best_threshold_subfold
+
+        => train pipeline on data_train_fold
+
+        => find score_fold on data_test_fold with proba_threshold_fold
+
+        => find best_threshold_fold on data_test_fold
+
+    -> find score averaged over score_fold
+
+    -> find averaged_threshold averaged over best_threshold_fold
+
+* choose (pipeline/hyperparameters, threshold) in the space with best score
+
+"""
+
from copy import deepcopy
from itertools import accumulate, chain, repeat, takewhile, zip_longest
from typing import Union, Callable, Dict, Iterable, Tuple, List

import numpy as np
import pandas as pd

from sklearn.model_selection import StratifiedKFold

from cdplib.log import Log
from cdplib.ml_validation.expanding_cv import make_expanding_cv
+
+
+
+
+
# NOTE(review): removed leftover interactive smoke-test calls to
# ``make_sliding_window_cv`` — that function is neither defined nor
# imported in this module, so these module-level statements raised
# NameError the moment the module was imported.  The maintained
# implementation lives in cdplib.ml_validation.CVComposer
# (CVComposer.sliding_window_cv).
+
+
+# TODO: write with yield !!!!
+
def make_nested_expanding_cv(
        test_proportion: float,
        start_train_proportion: float,
        step_proportion: float = None,
        expanding_test_size: bool = False,
        data_set_size: Union[float, None] = None,
        index: Union[pd.Series, np.ndarray, list, None] = None)\
        -> Iterable[Tuple[List]]:
    """
    Build a nested expanding-window cross-validation scheme: an outer
    expanding cv is created over the whole data set, and for each outer
    fold an inner expanding cv (with the same proportions) is created
    over that fold's training window.

    ``make_expanding_cv`` is defined in
    cdplib.ml_validation.expanding_cv.

    :param test_proportion: fraction of the (sub)set used as test window
    :param start_train_proportion: fraction used as the initial train window
    :param step_proportion: growth of the train window per fold
    :param expanding_test_size: if True, the test window grows with the
        train window
    :param data_set_size: size of the data set; exactly one of
        data_set_size / index must be given (enforced downstream)
    :param index: explicit index to slice instead of positional ranges
    :return: list of inner cv fold lists, one per outer fold
    """
    logger = Log("make_nested_expanding_cv:")

    try:
        # outer cv over the full data set
        cv = make_expanding_cv(test_proportion=test_proportion,
                               start_train_proportion=start_train_proportion,
                               step_proportion=step_proportion,
                               expanding_test_size=expanding_test_size,
                               data_set_size=data_set_size,
                               index=index)

        nested_cv = []

        for train_inds, test_inds in cv:

            # the inner cv is parameterized either by the fold's index
            # (when an explicit index was given) or by its size —
            # never both, to satisfy make_expanding_cv's XOR check
            fold_index = train_inds if index is not None\
                else None

            fold_size = len(train_inds) if index is None else None

            fold_cv = make_expanding_cv(
                    test_proportion=test_proportion,
                    start_train_proportion=start_train_proportion,
                    step_proportion=step_proportion,
                    expanding_test_size=expanding_test_size,
                    data_set_size=fold_size,
                    index=fold_index)

            # materialize the generator so the result is reusable
            nested_cv.append(list(fold_cv))

        return nested_cv

    except Exception as e:
        logger.log_and_raise_error(("Failed to make nested expanding cv. "
                                    "Exit with error: {}".format(e)))
+
+
+
+
# NOTE(review): removed leftover debugging loops over ``aa``/``aaa`` and a
# second call to ``make_nested_expanding_cv``: ``aa`` was never defined in
# this module and ``aaa`` was consumed (``aaa = list(aaa)``) before being
# assigned, so this module-level scratch code raised NameError on import.
+
+
+
+
+
def cv_slice_dataset(X, y, train_inds, test_inds)\
        -> Tuple[Union[pd.DataFrame, np.ndarray],
                 Union[pd.DataFrame, np.ndarray],
                 Union[pd.Series, np.ndarray, None],
                 Union[pd.Series, np.ndarray, None]]:
    """
    Slice the feature matrix X (and, if given, the target y) into a
    train and a validation part using the given index collections.

    :param X: feature matrix (DataFrame sliced with .loc, otherwise
        positional/fancy indexing)
    :param y: target vector or None
    :param train_inds: indices of the training rows
    :param test_inds: indices of the validation rows
    :return: (X_train, X_val, y_train, y_val); the y parts are None
        when y is None.
    """
    if isinstance(X, pd.DataFrame):
        # label-based selection keeps the original index values
        X_train = X.loc[train_inds]
        X_val = X.loc[test_inds]
    else:
        X_train = X[train_inds]
        X_val = X[test_inds]

    # BUG FIX: the original left y_train/y_val unbound when y is None,
    # which raised UnboundLocalError at the return statement.
    y_train = None
    y_val = None

    if y is not None:
        y_train = y[train_inds]
        y_val = y[test_inds]

    return X_train, X_val, y_train, y_val
+
+
def get_optimal_proba_threshold(score_func: Callable,
                                y_true: Union[pd.Series, np.ndarray],
                                proba: Union[pd.Series, np.ndarray],
                                threshold_set: Union[Iterable, None] = None):
    """
    Pick the probability threshold that maximizes score_func.

    Each candidate threshold converts proba into hard 0/1 labels via
    (proba >= threshold); the candidate with the highest score wins.
    On ties the earliest candidate in threshold_set is returned.

    :param score_func: callable(y_true, y_pred) -> comparable score
    :param y_true: ground-truth binary labels
    :param proba: predicted probabilities of the positive class
    :param threshold_set: candidate thresholds; defaults to
        np.arange(0, 1, 0.1)
    :return: the best-scoring threshold
    """
    if threshold_set is None:
        threshold_set = np.arange(0, 1, 0.1)

    best_threshold = None
    best_score = None

    for candidate in threshold_set:

        labels = (proba >= candidate).astype(int)
        candidate_score = score_func(y_true, labels)

        # strict '>' keeps the earliest candidate on ties,
        # matching dict-insertion-order max() semantics
        if (best_score is None) or (candidate_score > best_score):
            best_threshold = candidate
            best_score = candidate_score

    return best_threshold
+
+
def cross_validate_with_optimal_threshold(
        estimator: object,
        score_func: Callable,
        X_train: Union[pd.DataFrame, np.ndarray],
        y_train: Union[pd.Series, np.ndarray, None] = None,
        X_val: Union[pd.DataFrame, np.ndarray, None] = None,
        y_val: Union[pd.Series, np.ndarray, None] = None,
        X_val_threshold: Union[pd.DataFrame, np.ndarray, None] = None,
        y_val_threshold: Union[pd.Series, np.ndarray, None] = None,
        cv: Union[Iterable, int, None] = None,
        cv_threshold: Union[Iterable, int, None] = None,
        additional_metrics: Union[Dict[str, Callable], None] = None,
        threshold_set: Union[Iterable, None] = None,
        scores: Dict = None)\
            -> Dict:
    """
    Cross-validate ``estimator`` while tuning the probability threshold
    used to binarize predict_proba outputs.

    Two modes:
    * cv is None: the outer evaluation uses the explicit (X_val, y_val)
      split; the threshold is tuned either on (X_val_threshold,
      y_val_threshold), on cv_threshold folds of the training data, or
      (fallback, risks overfitting) on the validation set itself.
    * cv given: the function recurses into itself once per outer fold,
      accumulating results in ``scores``.

    :return: dict with lists "test_threshold", "test_score",
        "train_score" and one "train_"/"test_" entry per additional
        metric, one element per outer fold.
    """
    logger = Log("cross_validate_with_optimal_threshold:")

    # defensive copies: the recursion below re-slices these objects
    X_train = deepcopy(X_train)
    y_train = deepcopy(y_train)
    X_val = deepcopy(X_val)
    y_val = deepcopy(y_val)
    X_val_threshold = deepcopy(X_val_threshold)
    y_val_threshold = deepcopy(y_val_threshold)

    # ``scores`` is threaded through the recursive calls as an accumulator
    scores = scores or {"test_threshold": [],
                        "test_score": [],
                        "train_score": []}

    additional_metrics = additional_metrics or {}

    for metric_name, metric in additional_metrics.items():
        if "test_" + metric_name not in scores:
            scores["test_" + metric_name] = []
            scores["train_" + metric_name] = []

    if cv is None:

        # test score is calculated on X_vals

        assert((X_val is not None) and (y_val is not None)),\
            "Validation set must be set"

        if cv_threshold is None:

            refit = (X_val_threshold is not None)

            # if a validation set for proba threshold tuning is not given,
            # we use the validation set on which we calculate the test score
            # (this might lead to overfitting)

            X_val_threshold = X_val_threshold if refit else deepcopy(X_val)
            y_val_threshold = y_val_threshold if refit else deepcopy(y_val)

            # NOTE(review): ``make_dummy_cv`` is neither defined nor
            # imported in this module — presumably this should be
            # CVComposer.dummy_cv_and_concatenated_data_set; confirm
            # before exercising this branch.
            cv_threshold, X_train, y_train = make_dummy_cv(
                    X_train=X_train,
                    y_train=y_train,
                    X_val=X_val_threshold,
                    y_val=y_val_threshold)
        else:

            # if cv_threshold is given, we find the optimal threshold
            # on each fold and output the average value for the threshold

            if (X_val_threshold is not None):
                logger.log_and_throw_warning((
                        "X_val_threshold is set "
                        "but cv_threshold will be used"))

            if isinstance(cv_threshold, int):
                cv_threshold = StratifiedKFold(n_splits=cv_threshold)\
                    .split(X=X_train, y=y_train)

            refit = True

        thresholds = []

        for train_inds, val_inds in cv_threshold:

            print("----- In cv threshold fold")

            X_train_fold, X_val_fold, y_train_fold, y_val_fold =\
                cv_slice_dataset(X=X_train,
                                 y=y_train,
                                 train_inds=train_inds,
                                 test_inds=val_inds)

            estimator.fit(X_train_fold, y_train_fold)

            # probability of the positive class
            proba_val = estimator.predict_proba(X_val_fold)[:, 1]

            threshold = get_optimal_proba_threshold(score_func=score_func,
                                                    y_true=y_val_fold,
                                                    proba=proba_val)

            thresholds.append(threshold)

            print("----- Threshold:", threshold)

        scores["test_threshold"].append(np.mean(thresholds))

        if refit:

            estimator.fit(X_train, y_train)

            proba_val = estimator.predict_proba(X_val)[:, 1]

        proba_train = estimator.predict_proba(X_train)[:, 1]

        # NOTE(review): ``threshold`` here is the LAST fold's threshold,
        # not the mean appended to scores above — confirm this is intended
        pred_train = (proba_train >= threshold)
        pred_val = (proba_val >= threshold)

        train_score = score_func(y_train, pred_train)
        test_score = score_func(y_val, pred_val)

        for metric_name, metric in additional_metrics.items():
            scores["train_" + metric_name].append(metric(y_train, pred_train))
            scores["test_" + metric_name].append(metric(y_val, pred_val))

        scores["train_score"].append(train_score)
        scores["test_score"].append(test_score)

        return scores

    else:

        if isinstance(cv, int):
            cv = StratifiedKFold(n_splits=cv).split(X=X_train, y=y_train)

        # zip_longest pads with None when cv_threshold is shorter than cv
        cv_threshold = cv_threshold or []

        for (train_inds, val_inds), cv_fold in zip_longest(cv, cv_threshold):

            print("=== In cv fold")

            X_train_fold, X_val_fold, y_train_fold, y_val_fold =\
                cv_slice_dataset(X=X_train,
                                 y=y_train,
                                 train_inds=train_inds,
                                 test_inds=val_inds)

            # recurse with cv=None: each outer fold is evaluated as an
            # explicit train/validation split
            scores = cross_validate_with_optimal_threshold(
                    estimator=estimator,
                    score_func=score_func,
                    X_train=X_train_fold,
                    y_train=y_train_fold,
                    X_val=X_val_fold,
                    y_val=y_val_fold,
                    cv_threshold=cv_fold,
                    additional_metrics=additional_metrics,
                    threshold_set=threshold_set,
                    scores=scores)

            print("=== scores:", scores)

        return scores
+
+
if __name__ == "__main__":

    # Manual smoke test: run cross_validate_with_optimal_threshold on the
    # breast-cancer data set through every combination of cv / cv_threshold
    # modes and print the resulting scores.

    from sklearn.metrics import accuracy_score, precision_score
    from sklearn.datasets import load_breast_cancer
    from xgboost import XGBRFClassifier
    from sklearn.model_selection import train_test_split

    data_loader = load_breast_cancer()

    X = data_loader["data"]
    y = data_loader["target"]

    X_train, X_val, y_train, y_val = train_test_split(X, y)

    estimator = XGBRFClassifier()

    score_func = accuracy_score

    additional_metrics = {"precision": precision_score}

    averaged_scores = []
    averaged_thresholds = []

    print("\nTesting cv=None, cv_threshold=None, X_val_threshold=None\n")

    scores = cross_validate_with_optimal_threshold(
            estimator=estimator,
            score_func=accuracy_score,
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            X_val_threshold=None,
            y_val_threshold=None,
            cv=None,
            cv_threshold=None,
            additional_metrics=additional_metrics)

    print("\nScores:", scores)

    averaged_scores.append(np.mean(scores["test_score"]))
    averaged_thresholds.append(np.mean(scores["test_threshold"]))

    print("\n ########################################################## \n")

    # carve a dedicated threshold-tuning validation set out of the
    # training data for the next scenarios
    X_train, X_val_threshold, y_train, y_val_threshold =\
        train_test_split(X_train, y_train)

    print("\nTesting cv=None, cv_threshold=None, X_val_threshold\n")

    scores = cross_validate_with_optimal_threshold(
            estimator=estimator,
            score_func=accuracy_score,
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            X_val_threshold=X_val_threshold,
            y_val_threshold=y_val_threshold,
            cv=None,
            cv_threshold=None,
            additional_metrics=additional_metrics)

    print("\nScores:", scores)

    averaged_scores.append(np.mean(scores["test_score"]))
    averaged_thresholds.append(np.mean(scores["test_threshold"]))

    print("\n ########################################################## \n")

    print("\nTesting cv=None, cv_threshold=3 \n")

    scores = cross_validate_with_optimal_threshold(
            estimator=estimator,
            score_func=accuracy_score,
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            X_val_threshold=X_val_threshold,
            y_val_threshold=y_val_threshold,
            cv=None,
            cv_threshold=3,
            additional_metrics=additional_metrics)

    print("\nScores:", scores)

    averaged_scores.append(np.mean(scores["test_score"]))
    averaged_thresholds.append(np.mean(scores["test_threshold"]))

    print("\n ########################################################## \n")

    print("\nTesting cv=3, cv_threshold=None \n")

    scores = cross_validate_with_optimal_threshold(
            estimator=estimator,
            score_func=accuracy_score,
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            X_val_threshold=X_val_threshold,
            y_val_threshold=y_val_threshold,
            cv=3,
            cv_threshold=None,
            additional_metrics=additional_metrics)

    print("\nScores:", scores)

    # NOTE(review): this scenario does not append to averaged_scores /
    # averaged_thresholds, unlike the others — confirm that is intended

    print("\n ########################################################## \n")

    print("\nTesting cv=3, cv_threshold=[3, 3, 3] \n")

    scores = cross_validate_with_optimal_threshold(
            estimator=estimator,
            score_func=accuracy_score,
            X_train=X_train,
            y_train=y_train,
            X_val=X_val,
            y_val=y_val,
            X_val_threshold=X_val_threshold,
            y_val_threshold=y_val_threshold,
            cv=3,
            cv_threshold=[3, 3, 3],
            additional_metrics=additional_metrics)

    print("\nScores:", scores)

    averaged_scores.append(np.mean(scores["test_score"]))
    averaged_thresholds.append(np.mean(scores["test_threshold"]))

    print("\n ########################################################## \n")

    # TODO: check overwriting X_train,
    # additional metrics append instead of overwrite
    # check the length of cv_threshold
    # test custom cv, cv_threshold

    print("\n Averaged test score:", averaged_scores)
    print("\n Averaged threshold:", averaged_thresholds)

+ 97 - 0
cdplib/ml_validation/expanding_cv.py

@@ -0,0 +1,97 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Wed Dec  9 09:55:52 2020
+
+@author: tanya
+"""
+
+from typing import Union, Iterable, Tuple, List
+import pandas as pd
+import numpy as np
+from itertools import accumulate, repeat, takewhile
+
+from cdplib.log import Log
+
+
def make_expanding_cv(test_proportion: float,
                      start_train_proportion: float,
                      step_proportion: float = None,
                      expanding_test_size: bool = False,
                      data_set_size: Union[float, None] = None,
                      index: Union[pd.Series, np.ndarray, list, None] = None)\
        -> Union[Iterable[Tuple[List]], None]:
    """
    Generate expanding-window cross-validation splits: the train window
    always starts at the beginning of the data and grows by
    step_proportion of the data set each fold; the test window directly
    follows the train window.

    Exactly one of index / data_set_size must be given.

    NOTE(review): this is a generator, so the argument assert only fires
    when the first fold is requested, not at call time — confirm callers
    always iterate the result.
    NOTE(review): step_proportion is effectively required — a None
    default crashes in ``int(step_proportion * ...)``.

    :param test_proportion: fraction of the data used as the test window
    :param start_train_proportion: fraction used as the initial train window
    :param step_proportion: train-window growth per fold
    :param expanding_test_size: if True, the test window grows
        proportionally with the train window
    :param data_set_size: number of samples (positional indices are used)
    :param index: explicit index to slice instead of positional ranges
    :yield: (train_index, test_index) pairs
    """
    logger = Log("make_expanding_cv:")

    try:
        assert((index is None) != (data_set_size is None)),\
            "Set index or data_set_size"

        if index is None:
            index = pd.Series(range(data_set_size))
        elif isinstance(index, list):
            # BUG FIX: a plain list cannot be indexed with a list of
            # positions (index[train_inds] raised TypeError) although the
            # annotation allows lists; wrap it in a Series, consistent
            # with CVComposer.expanding_cv.
            index = pd.Series(index)

        data_set_size = data_set_size or len(index)

        start_train_size = int(start_train_proportion * data_set_size)
        step_size = int(step_proportion * data_set_size)

        test_size = int(test_proportion * data_set_size)

        # train window lengths: start, start + step, start + 2*step, ...
        # while the test window still fits after the train window
        train_inds_set = (list(range(train_size))
                          for train_size in
                          takewhile(
                                  lambda x: x <= data_set_size - test_size,
                                  accumulate(repeat(start_train_size),
                                             lambda x, _: x + step_size)))

        for train_inds in train_inds_set:

            if expanding_test_size:

                # test window scales with the current train length
                yield (index[train_inds],
                       index[train_inds[-1] + 1:
                             train_inds[-1] + 1
                             + int(test_proportion*len(train_inds))])

            else:

                yield (index[train_inds],
                       index[train_inds[-1] + 1:
                             train_inds[-1] + 1 + test_size])

    except Exception as e:
        logger.log_and_raise_error(("Failed to make expanding cv. "
                                    "Exit with error: {}".format(e)))
+
+
if __name__ == "__main__":

    # Manual smoke test: exercise make_expanding_cv with a positional
    # data set size and with an explicit datetime index.

    logger = Log("Test_expanding_cv: ")

    logger.info("Start Testing")

    logger.info("Testing expanding cv: ")

    cv = make_expanding_cv(data_set_size=50,
                           test_proportion=0.1,
                           start_train_proportion=0.6,
                           step_proportion=0.1,
                           expanding_test_size=True)

    # materialize the generator to force execution
    cv = list(cv)

    logger.info("Testing expanding cv with datetime index")

    cv = make_expanding_cv(
            test_proportion=0.1,
            start_train_proportion=0.6,
            step_proportion=0.1,
            index=pd.date_range(start=pd.to_datetime("2020-01-01"),
                                periods=50))

    cv = list(cv)

    logger.info("Finish testing")

+ 235 - 228
cdplib/pipeline_selector/PipelineSelector.py

@@ -24,8 +24,10 @@ import time
 import datetime
 import datetime
 import numpy as np
 import numpy as np
 import pandas as pd
 import pandas as pd
+from copy import deepcopy
 from abc import ABC, abstractmethod, abstractproperty
 from abc import ABC, abstractmethod, abstractproperty
-from typing import Callable
+from typing import Callable, Optional, TypedDict,\
+    Literal, Dict, Iterable, List, Tuple, Union
 import functools
 import functools
 from sklearn.pipeline import Pipeline
 from sklearn.pipeline import Pipeline
 from sklearn.model_selection import cross_validate as sklearn_cross_validation
 from sklearn.model_selection import cross_validate as sklearn_cross_validation
@@ -34,10 +36,17 @@ from hyperopt import STATUS_OK, STATUS_FAIL
 from cdplib.log import Log
 from cdplib.log import Log
 from cdplib.utils import ExceptionsHandler
 from cdplib.utils import ExceptionsHandler
 from cdplib.utils import LoadingUtils
 from cdplib.utils import LoadingUtils
+from cdplib.ml_validation import CVComposer
 
 
 sys.path.append(os.getcwd())
 sys.path.append(os.getcwd())
 
 
 
 
+class SpaceElementType(TypedDict):
+    name: str
+    pipeline: Pipeline
+    params: dict
+
+
 class PipelineSelector(ABC):
 class PipelineSelector(ABC):
     """
     """
     An abstract class for selecting a machine learning
     An abstract class for selecting a machine learning
@@ -53,16 +62,20 @@ class PipelineSelector(ABC):
     Children classes: hyperopt and custom gridsearch.
     Children classes: hyperopt and custom gridsearch.
     """
     """
     def __init__(self,
     def __init__(self,
-                 cost_func: (Callable, str),
+                 cost_func: Union[Callable, str],
                  greater_is_better: bool,
                  greater_is_better: bool,
                  trials_path: str,
                  trials_path: str,
-                 backup_trials_freq: int = None,
-                 cross_val_averaging_func: Callable = None,
-                 additional_metrics: dict = None,
-                 strategy_name: str = None,
-                 stdout_log_level: str = "INFO"):
+                 backup_trials_freq: Optional[int] = None,
+                 cross_val_averaging_func: Callable = np.mean,
+                 additional_metrics: Optional[Dict[str, Callable]] = None,
+                 additional_averaging_funcs:
+                     Optional[Dict[str, Callable]] = None,
+                 strategy_name: Optional[str] = None,
+                 stdout_log_level: Literal["INFO", "WARNING", "ERROR"]
+                 = "INFO"):
         """
         """
         :param Callable cost_func: function to minimize or maximize
         :param Callable cost_func: function to minimize or maximize
+            over the elements of a given (pipeline/hyperparameter) space
 
 
         :param bool greater_is_better: when True
         :param bool greater_is_better: when True
             cost_func is maximized, else minimized.
             cost_func is maximized, else minimized.
@@ -72,62 +85,42 @@ class PipelineSelector(ABC):
             select information about the obtained scores, score variations,
             select information about the obtained scores, score variations,
             and pipelines, and parameters tried out so far. If a trials object
             and pipelines, and parameters tried out so far. If a trials object
             already exists at the given path, it is loaded and the
             already exists at the given path, it is loaded and the
-            search is continued, else, the search is started from
-            the beginning.
+            search is continued, else, the search is started from scratch.
 
 
         :param backup_trials_freq: frequency in iterations (trials)
         :param backup_trials_freq: frequency in iterations (trials)
             of saving the trials object at the trials_path.
             of saving the trials object at the trials_path.
             if None, the trials object is backed up every time
             if None, the trials object is backed up every time
             the score improves.
             the score improves.
 
 
-        :param str log_path: Optional, when not provided logs to stdout.
+        :param Callable cross_val_averaging_func: Function to aggregate
+            the cross-validation scores of the cost_func.
+            Example different from the mean: mean - c*var.
+
+        :param additional_metrics: dict of additional metrics to keep track of
+            in the trials of the form {"metric_name": metric}.
 
 
-        :param Callable cross_val_averaging_func: optional,
-            when not provided set to mean. Function
-            to aggregate the cross-validated values of the cost function.
-            Classic situation is to take the mean,
-            another example is, for example mean() - c*var().
+        :param additional_averaging_funcs: functions used to aggregate
+            the output of the cross_validate function.
+            The output always contains the scores of the cost_func,
+            additional_metrics (if it is not empty),
+            but it can also contain additional information
+            (like probability threshold for example)
+            if different from cross_val_averaging_func.
+            Of the form {"metric_name": averaging_func}
 
 
-        :param additional_metics: dict of additional metrics to save
-            of the form {"metric_name": metric} where metric is a Callable.
+            Remark:
 
 
-        :param str strategy_name: a name might be asigned to the trials,
-            a strategy is defined by the data set, cv object, cost function.
-            When the strategy changes, one should start with new trials.
+        :param str strategy_name:
+            a strategy is defined by the data set (columns/features and rows),
+            cv object, cost function.
+            When the strategy changes, one must start with new trials.
 
 
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         :param str stdout_log_level: can be INFO, WARNING, ERROR
         """
         """
-        try:
+        self._logger = Log("PipelineSelector: ",
+                           stdout_log_level=stdout_log_level)
 
 
-            self._logger = Log("PipelineSelector: ",
-                               stdout_log_level=stdout_log_level)
-
-            input_errors = [
-                    (cost_func, Callable,
-                     "Parameter 'cost_func' must be a Callable"),
-                    (greater_is_better, bool,
-                     "Parameter 'greater_is_better' must be bool type"),
-                    (trials_path, str,
-                     "Parameter 'trials_path' must be of string type"),
-                    (cross_val_averaging_func, (Callable, None.__class__),
-                     ("Parameter 'cross_val_averaging_func'"
-                      "must be a Callable")),
-                    (backup_trials_freq, (int, None.__class__),
-                     "Parameter backup_trials_freq must be an int"),
-                    (additional_metrics, (dict, None.__class__),
-                     "Parameter additional_metrics must be a dict"),
-                    (strategy_name, (str, None.__class__),
-                     "Parameter strategy_name must be a str"),
-                    (stdout_log_level, str,
-                     "Parameter stdout_log_level must be a str")]
-
-            for p, t, err in input_errors:
-                assert((isinstance(p, t))), err
-
-            assert((additional_metrics is None) or
-                   all([isinstance(metric, Callable)
-                        for metric in additional_metrics.values()])),\
-                "Metrics in additional_metrics must be Callables"
+        try:
 
 
             ExceptionsHandler(self._logger)\
             ExceptionsHandler(self._logger)\
                 .assert_is_directory(path=trials_path)
                 .assert_is_directory(path=trials_path)
@@ -143,18 +136,14 @@ class PipelineSelector(ABC):
             self._score_factor = (not greater_is_better) - greater_is_better
             self._score_factor = (not greater_is_better) - greater_is_better
             self.trials_path = trials_path
             self.trials_path = trials_path
             self._backup_trials_freq = backup_trials_freq
             self._backup_trials_freq = backup_trials_freq
-            self._cross_val_averaging_func = cross_val_averaging_func\
-                or np.mean
-            self._additional_metrics = additional_metrics or {}
             self._strategy_name = strategy_name
             self._strategy_name = strategy_name
             self._data_path = None
             self._data_path = None
             self._cv_path = None
             self._cv_path = None
 
 
-            # best_score can be also read from trials
-            # but is kept explicitely in order not to
-            # search through the trials object every time
-            # loss is the opposite of score
-            self.best_score = np.nan
+            self._X = None
+            self._y = None
+            self._cv = None
+            self._space = None
 
 
             # if cross-validation is not configured,
             # if cross-validation is not configured,
             # sklearn cross-validation method is taken by default
             # sklearn cross-validation method is taken by default
@@ -164,23 +153,17 @@ class PipelineSelector(ABC):
             # it is loaded and the search is continued. Else,
             # it is loaded and the search is continued. Else,
             # the search is started from the beginning.
             # the search is started from the beginning.
             if os.path.isfile(self.trials_path):
             if os.path.isfile(self.trials_path):
-                try:
-                    with open(self.trials_path, "rb") as f:
-                        self._trials = pickle.load(f)
 
 
-                    self._start_iteration = self.number_of_trials
+                with open(self.trials_path, "rb") as f:
+                    self._trials = pickle.load(f)
 
 
-                    self.best_score = self.best_trial_score
+                self._start_iteration = self.number_of_trials
 
 
-                    self._logger.info(("Loaded an existing trials object"
-                                       "Consisting of {} trials")
-                                      .format(self._start_iteration))
+                self.best_score = self.best_trial_score
 
 
-                except Exception as e:
-                    err = ("Trials object could not be loaded. "
-                           "Exit with error {}").format(e)
-                    self._logger.log_and_raise_error(err)
-                    self._trials = None
+                self._logger.info(("Loaded an existing trials object"
+                                   "Consisting of {} trials")
+                                  .format(self._start_iteration))
 
 
             else:
             else:
                 self._logger.warning(("No existing trials object was found, "
                 self._logger.warning(("No existing trials object was found, "
@@ -188,6 +171,7 @@ class PipelineSelector(ABC):
 
 
                 self._trials = None
                 self._trials = None
                 self._start_iteration = 0
                 self._start_iteration = 0
+                self.best_score = np.nan
 
 
             # keeping track of the current search iteration
             # keeping track of the current search iteration
             self._iteration = self._start_iteration
             self._iteration = self._start_iteration
@@ -203,10 +187,9 @@ class PipelineSelector(ABC):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _backup_trials(self):
+    def _backup_trials(self) -> None:
         '''
         '''
-        Pickles (Saves) the trials object.
-        Used in a scheduler.
+        Pickles (Saves) the trials object in binary format.
         '''
         '''
         try:
         try:
             with open(self.trials_path, "wb") as f:
             with open(self.trials_path, "wb") as f:
@@ -218,30 +201,21 @@ class PipelineSelector(ABC):
 
 
     def configure_cross_validation(self,
     def configure_cross_validation(self,
                                    cross_validation: Callable,
                                    cross_validation: Callable,
-                                   kwargs: dict = None):
+                                   kwargs: dict = None) -> None:
         """
         """
         Method for attaching a custom cross-validation function
         Method for attaching a custom cross-validation function
+
         :param cross_validation: a function that has the same
         :param cross_validation: a function that has the same
              signature as sklearn.model_selection.cross_validate
              signature as sklearn.model_selection.cross_validate
         """
         """
         try:
         try:
-            assert(isinstance(cross_validation, Callable)),\
-                "Parameter cross_validation must be a function"
-
             kwargs = kwargs or {}
             kwargs = kwargs or {}
 
 
-            assert(isinstance(kwargs, dict)),\
-                "Paramter kwargs must be a dict"
-
             self._cross_validation = functools.partial(
             self._cross_validation = functools.partial(
                     self._cross_validation, **kwargs)
                     self._cross_validation, **kwargs)
 
 
             self.configured_cross_validation = True
             self.configured_cross_validation = True
 
 
-            if hasattr(cross_validation, "__name__"):
-                self.best_result["cross_validation"] =\
-                    cross_validation.__name__
-
             self._logger.info("Configured cross validation")
             self._logger.info("Configured cross validation")
 
 
         except Exception as e:
         except Exception as e:
@@ -252,8 +226,12 @@ class PipelineSelector(ABC):
 
 
     def configure_cross_validation_from_module(self,
     def configure_cross_validation_from_module(self,
                                                module_path: str,
                                                module_path: str,
-                                               name: str):
+                                               name: str) -> None:
         """
         """
+        Attaches a cross-validation function defined in
+        a different python module. This function must have
+        the same signature as sklearn.model_selection.cross_validate
+
         :param str module_path: path to python module
         :param str module_path: path to python module
             where the cross_validation function is defined.
             where the cross_validation function is defined.
 
 
@@ -261,18 +239,12 @@ class PipelineSelector(ABC):
             loaded from a python module.
             loaded from a python module.
         """
         """
         try:
         try:
-            assert(isinstance(module_path, str) and
-                   isinstance(name, str)),\
-                   "Parameters module_path and name must be of str type"
-
             self._cross_validation = \
             self._cross_validation = \
                 LoadingUtils().load_from_module(
                 LoadingUtils().load_from_module(
                         module_path=module_path, name=name)
                         module_path=module_path, name=name)
 
 
             self.configured_cross_validation = True
             self.configured_cross_validation = True
 
 
-            self.best_result["cross_validation"] = name
-
             self._logger.info("Configured cross validation")
             self._logger.info("Configured cross validation")
 
 
         except Exception as e:
         except Exception as e:
@@ -281,8 +253,11 @@ class PipelineSelector(ABC):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def attach_space(self, space):
+    def attach_space(self, space) -> None:
         """
         """
+        Method for attaching the pipeline/hyperparameter space
+        over which the score_func is optimized.
+
         :param space: space where
         :param space: space where
             the search is performed. A space might be either
             the search is performed. A space might be either
             a list of dictionaries or a hyperopt space object
             a list of dictionaries or a hyperopt space object
@@ -291,17 +266,21 @@ class PipelineSelector(ABC):
         """
         """
         try:
         try:
             self._space = space
             self._space = space
-            self._logger.info("Attached parameter distribution space")
+
             self.attached_space = True
             self.attached_space = True
 
 
+            self._logger.info("Attached parameter distribution space")
+
         except Exception as e:
         except Exception as e:
             err = ("Failed to attach space. "
             err = ("Failed to attach space. "
                    "Exit with error: {}".format(e))
                    "Exit with error: {}".format(e))
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def attach_space_from_module(self, module_path: str, name: str):
+    def attach_space_from_module(self, module_path: str, name: str) -> None:
         """
         """
+        Attaches a space defined in a different python module.
+
         :param str module_path: path to python module
         :param str module_path: path to python module
             where the space is defined.
             where the space is defined.
 
 
@@ -309,34 +288,34 @@ class PipelineSelector(ABC):
             a python module.
             a python module.
         """
         """
         try:
         try:
-            assert(isinstance(module_path, str) and
-                   isinstance(name, str)),\
-                   "Parameters module_path and name must be of str type"
-
             self._space = LoadingUtils().load_from_module(
             self._space = LoadingUtils().load_from_module(
                     module_path=module_path, name=name)
                     module_path=module_path, name=name)
 
 
-            self._logger.info("Attached parameter distribution space")
-
             self.attached_space = True
             self.attached_space = True
 
 
+            self._logger.info("Attached parameter distribution space")
+
         except Exception as e:
         except Exception as e:
             err = ("Failed to attach space from module. "
             err = ("Failed to attach space from module. "
                    "Exit with error {}".format(e))
                    "Exit with error {}".format(e))
 
 
             self._logger.loger_and_raise_error(err)
             self._logger.loger_and_raise_error(err)
 
 
-    def attach_data(self, X_train: (pd.DataFrame, np.ndarray),
-                    y_train: (pd.DataFrame, pd.Series, np.ndarray) = None,
-                    X_val: (pd.DataFrame, np.ndarray) = None,
-                    y_val: (pd.DataFrame, pd.Series, np.ndarray) = None,
-                    cv: (list, int) = None):
+    def attach_data(self, X_train: Union[pd.DataFrame, np.ndarray],
+                    y_train: Optional[pd.DataFrame, pd.Series, np.ndarray]
+                    = None,
+                    X_val: Optional[pd.DataFrame, np.ndarray]
+                    = None,
+                    y_val: Optional[pd.DataFrame, pd.Series, np.ndarray]
+                    = None,
+                    cv: Optional[Iterable[Tuple[List[int], List[int]]]]
+                    = None) -> None:
         '''
         '''
         :param array X_train: data on which
         :param array X_train: data on which
             machine learning pipelines are trained
             machine learning pipelines are trained
 
 
         :param array y_train: optional, vector with targets,
         :param array y_train: optional, vector with targets,
-            (not all algorithms require a targets)
+            (None in case of unsupervised learning)
 
 
         :param array X_val: optional, validation data.
         :param array X_val: optional, validation data.
             When not provided, cross-validated value
             When not provided, cross-validated value
@@ -344,53 +323,49 @@ class PipelineSelector(ABC):
 
 
         :param array y_val: optional, validation targets
         :param array y_val: optional, validation targets
 
 
-        :param list cv: list of tuples containing
+        :param list cv: iterable of tuples containing
             train and validation indices or an integer representing
             train and validation indices or an integer representing
             the number of folds for a random split of data
             the number of folds for a random split of data
             during cross-validation
             during cross-validation
             example: [([0,1,2], [3,4]), ([1,2,3], [4,5])]
             example: [([0,1,2], [3,4]), ([1,2,3], [4,5])]
         '''
         '''
         try:
         try:
-            NoneType = None.__class__
-
-            input_err = "Non-valid combination of train and val data types"
+            assert((cv is None) == (X_val is not None)),\
+                "Either cv or X_val must be provided"
 
 
             if cv is None:
             if cv is None:
-                assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
-                       isinstance(X_val, (pd.DataFrame, np.ndarray)) and
-                       isinstance(y_train, (pd.Series, np.ndarray,
-                                            pd.DataFrame, NoneType)) and
-                       isinstance(y_val, (pd.Series, np.ndarray)) and
-                       (y_val is None) == (y_train is None)), input_err
-
-                # cost is evaluated with a cross validation function
-                # that accepts an array and a cv object with
-                # indices of the fold splits.
+
+                assert((y_val is None) == (y_train is None)),\
+                    "y_train and y_val must be simultanious"
+
                 # Here we create a trivial cv object
                 # Here we create a trivial cv object
                 # with one validation split.
                 # with one validation split.
+                cv = CVComposer.dummy_cv()
+
+
+
+
 
 
                 train_inds = list(range(len(X_train)))
                 train_inds = list(range(len(X_train)))
                 val_inds = list(range(len(X_train),
                 val_inds = list(range(len(X_train),
                                       len(X_train) + len(X_val)))
                                       len(X_train) + len(X_val)))
 
 
                 self._cv = [(train_inds, val_inds)]
                 self._cv = [(train_inds, val_inds)]
+
                 self._X = np.concatenate([X_train, X_val])
                 self._X = np.concatenate([X_train, X_val])
                 self._y = None if y_train is None\
                 self._y = None if y_train is None\
                     else np.concatenate([y_train, y_val])
                     else np.concatenate([y_train, y_val])
 
 
             else:
             else:
-                assert(isinstance(X_train, (pd.DataFrame, np.ndarray)) and
-                       isinstance(y_train, (pd.Series, np.ndarray,
-                                            pd.DataFrame, NoneType)) and
-                       (X_val is None) and (y_val is None)), input_err
 
 
                 self._cv = cv
                 self._cv = cv
                 self._X = X_train
                 self._X = X_train
                 self._y = y_train
                 self._y = y_train
 
 
-            self._logger.info("Attached data")
             self.attached_data = True
             self.attached_data = True
 
 
+            self._logger.info("Attached data")
+
         except Exception as e:
         except Exception as e:
             err = ("Failed to attach data. "
             err = ("Failed to attach data. "
                    "Exit with error: {}".format(e))
                    "Exit with error: {}".format(e))
@@ -399,17 +374,23 @@ class PipelineSelector(ABC):
 
 
     def attach_data_from_hdf5(self,
     def attach_data_from_hdf5(self,
                               data_hdf5_store_path: str,
                               data_hdf5_store_path: str,
-                              cv_pickle_path: str = None):
-        """
-        Method for attaching data from a hdf5 store.
-             The hdf5 store is a binary file,
-             after loading it, it is a dictionary with keys
-             X_train (y_train, X_val, y_val). The cv is loaded
-             from a pickle file. The reason to separate the data
-             store from the cv store, is the hdf5 is optimized to
-             store large dataframes (especially with simple types) and
-             a a small list of lists like a cv-object is better
-             to be stored as a pickle file.
+                              cv_pickle_path: str = None) -> None:
+        """
+        Method for attaching data from a hdf5 store
+         and a cv object from a pickled file.
+
+         The hdf5 store is a binary file,
+         after loading it, it is a dictionary with keys
+         X_train (y_train, X_val, y_val).
+
+         The cv is loaded from a pickle file.
+
+         The reason to separate the data
+         store from the cv store, is the hdf5 is optimized to
+         store large dataframes (especially with simple types) and
+         a small list of lists like a cv-object is better
+         to be stored as a pickle file.
+
         :param str data_hdf5_store_path: path to the hdf5 store
         :param str data_hdf5_store_path: path to the hdf5 store
             with train and validation data
             with train and validation data
         :param str cv_pickle_path: path to the pickle file with
         :param str cv_pickle_path: path to the pickle file with
@@ -423,19 +404,16 @@ class PipelineSelector(ABC):
 
 
             self._data_path = data_hdf5_store_path
             self._data_path = data_hdf5_store_path
 
 
-            data_input = {}
-
-            for key in ["/X_train", "/y_train", "/X_val", "/y_val"]:
-                if key not in store.keys():
-                    data_input[key.replace("/", "")] = None
-                else:
-                    data_input[key.replace("/", "")] = store[key]
+            data_input = {key: store["key"] if key in store else None
+                          for key in ["X_train", "y_train", "X_val", "y_val"]}
 
 
             if cv_pickle_path is not None:
             if cv_pickle_path is not None:
+
                 assert(os.path.isfile(cv_pickle_path)),\
                 assert(os.path.isfile(cv_pickle_path)),\
                     "Parameter cv_pickle_path is not a file"
                     "Parameter cv_pickle_path is not a file"
 
 
                 data_input["cv"] = pickle.load(open(cv_pickle_path, "rb"))
                 data_input["cv"] = pickle.load(open(cv_pickle_path, "rb"))
+
                 self._cv_path = cv_pickle_path
                 self._cv_path = cv_pickle_path
 
 
             else:
             else:
@@ -449,21 +427,60 @@ class PipelineSelector(ABC):
             err = "Failed to attach data. Exit with error: {}".format(e)
             err = "Failed to attach data. Exit with error: {}".format(e)
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
+    @property
+    def default_summary(self) -> dict:
+        """
+        Default summary of the strategy.
+        Every time the _objective function is called
+        the current score and the information
+        about the tested space element is added to the
+        summary and it is saved to the Trials.
+        If summary saving is configured it is also
+        saved to a file, or a database when the score improves.
+        """
+        summary = {}
+
+        if self._strategy_name is not None:
+            summary["strategy_name"] = self._strategy_name
+
+        if isinstance(self._cost_func, str):
+            summary["cost_func"] = self._cost_func
+
+        elif hasattr(self._cost_func, "__name__"):
+            summary["cost_func"] = self._cost_func.__name__
+
+        summary["trials_path"] = self.trials_path
+
+        if self._data_path is not None:
+            summary["data_path"] = self._data_path
+
+        if self._cv_path is not None:
+            summary["cv_path"] = self._cv_path
+
+        summary["start_tuning_time"] = self.start_tuning_time
+
+        summary["iteration"] = self._iteration
+
+        return summary
+
     def configer_summary_saving(self,
     def configer_summary_saving(self,
-                                save_method: Callable = None,
-                                kwargs: dict = None):
-        """
-        Attaching a method for saving information about
-             the trials/space/strategy and the result of
-             the current best pipeline. This method can
-             save the result in a txt or a json file,
-             or in a database for example. Arguments like
-             file path or the table name can be specified in kwargs.
+                                save_method: Callable
+                                = functools.partial(
+                                        pd.DataFrame.to_excel,
+                                        **{"path_or_buf": "result.csv"}),
+                                kwargs: Optional[dict] = None) -> None:
+        """
+        When the score calculated by _objective function improves,
+        the default summary is updated with information about the
+        current score and pipeline/hyperparameters
+        and can be saved to a file or database, depending
+        on the configured save_method.
+
         :param Callable save_method: method for saving the result
         :param Callable save_method: method for saving the result
             of the pipeline selection. The method must accept
             of the pipeline selection. The method must accept
-            a pandas DataFrame as argument. See self._save_result
-            method for the format of the argument being saved.
-            By default, saving to a csv file.
+            a pandas DataFrame as argument.
+            By default, saving to an excel file.
+
             Examples:
             Examples:
                 functools.partial(pd.DataFrame.to_csv,
                 functools.partial(pd.DataFrame.to_csv,
                                   **{"path_or_buf": <PATH>})
                                   **{"path_or_buf": <PATH>})
@@ -476,13 +493,11 @@ class PipelineSelector(ABC):
                                   **{"collection_name": <NAME>})
                                   **{"collection_name": <NAME>})
 
 
             using functools can be avoided by providing the kwarg argument
             using functools can be avoided by providing the kwarg argument
+
         :param dict kwargs: a dictionary with keyword arguments
         :param dict kwargs: a dictionary with keyword arguments
             (like tablename) to provide to the save_method
             (like tablename) to provide to the save_method
         """
         """
         try:
         try:
-            save_method = save_method or functools.partial(
-                    pd.DataFrame.to_excel, **{"path_or_buf": "result.csv"})
-
             kwargs = kwargs or {}
             kwargs = kwargs or {}
 
 
             self._save_method = functools.partial(save_method, **kwargs)
             self._save_method = functools.partial(save_method, **kwargs)
@@ -494,10 +509,16 @@ class PipelineSelector(ABC):
         except Exception as e:
         except Exception as e:
             err = ("Failed to configure the summary saving. "
             err = ("Failed to configure the summary saving. "
                    "Exit with error {}".format(e))
                    "Exit with error {}".format(e))
+
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _save_summary(self, summary: dict):
+    def _save_summary(self, summary: dict) -> None:
         """
         """
+        When the score calculated by _objective function improves,
+        the default summary is updated with information about the
+        current score and pipeline/hyperparameters
+        and can be saved to a file or database, depending
+        on the configured save_method.
         """
         """
         try:
         try:
             assert(self.configured_summary_saving),\
             assert(self.configured_summary_saving),\
@@ -511,29 +532,40 @@ class PipelineSelector(ABC):
 
 
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _evaluate(self, pipeline: Pipeline,
-                  scoring: Callable = None,
-                  cross_validation: Callable = None) -> dict:
+    def _evaluate(self, pipeline: Pipeline) -> Union[Dict[str, float], None]:
         """
         """
-        This method is called in _objective.
+        Calculates the averaged cross-validated score and score variance,
+        as well as the averaged values and variances of the additional metrics.
+
+        This method is called in the _objective function that is
+        passed to the hyperopt optimizer.
 
 
-        Calculates the cost on the attached data.
         This function can be overridden, when the cost
         This function can be overridden, when the cost
         needs to be calculated differently,
         needs to be calculated differently,
         for example with a tensorflow model.
         for example with a tensorflow model.
 
 
         :param Pipeline pipeline: machine learning pipeline
         :param Pipeline pipeline: machine learning pipeline
             that will be evaluated with cross-validation
             that will be evaluated with cross-validation
-        :param cross_validation: a function that has the same
-             signature as sklearn.model_selection.cross_validate
 
 
         :return: dictionary with the aggregated
         :return: dictionary with the aggregated
-            cross-validation score and
-            the score variance.
+            cross-validation scores and
+            the score variances for the scores in the output
+            of the cross-validation function.
+
+            form of the output:
+                {"score": 10, #score used in optimization,
+                 "score_variance": 0.5
+                 "additional_metric1": 5,
+                 "additional_metric1_variance": 7}
+
+            a custom cross-validation function can also include for
+            example probability threshold for each fold, then
+            the output of this function will include the average
+            value and the variance of the probability threshold
+            over the folds.
         """
         """
         try:
         try:
-
-            scoring = {"score": make_scorer(self._cost_func)}
+            scoring = {"score": make_scorer(self.cost_func)}
 
 
             scoring.update({metric_name: make_scorer(metric)
             scoring.update({metric_name: make_scorer(metric)
                             for metric_name, metric
                             for metric_name, metric
@@ -543,13 +575,19 @@ class PipelineSelector(ABC):
                     estimator=pipeline,
                     estimator=pipeline,
                     X=self._X,
                     X=self._X,
                     y=self._y,
                     y=self._y,
-                    cv=self._cv or 5,
-                    scoring=scoring,
+                    cv=self._cv,
+                    scoring=self._scoring,
                     error_score=np.nan)
                     error_score=np.nan)
 
 
+            averaging_funcs = {
+                    metric_name: self._additional_averaging_funcs[metric_name]
+                    if metric_name in self._additional_averaging_funcs
+                    else self._cross_val_averaging_func
+                    for metric_name in scores}
+
             scores_average = {
             scores_average = {
                     metric_name.replace("test_", ""):
                     metric_name.replace("test_", ""):
-                    self._cross_val_averaging_func(scores[metric_name])
+                    averaging_funcs[metric_name](scores[metric_name])
                     for metric_name in scores
                     for metric_name in scores
                     if metric_name.startswith("test")}
                     if metric_name.startswith("test")}
 
 
@@ -563,12 +601,13 @@ class PipelineSelector(ABC):
 
 
         except Exception as e:
         except Exception as e:
             err = "Failed to evaluate pipeline. Exit with error: {}".format(e)
             err = "Failed to evaluate pipeline. Exit with error: {}".format(e)
+
             self._logger.log_and_raise_error(err)
             self._logger.log_and_raise_error(err)
 
 
-    def _objective(self, space_element: dict) -> dict:
+    def _objective(self, space_element: SpaceElementType) -> dict:
         '''
         '''
-        This method is called in search_for_best_pipeline
-        inside the hyperopt fmin method.
+        This method is called in run_trials method
+        that is using the hyperopt fmin optimizer.
 
 
         Uses _evaluate method.
         Uses _evaluate method.
 
 
@@ -581,12 +620,10 @@ class PipelineSelector(ABC):
 
 
         :Warning: fmin minimizes the loss,
         :Warning: fmin minimizes the loss,
         when _evaluate returns a value to be maximized,
         when _evaluate returns a value to be maximized,
-        it should be multiplied by -1 to obtain loss.
+        it is multiplied by -1 to obtain loss.
 
 
-        :param dict space_element: must contain keys
-            name (with the name of the pipeline),
-            pipeline (Pipeline object),
-            params (dict of pipeline params)
+        :param SpaceElementType space_element: element
+            of the space over which the optimization is done
 
 
         :output: dictionary with keys
         :output: dictionary with keys
             loss (minimized value),
             loss (minimized value),
@@ -596,18 +633,9 @@ class PipelineSelector(ABC):
             score_variance,
             score_variance,
             timestamp (end of execution),
             timestamp (end of execution),
             train_time: execution time
             train_time: execution time
+            and other keys given in self.default_summary
         '''
         '''
         try:
         try:
-            assert(isinstance(space_element, dict) and
-                   set(['name', 'pipeline', 'params'])
-                   <= space_element.keys()),\
-                 "Space elements are of wrong form"
-
-            assert(isinstance(space_element['name'], str) and
-                   isinstance(space_element['pipeline'], Pipeline) and
-                   isinstance(space_element['params'], dict)),\
-                "Space elements are of wrong form"
-
             start_time = time.time()
             start_time = time.time()
 
 
             assert(self.attached_data),\
             assert(self.attached_data),\
@@ -615,32 +643,14 @@ class PipelineSelector(ABC):
                  "in order to effectuate the best"
                  "in order to effectuate the best"
                  "pipeline search")
                  "pipeline search")
 
 
-            summary = {}
-
-            if self._strategy_name is not None:
-                summary["strategy_name"] = self._strategy_name
+            summary = deepcopy(self.default_summary)
 
 
-            if isinstance(self._cost_func, str):
-                summary["cost_func"] = self._cost_func
-
-            elif hasattr(self._cost_func, "__name__"):
-                summary["cost_func"] = self._cost_func.__name__
-
-            summary["trials_path"] = self.trials_path
-
-            if self._data_path is not None:
-                summary["data_path"] = self._data_path
-
-            if self._cv_path is not None:
-                summary["cv_path"] = self._cv_path
-
-            summary["start_tuning_time"] = self.start_tuning_time
-
-            summary["iteration"] = self._iteration
-
-            backup_cond = (self._backup_trials_freq is not None) and\
-                ((self._iteration - self._start_iteration - 1) %
-                 self._backup_trials_freq == 0) or\
+            # backup the current trials if the score improved
+            # at previous iteration or every ith iteration
+            # if the backup_trials_freq is set
+            backup_cond = ((self._backup_trials_freq is not None) and
+                           ((self._iteration - self._start_iteration - 1) %
+                            self._backup_trials_freq == 0)) or\
                 self._score_improved
                 self._score_improved
 
 
             if backup_cond:
             if backup_cond:
@@ -666,9 +676,6 @@ class PipelineSelector(ABC):
 
 
             end_time = time.time()
             end_time = time.time()
 
 
-            assert(not np.isnan(result["score"])),\
-                "Score value is not in the output of the _evaluate method"
-
             summary['status'] = STATUS_OK
             summary['status'] = STATUS_OK
             summary.update(result)
             summary.update(result)
             summary['loss'] = self._score_factor * summary['score']
             summary['loss'] = self._score_factor * summary['score']
@@ -695,6 +702,7 @@ class PipelineSelector(ABC):
 
 
             self._logger.warning("Trial failed with error {}".format(e))
             self._logger.warning("Trial failed with error {}".format(e))
 
 
+            summary = {}
             summary['status'] = STATUS_FAIL
             summary['status'] = STATUS_FAIL
             summary['timestamp'] = datetime.datetime.today()
             summary['timestamp'] = datetime.datetime.today()
             summary['error'] = e
             summary['error'] = e
@@ -725,11 +733,10 @@ class PipelineSelector(ABC):
     def best_trial(self) -> dict:
     def best_trial(self) -> dict:
         """
         """
         Best trial so far.
         Best trial so far.
-         Should contain the best pipeline,
-         best hyperparameters,
-         as well as an output of the self._objective method,
-         but the exact form of the output depends on the implementation
-         of the Trials object.
+         Should contain the status, pipeline,
+         hyperparameters, and the score (loss).
+         Other information is optional and is defined
+         by self.default_summary
         """
         """
         pass
         pass
 
 
@@ -743,6 +750,7 @@ class PipelineSelector(ABC):
     @abstractproperty
     @abstractproperty
     def best_trial_score_variance(self) -> float:
     def best_trial_score_variance(self) -> float:
         """
         """
+        Variance of the cross-validation score of the best pipeline
         """
         """
         pass
         pass
 
 
@@ -771,12 +779,11 @@ class PipelineSelector(ABC):
         pass
         pass
 
 
     @abstractmethod
     @abstractmethod
-    def trials_to_excel(self, path: str):
+    def trials_to_excel(self, path: str) -> None:
         """
         """
         Trials object in the shape of table written to excel,
         Trials object in the shape of table written to excel,
         should contain the iteration, pipeline (as str),
         should contain the iteration, pipeline (as str),
        hyperparameters (as str), self.best_result (see self._objective method)
        hyperparameters (as str), self.best_result (see self._objective method)
-        as well as additional information configured
-        through self.save_result method.
+        as well as additional information defined by self.default_summary
         """
         """
         pass
         pass

+ 21 - 10
cdplib/utils/CleaningUtils.py

@@ -8,13 +8,16 @@ Created on Fri Sep 27 16:20:03 2019
 
 
 import pandas as pd
 import pandas as pd
 import numpy as np
 import numpy as np
+from typing import Union, Any, List
 
 
 
 
 class CleaningUtils:
 class CleaningUtils:
     '''
     '''
     Unites different methods for data cleaning
     Unites different methods for data cleaning
     '''
     '''
-    def convert_dates(series: pd.Series, formats: (str, list)) -> pd.Series:
+    def convert_dates(self,
+                      series: pd.Series[Any],
+                      formats: Union[str, List[str]]) -> pd.Series:
         '''
         '''
         Converts values from string to date in a pandas Series
         Converts values from string to date in a pandas Series
          where possibly multiple date formats are mixed
          where possibly multiple date formats are mixed
@@ -29,8 +32,7 @@ class CleaningUtils:
 
 
                 series = series.astype(str)
                 series = series.astype(str)
 
 
-                series.loc[missing_leading_zero] = "0" +\
-                    series.loc[missing_leading_zero]
+                series.loc[missing_leading_zero] += "0"
 
 
             converted_this_format = pd.to_datetime(series,
             converted_this_format = pd.to_datetime(series,
                                                    format=formt,
                                                    format=formt,
@@ -71,21 +73,28 @@ class CleaningUtils:
 
 
         return s
         return s
 
 
-    def melt_duplicated_columns(self, df: pd.DataFrame, suffix: str = "", prefix: str = "") -> pd.DataFrame:
+    def melt_duplicated_columns(self, df: pd.DataFrame,
+                                suffix: str = "",
+                                prefix: str = "") -> pd.DataFrame:
         '''
         '''
         If a dataframe has multiple columns with the same name
         If a dataframe has multiple columns with the same name
          (up to a prefix or a suffix),
          (up to a prefix or a suffix),
          melts the columns together in one
          melts the columns together in one
 
 
-        :parame suffix: string or regex up to which we consider names as duplicated
-        :parame prefix: string or regex up to which we consider names as duplicated
+        :param suffix: string or regex up
+            to which we consider names as duplicated
+        :param prefix: string or regex
+            up to which we consider names as duplicated
         '''
         '''
         from collections import Counter
         from collections import Counter
 
 
         import re
         import re
 
 
-        # remove the suffix and the prefix from the column names (now the duplicates are truely duplicates)
-        df.columns = [re.sub(re.compile(prefix), "", re.sub(re.compile(suffix), "", c)) for c in df.columns]
+        # remove the suffix and the prefix from the column names
+        # (now the duplicates are truly duplicates)
+        df.columns = [re.sub(re.compile(prefix), "",
+                             re.sub(re.compile(suffix), "", c))
+                      for c in df.columns]
 
 
         column_counter = Counter(df.columns)
         column_counter = Counter(df.columns)
 
 
@@ -100,10 +109,12 @@ class CleaningUtils:
             df_melted = []
             df_melted = []
 
 
             for dup_var in dup_vars:
             for dup_var in dup_vars:
-                dup_var_melted = pd.melt(frame=df, id_vars=id_vars, value_vars=[dup_var], value_name=dup_var)\
+                dup_var_melted = pd.melt(frame=df,
+                                         id_vars=id_vars,
+                                         value_vars=[dup_var],
+                                         value_name=dup_var)\
                                    .set_index(id_vars)[dup_var]
                                    .set_index(id_vars)[dup_var]
 
 
                 df_melted.append(dup_var_melted)
                 df_melted.append(dup_var_melted)
 
 
             return pd.concat(df_melted, axis=1, sort=False).reset_index()
             return pd.concat(df_melted, axis=1, sort=False).reset_index()
-

+ 36 - 0
cdplib/utils/TypeConverter.py

@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Apr 24 09:06:13 2020
+
+@author: tanya
+"""
+
+import numpy as np
+import pandas as pd
+
+class TypeConverter:
+    """
+    Library for methods to manage python types
+    """
+    def __init__(self):
+        """
+        """
+        from cdplib.log import Log
+
+        self._logger = Log("TypeConverter")
+
+    def convert_to_ndarray(self, x: (pd.DataFrame, np.ndarray)) -> np.ndarray:
+        '''
+        Converts a DataFrame to a numpy array.
+        '''
+        if isinstance(x, np.ndarray):
+            return x
+
+        elif (isinstance(x, pd.core.frame.DataFrame))\
+                or (isinstance(x, pd.core.series.Series)):
+            return x.values
+
+        else:
+            self._logger.log_and_raise_error_stack_info(
+                    'The argument must be a numpy array or a pandas DataFrame')