Sfoglia il codice sorgente

Trying to speed up the conversion of MongoDB query results into a dataframe

ogert 4 anni fa
parent
commit
fd00b2ea13

+ 6 - 2
cdplib/db_handlers/MongodbHandler.py

@@ -325,6 +325,8 @@ class MongodbHandler:
             frames.append(pd.DataFrame(records))
         return_df = pd.concat(frames, axis=0, sort=False)
 
+
+        print(index)
         if index is not None:
             return_df.set_index(index, inplace=True)
 
@@ -365,7 +367,7 @@ class MongodbHandler:
         '''
         return self._database[collection_name].find({query_label:query_value}).count() > 0
 
-    def query_data_between_dates_and_generate_dataframe(self, collection_name: str, date_label: str, from_date_value: str, to_date_value: str, index: str = None, return_as_dataframe: bool = True):
+    def query_data_between_dates_and_generate_dataframe(self, collection_name: str, date_label: str, from_date_value: str, to_date_value: str, index: str = None, return_id: bool = False, return_as_dataframe: bool = True):
         '''
             Queries data between two dates.
 
@@ -376,8 +378,10 @@ class MongodbHandler:
             :param str index:
             :param bool return_as_dataframe:
         '''
+        assert(isinstance(collection_name, str)),\
+            "Parameter 'collection_name' must be a string type"
         try:
-            data = self._database[collection_name].find({date_label: {'$gt': from_date_value, '$lt': to_date_value}})
+            data = self._database[collection_name].find({date_label: {'$gt': from_date_value, '$lt': to_date_value}}, {'_id': return_id})
 
         except Exception as error:
             self._log.log_and_raise_error(('An error occured trying to query data from {}, with query {}: $gt:{}, $lt:{}. \nError:{}').format(collection_name, date_label, from_date_value, to_date_value, error))

+ 12 - 3
cdplib/unit_tests/TestMongodbHandler.py

@@ -26,12 +26,14 @@ class TestMongodbHandler(unittest.TestCase):
         self.valid_input = {
                         "test_value_string": "test_value",
                         "test_value_double": 2.4,
-                        "test_value_double_array": [1.4, 1.6, 3.5]
+                        "test_value_double_array": [1.4, 1.6, 3.5],
+                        "test_value_date": "2020-01-28T15:45:25.000Z"
                         }
         self.invalid_input = {
                         "test_value_string": 1,
                         "test_value_double": "Wrong value",
-                        "test_value_double_array": [1.4, 1.6, 3.5]
+                        "test_value_double_array": [1.4, 1.6, 3.5],
+                        "test_value_date": "2019-01-28T15:45:25.000Z"
                         }
 
 
@@ -82,6 +84,7 @@ class TestMongodbHandler(unittest.TestCase):
         Fetch data and confirm that it is the same as what was entered into the database
         Do the same with more specific query
         '''
+
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name, 'test_value_string', 'test_value').to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
 
@@ -94,7 +97,7 @@ class TestMongodbHandler(unittest.TestCase):
                                 { '$match': {}}
                                 ]
         self.assertEqual(self.mongodb_handler.aggregate_data_and_generate_dataframe(self.first_collection_name, aggregation_pipeline).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
-    
+
     def test_G_update_data_in_collection(self):
         '''
         Fetch data from database
@@ -117,6 +120,12 @@ class TestMongodbHandler(unittest.TestCase):
         index = 'test_value_string'
         self.mongodb_handler.create_index(self.first_collection_name, index)
         self.assertTrue(index in list(self.database[self.first_collection_name].index_information().keys()))
+
+    def test_I_query_data_between_dates_and_generate_dataframe(self):
+
+            data = self.mongodb_handler.query_data_between_dates_and_generate_dataframe(self.first_collection_name, "test_value_date", "2020-01-27T15:45:25.000Z", "2020-01-29T15:45:25.000Z", index ='test_value_string')
+            print(data)
+            self.assertEqual(data['test_value_double'][0], self.valid_input['test_value_double'])
     
     def test_Y_drop_collection(self):
         '''