Browse Source

Trying to speed up converting MongoDB data into a dataframe

ogert 4 years ago
parent
commit
fd00b2ea13
2 changed files with 18 additions and 5 deletions
  1. 6 2
      cdplib/db_handlers/MongodbHandler.py
  2. 12 3
      cdplib/unit_tests/TestMongodbHandler.py

+ 6 - 2
cdplib/db_handlers/MongodbHandler.py

@@ -325,6 +325,8 @@ class MongodbHandler:
             frames.append(pd.DataFrame(records))
             frames.append(pd.DataFrame(records))
         return_df = pd.concat(frames, axis=0, sort=False)
         return_df = pd.concat(frames, axis=0, sort=False)
 
 
+
+        print(index)
         if index is not None:
         if index is not None:
             return_df.set_index(index, inplace=True)
             return_df.set_index(index, inplace=True)
 
 
@@ -365,7 +367,7 @@ class MongodbHandler:
         '''
         '''
         return self._database[collection_name].find({query_label:query_value}).count() > 0
         return self._database[collection_name].find({query_label:query_value}).count() > 0
 
 
-    def query_data_between_dates_and_generate_dataframe(self, collection_name: str, date_label: str, from_date_value: str, to_date_value: str, index: str = None, return_as_dataframe: bool = True):
+    def query_data_between_dates_and_generate_dataframe(self, collection_name: str, date_label: str, from_date_value: str, to_date_value: str, index: str = None, return_id: bool = False, return_as_dataframe: bool = True):
         '''
         '''
             Queries data between two dates.
             Queries data between two dates.
 
 
@@ -376,8 +378,10 @@ class MongodbHandler:
             :param str index:
             :param str index:
             :param bool return_as_dataframe:
             :param bool return_as_dataframe:
         '''
         '''
+        assert(isinstance(collection_name, str)),\
+            "Parameter 'collection_name' must be a string type"
         try:
         try:
-            data = self._database[collection_name].find({date_label: {'$gt': from_date_value, '$lt': to_date_value}})
+            data = self._database[collection_name].find({date_label: {'$gt': from_date_value, '$lt': to_date_value}}, {'_id': return_id})
 
 
         except Exception as error:
         except Exception as error:
             self._log.log_and_raise_error(('An error occured trying to query data from {}, with query {}: $gt:{}, $lt:{}. \nError:{}').format(collection_name, date_label, from_date_value, to_date_value, error))
             self._log.log_and_raise_error(('An error occured trying to query data from {}, with query {}: $gt:{}, $lt:{}. \nError:{}').format(collection_name, date_label, from_date_value, to_date_value, error))

+ 12 - 3
cdplib/unit_tests/TestMongodbHandler.py

@@ -26,12 +26,14 @@ class TestMongodbHandler(unittest.TestCase):
         self.valid_input = {
         self.valid_input = {
                         "test_value_string": "test_value",
                         "test_value_string": "test_value",
                         "test_value_double": 2.4,
                         "test_value_double": 2.4,
-                        "test_value_double_array": [1.4, 1.6, 3.5]
+                        "test_value_double_array": [1.4, 1.6, 3.5],
+                        "test_value_date": "2020-01-28T15:45:25.000Z"
                         }
                         }
         self.invalid_input = {
         self.invalid_input = {
                         "test_value_string": 1,
                         "test_value_string": 1,
                         "test_value_double": "Wrong value",
                         "test_value_double": "Wrong value",
-                        "test_value_double_array": [1.4, 1.6, 3.5]
+                        "test_value_double_array": [1.4, 1.6, 3.5],
+                        "test_value_date": "2019-01-28T15:45:25.000Z"
                         }
                         }
 
 
 
 
@@ -82,6 +84,7 @@ class TestMongodbHandler(unittest.TestCase):
         Fetch data and confirms thats it is the same as was entered into the database
         Fetch data and confirms thats it is the same as was entered into the database
         Do the same with more specific query
         Do the same with more specific query
         '''
         '''
+
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name, 'test_value_string', 'test_value').to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
         self.assertEqual(self.mongodb_handler.query_data_and_generate_dataframe(self.first_collection_name, 'test_value_string', 'test_value').to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
 
 
@@ -94,7 +97,7 @@ class TestMongodbHandler(unittest.TestCase):
                                 { '$match': {}}
                                 { '$match': {}}
                                 ]
                                 ]
         self.assertEqual(self.mongodb_handler.aggregate_data_and_generate_dataframe(self.first_collection_name, aggregation_pipeline).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
         self.assertEqual(self.mongodb_handler.aggregate_data_and_generate_dataframe(self.first_collection_name, aggregation_pipeline).to_dict()['test_value_double'][0], self.valid_input['test_value_double'])
-    
+
     def test_G_update_data_in_collection(self):
     def test_G_update_data_in_collection(self):
         '''
         '''
         Fetch data from database
         Fetch data from database
@@ -117,6 +120,12 @@ class TestMongodbHandler(unittest.TestCase):
         index = 'test_value_string'
         index = 'test_value_string'
         self.mongodb_handler.create_index(self.first_collection_name, index)
         self.mongodb_handler.create_index(self.first_collection_name, index)
         self.assertTrue(index in list(self.database[self.first_collection_name].index_information().keys()))
         self.assertTrue(index in list(self.database[self.first_collection_name].index_information().keys()))
+
+    def test_I_query_data_between_dates_and_generate_dataframe(self):
+        '''Query the first collection for documents whose test_value_date lies inside a two-day window and check the returned test_value_double.'''
+        data = self.mongodb_handler.query_data_between_dates_and_generate_dataframe(self.first_collection_name, "test_value_date", "2020-01-27T15:45:25.000Z", "2020-01-29T15:45:25.000Z", index='test_value_string')
+        # index='test_value_string' is requested, so take the first row by position instead of looking up an integer label.
+        self.assertEqual(data['test_value_double'].iloc[0], self.valid_input['test_value_double'])
     
     
     def test_Y_drop_collection(self):
     def test_Y_drop_collection(self):
         '''
         '''