Selaa lähdekoodia

added a method query_and_insert_into_collection to MongodbHanlder

tanja 4 vuotta sitten
vanhempi
commit
08957e7472
1 muutettua tiedostoa jossa 27 lisäystä ja 18 poistoa
  1. 27 18
      cdplib/db_handlers/MongodbHandler.py

+ 27 - 18
cdplib/db_handlers/MongodbHandler.py

@@ -278,7 +278,7 @@ class MongodbHandler:
                     query = find_query
 
                 data = self._database[collection_name].find(query,return_values)
-                
+
             else:
                 query = {attribute: {comparison_operator: attribute_value}}
                 data = self._database[collection_name].find(query, return_values)
@@ -293,6 +293,15 @@ class MongodbHandler:
             else:
                 return data
 
+    def query_and_insert_into_collection(self,
+                                         input_collection_name: str,
+                                         output_collection_name: str,
+                                         query: dict = None):
+        """
+        """
+        self._database[output_collection_name].insert(
+                self._database[input_collection_name].find({}).toArray())
+
     def aggregate_data_and_generate_dataframe(self, collection_name: str, aggregation_pipeline: list, index: str = None, return_as_dataframe=True):
 
         try:
@@ -300,18 +309,18 @@ class MongodbHandler:
         except Exception as error:
             self._log.log_and_raise_error(('A problem occured when aggregating the collection {} with the pipeline {}. \nError: {}').format(collection_name, aggregation_pipeline, error))
             return None
-       
+
         if return_as_dataframe:
             return self.convert_mongo_data_into_dataframe(data, index, collection_name)
         else:
             return data
-      
+
     def convert_mongo_data_into_dataframe(self, data, index: str = None, collection_name: str = None, chunksize: int = 500) -> pd.DataFrame():
 
         start_time = time.time()
         '''
         self._log.info('Converting returned mongo data into a DataFrame')
-        
+
         data = list(data)
         try:
             if len(data)> 0:
@@ -330,7 +339,7 @@ class MongodbHandler:
         except Exception as error:
             self._log.log_and_raise_error(('An error occured trying to convert mongo data into pd.Dataframe. \nError: {} ').format(error))
         '''
-    
+
         frames = []
         records = []
         for iteration, value in enumerate(data):
@@ -356,11 +365,11 @@ class MongodbHandler:
             return_df.set_index(index, inplace=True)
 
         self._log.info(('{} Rows were fetched from {}. DataFrame conversion is done, took {} seconds').format(len(return_df.index), collection_name if collection_name is not None else 'the database', time.time()-start_time))
-        
+
         return return_df
 
- 
-        
+
+
 
     #def update_data_in_collection(self, query_label: str, query_value: str, update_label:str, update_value: str, collection_name:str):
     #    self._database[collection_name].update_one({query_label:query_value}, {"$set": {update_label: update_value}})
@@ -382,7 +391,7 @@ class MongodbHandler:
         '''
         if type(data) == list:
             self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: {"$each": data}}})
-            self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))   
+            self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
         else:
             self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: data}})
             self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
@@ -410,7 +419,7 @@ class MongodbHandler:
         '''
         assert(isinstance(collection_name, str)),\
             "Parameter 'collection_name' must be a string type"
-       
+
         if return_values is None:
             return_values = {'_id': return_id}
 
@@ -422,7 +431,7 @@ class MongodbHandler:
                     query = find_query
                 else:
                     query = {date_label: {'$gt': from_date_value, '$lt': to_date_value}}
-                
+
                 data = self._database[collection_name].find(query, return_values)
 
             except Exception as error:
@@ -492,8 +501,8 @@ class MongodbHandler:
         if find_query is None:
             if query_label and query_value:
                 find_query = {query_label:query_value}
-                
-                                             
+
+
         try:
             if update_many:
                 if find_query is not None:
@@ -528,17 +537,17 @@ class MongodbHandler:
             return [value[query_label] for value in data]
         else:
             return []
-        
+
     def get_distinct_value_of_key(self, collection_name: str, key: str):
-        
+
         assert(isinstance(collection_name, str)),\
             "Parameter 'collection_name' must be a string type"
-        
+
         assert(isinstance(key, str)),\
             "Parameter 'key' must be a string type"
-        
+
         data = self._database[collection_name].distinct(key)
-        
+
         return data