|
@@ -278,7 +278,7 @@ class MongodbHandler:
|
|
|
query = find_query
|
|
|
|
|
|
data = self._database[collection_name].find(query,return_values)
|
|
|
-
|
|
|
+
|
|
|
else:
|
|
|
query = {attribute: {comparison_operator: attribute_value}}
|
|
|
data = self._database[collection_name].find(query, return_values)
|
|
@@ -293,6 +293,15 @@ class MongodbHandler:
|
|
|
else:
|
|
|
return data
|
|
|
|
|
|
+ def query_and_insert_into_collection(self,
|
|
|
+ input_collection_name: str,
|
|
|
+ output_collection_name: str,
|
|
|
+ query: dict = None):
|
|
|
+ """
|
|
|
+ """
|
|
|
+ self._database[output_collection_name].insert(
|
|
|
+ self._database[input_collection_name].find({}).toArray())
|
|
|
+
|
|
|
def aggregate_data_and_generate_dataframe(self, collection_name: str, aggregation_pipeline: list, index: str = None, return_as_dataframe=True):
|
|
|
|
|
|
try:
|
|
@@ -300,18 +309,18 @@ class MongodbHandler:
|
|
|
except Exception as error:
|
|
|
self._log.log_and_raise_error(('A problem occured when aggregating the collection {} with the pipeline {}. \nError: {}').format(collection_name, aggregation_pipeline, error))
|
|
|
return None
|
|
|
-
|
|
|
+
|
|
|
if return_as_dataframe:
|
|
|
return self.convert_mongo_data_into_dataframe(data, index, collection_name)
|
|
|
else:
|
|
|
return data
|
|
|
-
|
|
|
+
|
|
|
def convert_mongo_data_into_dataframe(self, data, index: str = None, collection_name: str = None, chunksize: int = 500) -> pd.DataFrame():
|
|
|
|
|
|
start_time = time.time()
|
|
|
'''
|
|
|
self._log.info('Converting returned mongo data into a DataFrame')
|
|
|
-
|
|
|
+
|
|
|
data = list(data)
|
|
|
try:
|
|
|
if len(data)> 0:
|
|
@@ -330,7 +339,7 @@ class MongodbHandler:
|
|
|
except Exception as error:
|
|
|
self._log.log_and_raise_error(('An error occured trying to convert mongo data into pd.Dataframe. \nError: {} ').format(error))
|
|
|
'''
|
|
|
-
|
|
|
+
|
|
|
frames = []
|
|
|
records = []
|
|
|
for iteration, value in enumerate(data):
|
|
@@ -356,11 +365,11 @@ class MongodbHandler:
|
|
|
return_df.set_index(index, inplace=True)
|
|
|
|
|
|
self._log.info(('{} Rows were fetched from {}. DataFrame conversion is done, took {} seconds').format(len(return_df.index), collection_name if collection_name is not None else 'the database', time.time()-start_time))
|
|
|
-
|
|
|
+
|
|
|
return return_df
|
|
|
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
|
|
|
|
|
|
|
|
@@ -382,7 +391,7 @@ class MongodbHandler:
|
|
|
'''
|
|
|
if type(data) == list:
|
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: {"$each": data}}})
|
|
|
- self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
|
+ self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
|
else:
|
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: data}})
|
|
|
self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
@@ -410,7 +419,7 @@ class MongodbHandler:
|
|
|
'''
|
|
|
assert(isinstance(collection_name, str)),\
|
|
|
"Parameter 'collection_name' must be a string type"
|
|
|
-
|
|
|
+
|
|
|
if return_values is None:
|
|
|
return_values = {'_id': return_id}
|
|
|
|
|
@@ -422,7 +431,7 @@ class MongodbHandler:
|
|
|
query = find_query
|
|
|
else:
|
|
|
query = {date_label: {'$gt': from_date_value, '$lt': to_date_value}}
|
|
|
-
|
|
|
+
|
|
|
data = self._database[collection_name].find(query, return_values)
|
|
|
|
|
|
except Exception as error:
|
|
@@ -492,8 +501,8 @@ class MongodbHandler:
|
|
|
if find_query is None:
|
|
|
if query_label and query_value:
|
|
|
find_query = {query_label:query_value}
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
try:
|
|
|
if update_many:
|
|
|
if find_query is not None:
|
|
@@ -528,17 +537,17 @@ class MongodbHandler:
|
|
|
return [value[query_label] for value in data]
|
|
|
else:
|
|
|
return []
|
|
|
-
|
|
|
+
|
|
|
def get_distinct_value_of_key(self, collection_name: str, key: str):
|
|
|
-
|
|
|
+
|
|
|
assert(isinstance(collection_name, str)),\
|
|
|
"Parameter 'collection_name' must be a string type"
|
|
|
-
|
|
|
+
|
|
|
assert(isinstance(key, str)),\
|
|
|
"Parameter 'key' must be a string type"
|
|
|
-
|
|
|
+
|
|
|
data = self._database[collection_name].distinct(key)
|
|
|
-
|
|
|
+
|
|
|
return data
|
|
|
|
|
|
|