|
@@ -278,7 +278,7 @@ class MongodbHandler:
|
|
query = find_query
|
|
query = find_query
|
|
|
|
|
|
data = self._database[collection_name].find(query,return_values)
|
|
data = self._database[collection_name].find(query,return_values)
|
|
-
|
|
|
|
|
|
+
|
|
else:
|
|
else:
|
|
query = {attribute: {comparison_operator: attribute_value}}
|
|
query = {attribute: {comparison_operator: attribute_value}}
|
|
data = self._database[collection_name].find(query, return_values)
|
|
data = self._database[collection_name].find(query, return_values)
|
|
@@ -293,6 +293,33 @@ class MongodbHandler:
|
|
else:
|
|
else:
|
|
return data
|
|
return data
|
|
|
|
|
|
|
|
def aggregate_and_insert_into_collection(self,
                                         input_collection_name: str,
                                         output_collection_name: str,
                                         aggregation_pipeline: list = None):
    """
    Run an aggregation on one collection and write its result into another.

    A ``{"$out": output_collection_name}`` stage is added as the last stage
    of the pipeline, and the pipeline is then executed through
    ``aggregate_data_and_generate_dataframe`` on the input collection.

    :param input_collection_name: collection the pipeline is run against
    :param output_collection_name: collection named in the ``$out`` stage
    :param aggregation_pipeline: stages to run before ``$out``; when None,
        the pipeline consists of the ``$out`` stage only
    """
    # Build a new list instead of appending to the argument: mutating the
    # caller's list would stack one more "$out" stage on every call that
    # reuses the same pipeline object.
    stages_before_out = [] if aggregation_pipeline is None else list(aggregation_pipeline)
    full_pipeline = stages_before_out + [{"$out": output_collection_name}]

    self.aggregate_data_and_generate_dataframe(
        collection_name=input_collection_name,
        aggregation_pipeline=full_pipeline)
|
|
|
|
+
|
|
|
|
def index_collection(self, collection_name: str, keys: list):
    """
    Create a (compound) index on a collection.

    :param collection_name: name of the collection to index
    :param keys: the fields of the index. Each element is either a tuple
        of shape (field_name, 1) or (field_name, -1) giving the indexing
        order, or a bare field name, in which case the order is set to 1
    """
    index_spec = []
    for entry in keys:
        if isinstance(entry, tuple):
            # Already a (field_name, order) pair: pass it through untouched.
            index_spec.append(entry)
        else:
            index_spec.append((entry, 1))

    target_collection = self._database[collection_name]
    target_collection.create_index(index_spec)
|
|
|
|
+
|
|
|
|
+
|
|
def aggregate_data_and_generate_dataframe(self, collection_name: str, aggregation_pipeline: list, index: str = None, return_as_dataframe=True):
|
|
def aggregate_data_and_generate_dataframe(self, collection_name: str, aggregation_pipeline: list, index: str = None, return_as_dataframe=True):
|
|
|
|
|
|
try:
|
|
try:
|
|
@@ -300,18 +327,18 @@ class MongodbHandler:
|
|
except Exception as error:
|
|
except Exception as error:
|
|
self._log.log_and_raise_error(('A problem occured when aggregating the collection {} with the pipeline {}. \nError: {}').format(collection_name, aggregation_pipeline, error))
|
|
self._log.log_and_raise_error(('A problem occured when aggregating the collection {} with the pipeline {}. \nError: {}').format(collection_name, aggregation_pipeline, error))
|
|
return None
|
|
return None
|
|
-
|
|
|
|
|
|
+
|
|
if return_as_dataframe:
|
|
if return_as_dataframe:
|
|
return self.convert_mongo_data_into_dataframe(data, index, collection_name)
|
|
return self.convert_mongo_data_into_dataframe(data, index, collection_name)
|
|
else:
|
|
else:
|
|
return data
|
|
return data
|
|
-
|
|
|
|
|
|
+
|
|
def convert_mongo_data_into_dataframe(self, data, index: str = None, collection_name: str = None, chunksize: int = 500) -> pd.DataFrame():
|
|
def convert_mongo_data_into_dataframe(self, data, index: str = None, collection_name: str = None, chunksize: int = 500) -> pd.DataFrame():
|
|
|
|
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
'''
|
|
'''
|
|
self._log.info('Converting returned mongo data into a DataFrame')
|
|
self._log.info('Converting returned mongo data into a DataFrame')
|
|
-
|
|
|
|
|
|
+
|
|
data = list(data)
|
|
data = list(data)
|
|
try:
|
|
try:
|
|
if len(data)> 0:
|
|
if len(data)> 0:
|
|
@@ -330,7 +357,7 @@ class MongodbHandler:
|
|
except Exception as error:
|
|
except Exception as error:
|
|
self._log.log_and_raise_error(('An error occured trying to convert mongo data into pd.Dataframe. \nError: {} ').format(error))
|
|
self._log.log_and_raise_error(('An error occured trying to convert mongo data into pd.Dataframe. \nError: {} ').format(error))
|
|
'''
|
|
'''
|
|
-
|
|
|
|
|
|
+
|
|
frames = []
|
|
frames = []
|
|
records = []
|
|
records = []
|
|
for iteration, value in enumerate(data):
|
|
for iteration, value in enumerate(data):
|
|
@@ -356,11 +383,11 @@ class MongodbHandler:
|
|
return_df.set_index(index, inplace=True)
|
|
return_df.set_index(index, inplace=True)
|
|
|
|
|
|
self._log.info(('{} Rows were fetched from {}. DataFrame conversion is done, took {} seconds').format(len(return_df.index), collection_name if collection_name is not None else 'the database', time.time()-start_time))
|
|
self._log.info(('{} Rows were fetched from {}. DataFrame conversion is done, took {} seconds').format(len(return_df.index), collection_name if collection_name is not None else 'the database', time.time()-start_time))
|
|
-
|
|
|
|
|
|
+
|
|
return return_df
|
|
return return_df
|
|
|
|
|
|
-
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
#def update_data_in_collection(self, query_label: str, query_value: str, update_label:str, update_value: str, collection_name:str):
|
|
#def update_data_in_collection(self, query_label: str, query_value: str, update_label:str, update_value: str, collection_name:str):
|
|
# self._database[collection_name].update_one({query_label:query_value}, {"$set": {update_label: update_value}})
|
|
# self._database[collection_name].update_one({query_label:query_value}, {"$set": {update_label: update_value}})
|
|
@@ -382,7 +409,7 @@ class MongodbHandler:
|
|
'''
|
|
'''
|
|
if type(data) == list:
|
|
if type(data) == list:
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: {"$each": data}}})
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: {"$each": data}}})
|
|
- self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
|
|
|
|
+ self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
else:
|
|
else:
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: data}})
|
|
self._database[collection_name].update_one({query_label:query_value}, {"$push": {update_label: data}})
|
|
self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
self._log.info(('A document has been pushed into the {} array in the {} collection').format(query_value, collection_name))
|
|
@@ -410,7 +437,7 @@ class MongodbHandler:
|
|
'''
|
|
'''
|
|
assert(isinstance(collection_name, str)),\
|
|
assert(isinstance(collection_name, str)),\
|
|
"Parameter 'collection_name' must be a string type"
|
|
"Parameter 'collection_name' must be a string type"
|
|
-
|
|
|
|
|
|
+
|
|
if return_values is None:
|
|
if return_values is None:
|
|
return_values = {'_id': return_id}
|
|
return_values = {'_id': return_id}
|
|
|
|
|
|
@@ -422,7 +449,7 @@ class MongodbHandler:
|
|
query = find_query
|
|
query = find_query
|
|
else:
|
|
else:
|
|
query = {date_label: {'$gt': from_date_value, '$lt': to_date_value}}
|
|
query = {date_label: {'$gt': from_date_value, '$lt': to_date_value}}
|
|
-
|
|
|
|
|
|
+
|
|
data = self._database[collection_name].find(query, return_values)
|
|
data = self._database[collection_name].find(query, return_values)
|
|
|
|
|
|
except Exception as error:
|
|
except Exception as error:
|
|
@@ -492,8 +519,8 @@ class MongodbHandler:
|
|
if find_query is None:
|
|
if find_query is None:
|
|
if query_label and query_value:
|
|
if query_label and query_value:
|
|
find_query = {query_label:query_value}
|
|
find_query = {query_label:query_value}
|
|
-
|
|
|
|
-
|
|
|
|
|
|
+
|
|
|
|
+
|
|
try:
|
|
try:
|
|
if update_many:
|
|
if update_many:
|
|
if find_query is not None:
|
|
if find_query is not None:
|
|
@@ -528,17 +555,26 @@ class MongodbHandler:
|
|
return [value[query_label] for value in data]
|
|
return [value[query_label] for value in data]
|
|
else:
|
|
else:
|
|
return []
|
|
return []
|
|
-
|
|
|
|
|
|
+
|
|
def get_distinct_value_of_key(self, collection_name: str, key: str):
    """
    Return the result of ``distinct(key)`` on the given collection.

    :param collection_name: name of the collection to query
    :param key: field whose distinct values are requested
    :return: whatever the collection's ``distinct`` call returns
    :raises AssertionError: if either argument is not a string
    """
    assert isinstance(collection_name, str), \
        "Parameter 'collection_name' must be a string type"
    assert isinstance(key, str), \
        "Parameter 'key' must be a string type"

    collection = self._database[collection_name]
    return collection.distinct(key)
|
|
|
|
+
|
|
|
|
def get_number_of_entries_in_collection(self, collection_name: str):
    """
    Return the number of documents in a collection.

    :param collection_name: name of the collection to count
    :return: the document count reported for an empty filter
    :raises AssertionError: if collection_name is not a string
    """
    assert(isinstance(collection_name, str)),\
        "Parameter 'collection_name' must be a string type"

    # Collection.count() is deprecated since PyMongo 3.7 and removed in 4.0;
    # count_documents with an empty filter counts every document.
    data = self._database[collection_name].count_documents({})

    return data
|
|
|
|
|
|
|
|
|