浏览代码

Trying to speed up converting mongo data into a DataFrame

ogert 5 年之前
父节点
当前提交
c565344023
共有 1 个文件被更改,包括 16 次插入和 0 次删除
  1. 16 0
      cdplib/db_handlers/MongodbHandler.py

+ 16 - 0
cdplib/db_handlers/MongodbHandler.py

@@ -291,6 +291,7 @@ class MongodbHandler:
     def convert_mongo_data_into_dataframe(self, data, index: str = None, collection_name: str = None) -> pd.DataFrame():
 
         start_time = time.time()
+        '''
         self._log.info('Converting returned mongo data into a DataFrame')
         df = pd.DataFrame.from_records(data)
         data = list(data)
@@ -311,6 +312,21 @@ class MongodbHandler:
                 self._log.warning(('No data for the query was found').format())
         except Exception as error:
             self._log.log_and_raise_error(('An error occured trying to convert mongo data into pd.Dataframe. \nError: {} ').format(error))
+        '''
+        frames = []
+        records = []
+        for index, value in enumerate(data):
+
+            records.append(value)
+            if index % 1000 == 0:
+                frames.append(pd.DataFrame(records))
+                records = []
+        if records:
+            frames.append(pd.DataFrame(records))
+        return_df = pd.concat(frames)
+        self._log.info(('DataFrame conversion is done, took {} seconds').format(time.time()-start_time))
+        
+        return return_df
 
     #def update_data_in_collection(self, query_label: str, query_value: str, update_label:str, update_value: str, collection_name:str):
     #    self._database[collection_name].update_one({query_label:query_value}, {"$set": {update_label: update_value}})