Browse Source

Merge branch 'master' of https://intra.acdp.at/gogs/tanja/cdplib

ogert 5 years ago
parent
commit
0dbddbd6d6
1 changed file with 21 additions and 11 deletions
  1. 21 11
      cdplib/db_migration/DataFrameToCollection.py

+ 21 - 11
cdplib/db_migration/DataFrameToCollection.py

@@ -10,7 +10,6 @@ Created on Mon Jul 22 11:05:47 2019
 """
 
 import pandas as pd
-import numpy as np
 import os
 import sys
 
@@ -78,7 +77,7 @@ class DataFrameToCollection():
             schema = self.schema
 
         for field in schema["properties"]:
-
+            
             if field not in self._unroll_nested_names(data.columns):
                 continue
 
@@ -112,25 +111,28 @@ class DataFrameToCollection():
             elif field_type == "object":
 
                 sub_schema = deepcopy(schema["properties"][field])
-
+            
                 # rename sub-schema properties to match with data column names
                 sub_schema["properties"] =\
                     {".".join([field, k]): v for k, v
                      in sub_schema["properties"].items()}
-
+                
                 sub_data = self.to_list_of_documents(
                             data=data,
                             schema=sub_schema,
                             grp_fields=grp_fields,
                             _final_step=False)
-
-                reshaped_field = sub_data.apply(self._make_dict, axis=1)
-                reshaped_field.name = field
-
-                reshaped_fields.append(reshaped_field)
+                
+                # Need to be checked since child elements can be empty
+                if sub_data is not None:
+                    reshaped_field = sub_data.apply(self._make_dict, axis=1)
+                    reshaped_field.name = field
+    
+                    reshaped_fields.append(reshaped_field)
 
             # if field is a list of dictionaries
             elif field_type == "array":
+             
 
                 items_type = schema["properties"][field]["items"]["bsonType"]
 
@@ -155,7 +157,7 @@ class DataFrameToCollection():
 
                         self._log.error(err)
                         raise Exception(err)
-
+                        
                     # group and reshape sub-fields with complex types
                     sub_data = self.to_list_of_documents(
                                 data=data,
@@ -303,7 +305,7 @@ class DataFrameToCollection():
 
 if __name__ == "__main__":
 
-    # Testing
+#     Testing
 
     df = pd.DataFrame({
                        "a": [1]*8 + [2]*8,
@@ -394,3 +396,11 @@ if __name__ == "__main__":
                     data=df,
                     schema=schm,
                     grp_fields=grp_fields)
+
+    
+    
+    
+    
+    
+    
+