Quellcode durchsuchen

resolved conflicts: added dereferenced schema

tanja vor 5 Jahren
Ursprung
Commit
316f568a5c

+ 10 - 0
README.md

@@ -2,6 +2,16 @@
 
 Install cdplib via pipenv: `pipenv install -e git+https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git#egg=cdplib  `
 
+
+#### Install via Pipfile
+
+* To install the master branch, add the following line to the \[packages\] section of the Pipfile:
+cdplib = {editable = true, git = "https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git"}
+
+* To install any other branch, add the following line instead, replacing BRANCH with the name of the branch:
+cdplib = {editable = true, ref = "BRANCH", git = "https://readonly:readonly@intra.acdp.at/gogs/tanja/cdplib.git"}
+
+
 ### Adding new Features/Folders/Packages
 How to set up a new Package: 
 

+ 3 - 2
cdplib/db_migration/DataFrameToCollection.py

@@ -90,8 +90,9 @@ class DataFrameToCollection():
 
                 # check that there is only one possible value of this field
                 n_distinct_values = data.groupby(grp_fields, sort=False)[field].nunique().max()
-
-                if n_distinct_values != 1:
+                
+                # n_distinct_values can be 0 if the column only contains NaN values
+                if n_distinct_values > 1:
                     err = "Field {0} is not unique with respect to {1}"\
                           .format(field, grp_fields)
 

+ 9 - 9
cdplib/db_migration/MigrationCleaning.py

@@ -36,7 +36,7 @@ class MigrationCleaning:
                  schema_parser: type = ParseJsonSchema):
         '''
         '''
-        self._log = Log('Migration Cleaning')
+        self.log = Log('Migration Cleaning')
         self._exception_handler = ExceptionsHandler()
 
         if inconsist_report_table is not None:
@@ -161,7 +161,7 @@ class MigrationCleaning:
                     (target_field in target_types) and\
                     (target_types[target_field] != source_types[source_field]):
 
-                self.log_and_raise(("Type {0} of field {1} "
+                self.log.log_and_raise_error(("Type {0} of field {1} "
                                     "in schema does not match "
                                     "type {2} of field {3} in "
                                     "migration mapping")
@@ -269,7 +269,7 @@ class MigrationCleaning:
         del data_inconsist
         gc.collect()
 
-        self._log.warning(("Filtering: {0} ."
+        self.log.warning(("Filtering: {0} ."
                            "Filtered {1} rows "
                            "and {2} instances"
                            .format(reason, n_rows_filtered, n_instances_filtered)))
@@ -335,11 +335,11 @@ class MigrationCleaning:
 
             except Exception as e:
 
-                self._exception_handler.log_and_raise(("Failed to replace {0} values "
+               self.log.log_and_raise_error(("Failed to replace {0} values "
                                     "in {1}. Exit with error {2}"
                                     .format(default_str, column, e)))
 
-        self._log.info("Replaced {} values".format(default_str))
+        self.log.info("Replaced {} values".format(default_str))
 
         return data
 
@@ -372,7 +372,7 @@ class MigrationCleaning:
 
                 elif (python_type == int) and data[column].isnull().any():
 
-                    self.log_and_raise(("Column {} contains missing values "
+                    self.log.log_and_raise_error(("Column {} contains missing values "
                                         "and cannot be of integer type"
                                         .format(column)))
 
@@ -387,7 +387,7 @@ class MigrationCleaning:
 
                 if data[column].dtype != python_type:
 
-                    self._log.warning(("After conversion type in {0} "
+                    self.log.warning(("After conversion type in {0} "
                                        "should be {1} "
                                        "but is still {2}"
                                        .format(column,
@@ -396,11 +396,11 @@ class MigrationCleaning:
 
             except Exception as e:
 
-                self._exception_handler.log_and_raise(("Failed to convert types in {0}. "
+                self.log.log_and_raise_error(("Failed to convert types in {0}. "
                                     "Exit with error {1}"
                                     .format(column, e)))
 
-        self._log.info("Converted dtypes")
+        self.log.info("Converted dtypes")
 
         return data
 

+ 2 - 2
cdplib/db_migration/ParseJsonSchema.py

@@ -49,9 +49,9 @@ class ParseJsonSchema(ParseDbSchema):
         for schema_path in schema_paths:
             try:
                 with open(schema_path, "r") as f:
-                    self.schemas.append(json.load(f))
+                   schema = json.load(f)
                 # Load schema dereferenced and cleaned by default values
-                # self.schemas.append(self.read_schema_and_parse_for_mongodb(schema_path))
+                self.schemas.append(self._dereference_schema(schema))
 
             except Exception as e:
                 err = ("Could not load json schema {0}, "