|
@@ -255,9 +255,11 @@ class MigrationCleaning:
|
|
columns = db.get_column_names(tablename=self._inconsist_report_table)
|
|
columns = db.get_column_names(tablename=self._inconsist_report_table)
|
|
|
|
|
|
if len(columns) > 0:
|
|
if len(columns) > 0:
|
|
- columns_not_in_data = [column for column in columns if column not in data.columns]
|
|
|
|
- for value in columns_not_in_data:
|
|
|
|
- data_inconsist[value] = 'Column does not exist in the mongo database and has therefore been dropped'
|
|
|
|
|
|
+ # TODO Tanya:The commented lines caused the reason to be the same for all entries.
|
|
|
|
+
|
|
|
|
+ #columns_not_in_data = [column for column in columns if column not in data.columns]
|
|
|
|
+ #for value in columns_not_in_data:
|
|
|
|
+ # data_inconsist[value] = 'Column does not exist in the mongo database and has therefore been dropped'
|
|
data_inconsist = data_inconsist[columns]
|
|
data_inconsist = data_inconsist[columns]
|
|
|
|
|
|
db.append_to_table(data=data_inconsist,
|
|
db.append_to_table(data=data_inconsist,
|
|
@@ -396,7 +398,7 @@ class MigrationCleaning:
|
|
data[column] = data[column].astype(python_type)
|
|
data[column] = data[column].astype(python_type)
|
|
|
|
|
|
elif python_type == float:
|
|
elif python_type == float:
|
|
-
|
|
|
|
|
|
+
|
|
data[column] = data[column].fillna(np.inf)
|
|
data[column] = data[column].fillna(np.inf)
|
|
# Replaces empty fields when type is string
|
|
# Replaces empty fields when type is string
|
|
if data[column].dtypes == object:
|
|
if data[column].dtypes == object:
|
|
@@ -564,15 +566,15 @@ class MigrationCleaning:
|
|
return data
|
|
return data
|
|
|
|
|
|
def clean_json_from_None_object(self, data: pd.DataFrame, clean_bool: bool = True) -> dict:
    """Serialize ``data`` to JSON and strip placeholder / empty entries.

    The frame is round-tripped through ``DataFrame.to_json`` (dates written
    in ISO format) and ``json.loads``; the resulting nested structure is
    then filtered with ``remap``.

    Entries are always removed when their value is ``None``, the string
    ``'None'`` or the string ``'inf'``.

    :param data: frame to convert.
    :param clean_bool: when True, additionally drop every value that is
        falsy and not an actual ``bool`` (e.g. ``0``, ``0.0``, ``''`` and
        containers left empty after filtering). Real ``True``/``False``
        values are always kept. Note that this also removes legitimate
        numeric zeros, so only enable it when that is acceptable.
    :return: the filtered structure produced by ``json.loads`` (not a
        DataFrame).
    """
    parsed = json.loads(data.to_json(date_format="iso"))

    def keep(path, key, value) -> bool:
        # Placeholders that must never reach the target document.
        if value is None or value == 'None' or value == 'inf':
            return False
        # Booleans are kept as they are; any other falsy value is dropped
        # only when the caller asked for it.
        if clean_bool and not isinstance(value, bool) and not value:
            return False
        return True

    # remap visits a container only after its children were filtered, so a
    # single traversal yields the same result as one traversal per rule.
    return remap(parsed, keep)
|
|
@@ -588,27 +590,27 @@ class MigrationCleaning:
|
|
|
|
|
|
|
|
|
|
def map_toleranzen_values(self, data: pd.DataFrame, toleranzen: pd.DataFrame):
|
|
def map_toleranzen_values(self, data: pd.DataFrame, toleranzen: pd.DataFrame):
|
|
-
|
|
|
|
|
|
+
|
|
toleranzen.drop('nr', axis=1, inplace=True)
|
|
toleranzen.drop('nr', axis=1, inplace=True)
|
|
-
|
|
|
|
|
|
+
|
|
toleranzen.columns = ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'wellenschenkel.geometrie.durchmesser.min', 'wellenschenkel.geometrie.durchmesser.max', 'innenring.geometrie.durchmesser.min',
|
|
toleranzen.columns = ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'wellenschenkel.geometrie.durchmesser.min', 'wellenschenkel.geometrie.durchmesser.max', 'innenring.geometrie.durchmesser.min',
|
|
'innenring.geometrie.durchmesser.max', 'wellenschenkel_innenring_difference.geometrie.durchmesser.min', 'wellenschenkel_innenring_difference.geometrie.durchmesser.max']
|
|
'innenring.geometrie.durchmesser.max', 'wellenschenkel_innenring_difference.geometrie.durchmesser.min', 'wellenschenkel_innenring_difference.geometrie.durchmesser.max']
|
|
|
|
|
|
- labyrinten_drop_columns = ['innenring.geometrie.durchmesser.min', 'innenring.geometrie.durchmesser.max',
|
|
|
|
|
|
+ labyrinten_drop_columns = ['innenring.geometrie.durchmesser.min', 'innenring.geometrie.durchmesser.max',
|
|
'wellenschenkel_innenring_difference.geometrie.durchmesser.min', 'wellenschenkel_innenring_difference.geometrie.durchmesser.max']
|
|
'wellenschenkel_innenring_difference.geometrie.durchmesser.min', 'wellenschenkel_innenring_difference.geometrie.durchmesser.max']
|
|
-
|
|
|
|
|
|
+
|
|
labyrinten_columns= ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'labyrinthring.geometrie.durchmesser.min', 'labyrinthring.geometrie.durchmesser.max']
|
|
labyrinten_columns= ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'labyrinthring.geometrie.durchmesser.min', 'labyrinthring.geometrie.durchmesser.max']
|
|
-
|
|
|
|
|
|
+
|
|
reparatur_stufe_labyrinten_columns= ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'labyrinthring.reparatur_stufe.durchmesser.min', 'labyrinthring.reparatur_stufe.durchmesser.max']
|
|
reparatur_stufe_labyrinten_columns= ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'labyrinthring.reparatur_stufe.durchmesser.min', 'labyrinthring.reparatur_stufe.durchmesser.max']
|
|
|
|
|
|
- reparatur_stufe_columns = ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'wellenschenkel.reparatur_stufe.durchmesser.min',
|
|
|
|
|
|
+ reparatur_stufe_columns = ['toleranzbez_wellen_reference', 'toleranzbez_innenring_reference', 'wellenschenkel.reparatur_stufe.durchmesser.min',
|
|
'wellenschenkel.reparatur_stufe.durchmesser.max', 'innenring.reparatur_stufe.durchmesser.min',
|
|
'wellenschenkel.reparatur_stufe.durchmesser.max', 'innenring.reparatur_stufe.durchmesser.min',
|
|
- 'innenring.reparatur_stufe.durchmesser.max', 'wellenschenkel_innenring_difference.reparatur_stufe.durchmesser.min',
|
|
|
|
|
|
+ 'innenring.reparatur_stufe.durchmesser.max', 'wellenschenkel_innenring_difference.reparatur_stufe.durchmesser.min',
|
|
'wellenschenkel_innenring_difference.reparatur_stufe.durchmesser.max']
|
|
'wellenschenkel_innenring_difference.reparatur_stufe.durchmesser.max']
|
|
|
|
|
|
-
|
|
|
|
|
|
+
|
|
toleranzen_reference_columns = ['wellenschenkel_toleranz', 'labyrinthring_toleranz', 'wellen_reparatur_stufe_toleranz', 'labyrinthring_reparatur_stufe_toleranz']
|
|
toleranzen_reference_columns = ['wellenschenkel_toleranz', 'labyrinthring_toleranz', 'wellen_reparatur_stufe_toleranz', 'labyrinthring_reparatur_stufe_toleranz']
|
|
-
|
|
|
|
|
|
+
|
|
available_columns = [column for column in data.columns if column in toleranzen_reference_columns]
|
|
available_columns = [column for column in data.columns if column in toleranzen_reference_columns]
|
|
for column in available_columns:
|
|
for column in available_columns:
|
|
merge_map = [False] *len(data.index)
|
|
merge_map = [False] *len(data.index)
|
|
@@ -623,13 +625,13 @@ class MigrationCleaning:
|
|
|
|
|
|
else:
|
|
else:
|
|
temp_toleranzen.columns = labyrinten_columns
|
|
temp_toleranzen.columns = labyrinten_columns
|
|
-
|
|
|
|
|
|
+
|
|
elif 'reparatur_stufe' in column:
|
|
elif 'reparatur_stufe' in column:
|
|
temp_toleranzen.columns = reparatur_stufe_columns
|
|
temp_toleranzen.columns = reparatur_stufe_columns
|
|
merge_map = data['innenring_reparatur_stufe_zulaessig'] == 'Ja'
|
|
merge_map = data['innenring_reparatur_stufe_zulaessig'] == 'Ja'
|
|
data_before = len(data.index)
|
|
data_before = len(data.index)
|
|
data = data.merge(temp_toleranzen, how='left', left_on=column, right_on='toleranzbez_wellen_reference')
|
|
data = data.merge(temp_toleranzen, how='left', left_on=column, right_on='toleranzbez_wellen_reference')
|
|
- data.loc[merge_map, temp_toleranzen.columns] = np.nan
|
|
|
|
|
|
+ data.loc[merge_map, temp_toleranzen.columns] = np.nan
|
|
if data_before != len(data.index):
|
|
if data_before != len(data.index):
|
|
print('WEVE LOST DATA!!')
|
|
print('WEVE LOST DATA!!')
|
|
print('before:', data_before, 'now:', len(data.index))
|
|
print('before:', data_before, 'now:', len(data.index))
|
|
@@ -641,9 +643,9 @@ class MigrationCleaning:
|
|
|
|
|
|
def label_is_level(
|
|
def label_is_level(
|
|
self,
|
|
self,
|
|
- data: pd.DataFrame,
|
|
|
|
- column: str = "is",
|
|
|
|
- include_schrott: bool = False,
|
|
|
|
|
|
+ data: pd.DataFrame,
|
|
|
|
+ column: str = "is",
|
|
|
|
+ include_schrott: bool = False,
|
|
drop_rows_with_no_is: bool = False) -> pd.DataFrame:
|
|
drop_rows_with_no_is: bool = False) -> pd.DataFrame:
|
|
'''
|
|
'''
|
|
'''
|
|
'''
|
|
@@ -659,16 +661,16 @@ class MigrationCleaning:
|
|
data.loc[data[column].isin(v), column] = k
|
|
data.loc[data[column].isin(v), column] = k
|
|
else:
|
|
else:
|
|
data.loc[data[column].isnull(), column] = k
|
|
data.loc[data[column].isnull(), column] = k
|
|
-
|
|
|
|
|
|
+
|
|
if include_schrott and ("operation_type_2" in data.columns):
|
|
if include_schrott and ("operation_type_2" in data.columns):
|
|
schrott_mask = (data["operation_type_2"] == 2)
|
|
schrott_mask = (data["operation_type_2"] == 2)
|
|
data.loc[schrott_mask, column] = 5
|
|
data.loc[schrott_mask, column] = 5
|
|
-
|
|
|
|
|
|
+
|
|
data.loc[~data[column].isin([0,1,2,3,4,5]), column] = 0
|
|
data.loc[~data[column].isin([0,1,2,3,4,5]), column] = 0
|
|
-
|
|
|
|
|
|
+
|
|
if drop_rows_with_no_is:
|
|
if drop_rows_with_no_is:
|
|
data = data.loc[data[column] != 0].copy(deep=True)
|
|
data = data.loc[data[column] != 0].copy(deep=True)
|
|
-
|
|
|
|
|
|
+
|
|
return data.reset_index(drop=True)
|
|
return data.reset_index(drop=True)
|
|
|
|
|
|
|
|
|