mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-15 20:28:22 +01:00
tests with invalid box-cox transformation
This commit is contained in:
parent
79e50f47a2
commit
b8147d006b
File diff suppressed because one or more lines are too long
@ -1,5 +1,5 @@
|
|||||||
import keras
|
import keras
|
||||||
from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
|
from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LayerNormalization, LeakyReLU
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import h5py
|
import h5py
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -17,6 +17,8 @@ from collections import Counter
|
|||||||
import os
|
import os
|
||||||
from sklearn import set_config
|
from sklearn import set_config
|
||||||
from importlib import reload
|
from importlib import reload
|
||||||
|
from scipy.stats import boxcox
|
||||||
|
from scipy.special import inv_boxcox
|
||||||
|
|
||||||
set_config(transform_output="pandas")
|
set_config(transform_output="pandas")
|
||||||
|
|
||||||
@ -354,14 +356,14 @@ def mass_balance_evaluation(model, X, preprocess):
|
|||||||
# backtransform min/max or standard scaler
|
# backtransform min/max or standard scaler
|
||||||
|
|
||||||
|
|
||||||
if preprocess.scaler_X is None:
|
if preprocess.scaler_X is not None:
|
||||||
X = pd.DataFrame(
|
X = pd.DataFrame(
|
||||||
preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
|
preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
|
||||||
columns=columns,
|
columns=columns,
|
||||||
)
|
)
|
||||||
prediction = pd.DataFrame(
|
prediction = pd.DataFrame(
|
||||||
preprocess.scaler_y.inverse_transform(prediction), columns=columns
|
preprocess.scaler_y.inverse_transform(prediction), columns=columns
|
||||||
)
|
)
|
||||||
|
|
||||||
# apply backtransformation if log transformation was applied
|
# apply backtransformation if log transformation was applied
|
||||||
if preprocess.func_dict_out is not None:
|
if preprocess.func_dict_out is not None:
|
||||||
@ -424,6 +426,7 @@ class preprocessing:
|
|||||||
self.func_dict_out = func_dict_out if func_dict_out is not None else None
|
self.func_dict_out = func_dict_out if func_dict_out is not None else None
|
||||||
self.state = {"cluster": False, "log": False,
|
self.state = {"cluster": False, "log": False,
|
||||||
"balance": False, "scale": False}
|
"balance": False, "scale": False}
|
||||||
|
self.boxcox_dict = None
|
||||||
|
|
||||||
def funcTranform(self, *args):
|
def funcTranform(self, *args):
|
||||||
"""Apply the transformation function to the data columnwise.
|
"""Apply the transformation function to the data columnwise.
|
||||||
@ -434,7 +437,7 @@ class preprocessing:
|
|||||||
for i in args:
|
for i in args:
|
||||||
for key in i.keys():
|
for key in i.keys():
|
||||||
if "Class" not in key:
|
if "Class" not in key:
|
||||||
i[key] = i[key].apply(self.func_dict_in)
|
i.loc[:, key] = i[key].apply(self.func_dict_in)
|
||||||
self.state["log"] = True
|
self.state["log"] = True
|
||||||
return args
|
return args
|
||||||
|
|
||||||
@ -447,10 +450,51 @@ class preprocessing:
|
|||||||
for i in args:
|
for i in args:
|
||||||
for key in i.keys():
|
for key in i.keys():
|
||||||
if "Class" not in key:
|
if "Class" not in key:
|
||||||
i[key] = i[key].apply(self.func_dict_out)
|
i.loc[:, key] = i[key].apply(self.func_dict_out)
|
||||||
self.state["log"] = False
|
self.state["log"] = False
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
def boxcox_transform(self, *args):
    """Apply a column-wise Box-Cox transformation to exactly two dataframes.

    Every column whose name does not contain "Class" is shifted by a
    constant offset of 1 and passed to ``scipy.stats.boxcox``, which fits
    one lambda per column. The fitted lambdas are stored in
    ``self.boxcox_dict`` as a list with one ``{column: lambda}`` dict per
    dataframe (in argument order) so that ``boxcox_inverse`` can undo the
    transformation. The input dataframes are copied, not modified.

    Args:
        *args: Exactly two dataframes.

    Returns:
        A list with the two transformed dataframe copies, in argument order.

    Raises:
        ValueError: If the number of dataframes is not two, or if a shifted
            column still contains non-positive values (Box-Cox is only
            defined for strictly positive data).
    """
    # Constant offset added before the transformation; boxcox_inverse
    # subtracts the same value again.
    shift = 1

    if len(args) != 2:
        raise ValueError("Only two dataframes are allowed for boxcox transformation")

    transformed_dataframes = []
    lambdas_per_frame = []

    for frame in args:
        transformed_df = frame.copy()
        lambdas = {}

        for key in transformed_df.columns:
            if "Class" in key:
                # Class label columns are passed through untouched.
                continue

            shifted = transformed_df[key] + shift
            # Fail early with the offending column name instead of the
            # generic "Data must be positive" error raised inside scipy.
            if (shifted <= 0).any():
                raise ValueError(
                    f"Column '{key}' contains values <= {-shift}; Box-Cox "
                    "requires strictly positive data after shifting"
                )

            transformed, lambda_i = boxcox(shifted.to_numpy())
            transformed_df[key] = transformed
            lambdas[key] = lambda_i

        transformed_dataframes.append(transformed_df)
        lambdas_per_frame.append(lambdas)

    # Only publish the lambdas once every column has been transformed.
    self.boxcox_dict = lambdas_per_frame
    return transformed_dataframes
|
||||||
|
|
||||||
|
def boxcox_inverse(self, *args):
    """Undo ``boxcox_transform`` on exactly two dataframes.

    Every column whose name does not contain "Class" is passed through
    ``scipy.special.inv_boxcox`` with the lambda stored for that column in
    ``self.boxcox_dict`` and then has the constant offset of 1 subtracted.
    The dataframes must be given in the same order as they were passed to
    ``boxcox_transform``: the first dataframe uses ``self.boxcox_dict[0]``
    and the second uses ``self.boxcox_dict[1]``. The input dataframes are
    copied, not modified.

    Args:
        *args: Exactly two dataframes.

    Returns:
        A list with the two back-transformed dataframe copies, in argument
        order.

    Raises:
        ValueError: If the number of dataframes is not two, or if no
            lambdas have been stored yet (``self.boxcox_dict`` is None).
        KeyError: If a column has no stored lambda.
    """
    # Must match the offset that boxcox_transform added.
    shift = 1

    if len(args) != 2:
        raise ValueError("Only two dataframes are allowed for boxcox transformation")
    if self.boxcox_dict is None:
        raise ValueError(
            "No Box-Cox lambdas available; call boxcox_transform before boxcox_inverse"
        )

    inverse_dataframes = []
    # enumerate ties each dataframe to the lambda dict at the same position,
    # advancing exactly once per dataframe.
    for index, frame in enumerate(args):
        lambdas = self.boxcox_dict[index]
        inverse_df = frame.copy()

        for key in inverse_df.columns:
            if "Class" not in key:
                inverse_df[key] = inv_boxcox(inverse_df[key], lambdas[key]) - shift

        inverse_dataframes.append(inverse_df)

    return inverse_dataframes
|
||||||
|
|
||||||
|
|
||||||
def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
|
def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
|
||||||
"""Apply k-means clustering to the data to differentiate betweeen reactive and non-reactive cells.
|
"""Apply k-means clustering to the data to differentiate betweeen reactive and non-reactive cells.
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user