tests with invalid box-cox transformation

This commit is contained in:
Hannes Signer 2025-02-28 14:50:25 +01:00
parent 79e50f47a2
commit b8147d006b
2 changed files with 1601 additions and 57 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
import keras import keras
from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LayerNormalization, LeakyReLU
import tensorflow as tf import tensorflow as tf
import h5py import h5py
import numpy as np import numpy as np
@ -17,6 +17,8 @@ from collections import Counter
import os import os
from sklearn import set_config from sklearn import set_config
from importlib import reload from importlib import reload
from scipy.stats import boxcox
from scipy.special import inv_boxcox
set_config(transform_output="pandas") set_config(transform_output="pandas")
@ -354,14 +356,14 @@ def mass_balance_evaluation(model, X, preprocess):
# backtransform min/max or standard scaler # backtransform min/max or standard scaler
if preprocess.scaler_X is None: if preprocess.scaler_X is not None:
X = pd.DataFrame( X = pd.DataFrame(
preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]), preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
columns=columns, columns=columns,
) )
prediction = pd.DataFrame( prediction = pd.DataFrame(
preprocess.scaler_y.inverse_transform(prediction), columns=columns preprocess.scaler_y.inverse_transform(prediction), columns=columns
) )
# apply backtransformation if log transformation was applied # apply backtransformation if log transformation was applied
if preprocess.func_dict_out is not None: if preprocess.func_dict_out is not None:
@ -424,6 +426,7 @@ class preprocessing:
self.func_dict_out = func_dict_out if func_dict_out is not None else None self.func_dict_out = func_dict_out if func_dict_out is not None else None
self.state = {"cluster": False, "log": False, self.state = {"cluster": False, "log": False,
"balance": False, "scale": False} "balance": False, "scale": False}
self.boxcox_dict = None
def funcTranform(self, *args): def funcTranform(self, *args):
"""Apply the transformation function to the data columnwise. """Apply the transformation function to the data columnwise.
@ -434,7 +437,7 @@ class preprocessing:
for i in args: for i in args:
for key in i.keys(): for key in i.keys():
if "Class" not in key: if "Class" not in key:
i[key] = i[key].apply(self.func_dict_in) i.loc[:, key] = i[key].apply(self.func_dict_in)
self.state["log"] = True self.state["log"] = True
return args return args
@ -447,10 +450,51 @@ class preprocessing:
for i in args: for i in args:
for key in i.keys(): for key in i.keys():
if "Class" not in key: if "Class" not in key:
i[key] = i[key].apply(self.func_dict_out) i.loc[:, key] = i[key].apply(self.func_dict_out)
self.state["log"] = False self.state["log"] = False
return args return args
def boxcox_transform(self, *args):
    """Apply a Box-Cox transformation column-wise to exactly two dataframes.

    Every column whose name does not contain "Class" is shifted by a
    constant of 1 and passed to ``scipy.stats.boxcox``; the lambda fitted
    for each column is remembered so that ``boxcox_inverse`` can undo the
    transformation. The input dataframes are copied, not modified.

    Args:
        *args: Exactly two dataframes.

    Returns:
        A list with the two transformed dataframes, in input order.

    Raises:
        ValueError: If the number of dataframes is not two, or if a column
            still contains non-positive values after the shift (Box-Cox is
            only defined for strictly positive data).

    Side effects:
        Sets ``self.boxcox_dict`` to a list holding one ``{column: lambda}``
        dictionary per dataframe. It is only assigned once both dataframes
        were transformed successfully.
    """
    shift = 1  # constant offset added to every column before the transform

    if len(args) != 2:
        raise ValueError("Only two dataframes are allowed for boxcox transformation")

    transformed_dataframes = []
    lambdas_per_frame = []
    for frame in args:
        transformed_df = frame.copy()
        lambdas = {}
        for key in transformed_df.columns:
            if "Class" in key:
                continue
            shifted = transformed_df[key] + shift
            # scipy rejects such data with a message that does not say which
            # column is at fault; fail early and name the column instead.
            if (shifted <= 0).any():
                raise ValueError(
                    f"Column '{key}' contains values <= {-shift}; Box-Cox "
                    f"requires strictly positive data after the +{shift} shift"
                )
            transformed, lambda_i = boxcox(shifted)
            transformed_df[key] = transformed
            lambdas[key] = lambda_i
        transformed_dataframes.append(transformed_df)
        lambdas_per_frame.append(lambdas)

    self.boxcox_dict = lambdas_per_frame
    return transformed_dataframes
def boxcox_inverse(self, *args):
    """Undo the Box-Cox transformation on exactly two dataframes.

    For the dataframe at position ``n``, every column whose name does not
    contain "Class" is back-transformed with ``scipy.special.inv_boxcox``
    using the lambda stored in ``self.boxcox_dict[n]`` for that column, and
    the constant shift of 1 is subtracted again. The input dataframes are
    copied, not modified.

    Args:
        *args: Exactly two dataframes, in the same order in which they were
            passed to ``boxcox_transform``.

    Returns:
        A list with the two back-transformed dataframes, in input order.

    Raises:
        ValueError: If the number of dataframes is not two, or if no
            lambdas have been stored yet (``self.boxcox_dict`` is None).
        KeyError: If a column has no stored lambda.
    """
    shift = 1  # must match the offset added before the forward transform

    if len(args) != 2:
        raise ValueError("Only two dataframes are allowed for boxcox transformation")
    if self.boxcox_dict is None:
        raise ValueError(
            "No Box-Cox lambdas stored; call boxcox_transform before boxcox_inverse"
        )

    inverse_dataframes = []
    # One lambda dictionary per dataframe: the index advances with the
    # dataframe, never with the column.
    for position, frame in enumerate(args):
        lambdas = self.boxcox_dict[position]
        inverse_df = frame.copy()
        for key in inverse_df.columns:
            if "Class" not in key:
                inverse_df[key] = inv_boxcox(inverse_df[key], lambdas[key]) - shift
        inverse_dataframes.append(inverse_df)
    return inverse_dataframes
def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50): def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
"""Apply k-means clustering to the data to differentiate between reactive and non-reactive cells. """Apply k-means clustering to the data to differentiate between reactive and non-reactive cells.