tests with invalid box-cox transformation

This commit is contained in:
Hannes Signer 2025-02-28 14:50:25 +01:00
parent 79e50f47a2
commit b8147d006b
2 changed files with 1601 additions and 57 deletions

File diff suppressed because one or more lines are too long

View File

@ -1,5 +1,5 @@
import keras
from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LayerNormalization, LeakyReLU
import tensorflow as tf
import h5py
import numpy as np
@ -17,6 +17,8 @@ from collections import Counter
import os
from sklearn import set_config
from importlib import reload
from scipy.stats import boxcox
from scipy.special import inv_boxcox
set_config(transform_output="pandas")
@ -354,14 +356,14 @@ def mass_balance_evaluation(model, X, preprocess):
# backtransform min/max or standard scaler
if preprocess.scaler_X is None:
if preprocess.scaler_X is not None:
X = pd.DataFrame(
preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
columns=columns,
)
prediction = pd.DataFrame(
preprocess.scaler_y.inverse_transform(prediction), columns=columns
)
)
# apply backtransformation if log transformation was applied
if preprocess.func_dict_out is not None:
@ -424,6 +426,7 @@ class preprocessing:
self.func_dict_out = func_dict_out if func_dict_out is not None else None
self.state = {"cluster": False, "log": False,
"balance": False, "scale": False}
self.boxcox_dict = None
def funcTranform(self, *args):
"""Apply the transformation function to the data columnwise.
@ -434,7 +437,7 @@ class preprocessing:
for i in args:
for key in i.keys():
if "Class" not in key:
i[key] = i[key].apply(self.func_dict_in)
i.loc[:, key] = i[key].apply(self.func_dict_in)
self.state["log"] = True
return args
@ -447,10 +450,51 @@ class preprocessing:
for i in args:
for key in i.keys():
if "Class" not in key:
i[key] = i[key].apply(self.func_dict_out)
i.loc[:, key] = i[key].apply(self.func_dict_out)
self.state["log"] = False
return args
def boxcox_transform(self, *args):
    """Apply a shifted Box-Cox transformation column-wise to two dataframes.

    Every column whose name does not contain "Class" is shifted by a
    constant of 1 and handed to ``scipy.stats.boxcox``, which fits a
    separate lambda for that column. The fitted lambdas are stored on
    ``self.boxcox_dict`` as a list holding one ``{column: lambda}`` dict per
    dataframe, in argument order, so that ``boxcox_inverse`` can undo the
    transformation later.

    Args:
        *args: Exactly two dataframes. They are copied, not modified.

    Returns:
        list: The transformed copies, in the same order as ``args``.

    Raises:
        ValueError: If the number of dataframes passed is not two.
    """
    # Constant added before the transformation; boxcox_inverse subtracts
    # the same value again.
    shift = 1

    if len(args) != 2:
        raise ValueError(
            "Only two dataframes are allowed for boxcox transformation"
        )

    transformed_dataframes = []
    lambdas_per_frame = []
    for frame in args:
        transformed_df = frame.copy()
        lambdas = {}
        for key in transformed_df.columns:
            if "Class" in key:
                # Columns named like the class label are passed through.
                continue
            values, lambdas[key] = boxcox(transformed_df[key] + shift)
            transformed_df[key] = values
        transformed_dataframes.append(transformed_df)
        lambdas_per_frame.append(lambdas)

    self.boxcox_dict = lambdas_per_frame
    return transformed_dataframes
def boxcox_inverse(self, *args):
    """Undo the shifted Box-Cox transformation on two dataframes.

    The lambdas stored in ``self.boxcox_dict`` are matched to the
    dataframes by position: the first dataframe uses the first dict, the
    second dataframe the second. Every column whose name does not contain
    "Class" is passed through ``scipy.special.inv_boxcox`` with its stored
    lambda, and the constant shift of 1 is subtracted afterwards.

    Args:
        *args: Exactly two dataframes, in the same order in which they
            were passed to ``boxcox_transform``. They are copied, not
            modified.

    Returns:
        list: The back-transformed copies, in the same order as ``args``.

    Raises:
        ValueError: If the number of dataframes passed is not two.
        RuntimeError: If no lambdas are stored in ``self.boxcox_dict``.
        KeyError: If a column has no stored lambda.
    """
    # Must equal the shift that was added before the forward transformation.
    shift = 1

    if len(args) != 2:
        raise ValueError(
            "Only two dataframes are allowed for boxcox transformation"
        )
    if self.boxcox_dict is None:
        # Fail with a clear message instead of an opaque subscript error.
        raise RuntimeError(
            "boxcox_transform must be called before boxcox_inverse"
        )

    inverse_dataframes = []
    for position, frame in enumerate(args):
        lambdas = self.boxcox_dict[position]
        inverse_df = frame.copy()
        for key in inverse_df.columns:
            if "Class" not in key:
                inverse_df[key] = (
                    inv_boxcox(inverse_df[key], lambdas[key]) - shift
                )
        inverse_dataframes.append(inverse_df)
    return inverse_dataframes
def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
"""Apply k-means clustering to the data to differentiate betweeen reactive and non-reactive cells.