tests with invalid box-cox transformation

2025-12-15 20:28:22 +01:00 · 2025-02-28 14:50:25 +01:00 · 2025-02-28 14:50:25 +01:00 · b8147d006b
commit b8147d006b
parent 79e50f47a2
2 changed files with 1601 additions and 57 deletions
--- a/src/POET_Training.ipynb
+++ b/src/POET_Training.ipynb
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@ -1,5 +1,5 @@
 import keras
-from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
+from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LayerNormalization, LeakyReLU
 import tensorflow as tf
 import h5py
 import numpy as np
@ -17,6 +17,8 @@ from collections import Counter
 import os
 from sklearn import set_config
 from importlib import reload
 from scipy.stats import boxcox
 from scipy.special import inv_boxcox
 set_config(transform_output="pandas")
@ -354,14 +356,14 @@ def mass_balance_evaluation(model, X, preprocess):
    # backtransform min/max or standard scaler
-    if preprocess.scaler_X is None:
+    if preprocess.scaler_X is not None:
        X = pd.DataFrame(
        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
        columns=columns,
        )
        prediction = pd.DataFrame(
            preprocess.scaler_y.inverse_transform(prediction), columns=columns
-        )      
+            )      
    # apply backtransformation if log transformation was applied
    if preprocess.func_dict_out is not None:
@ -424,6 +426,7 @@ class preprocessing:
        self.func_dict_out = func_dict_out if func_dict_out is not None else None
        self.state = {"cluster": False, "log": False,
                      "balance": False, "scale": False}
        self.boxcox_dict = None
    def funcTranform(self, *args):
        """Apply the transformation function to the data columnwise.
@ -434,7 +437,7 @@ class preprocessing:
        for i in args:
            for key in i.keys():
                if "Class" not in key:
-                    i[key] = i[key].apply(self.func_dict_in)
+                    i.loc[:, key] = i[key].apply(self.func_dict_in)
        self.state["log"] = True
        return args
@ -447,10 +450,51 @@ class preprocessing:
        for i in args:
            for key in i.keys():
                if "Class" not in key:
-                    i[key] = i[key].apply(self.func_dict_out)
+                    i.loc[:, key] = i[key].apply(self.func_dict_out)
        self.state["log"] = False
        return args
    def boxcox_transform(self, *args):
        """Apply a Box-Cox transformation column-wise to two dataframes.

        Every column whose name does not contain ``"Class"`` is shifted by a
        constant of 1 and passed to ``scipy.stats.boxcox``, which also
        estimates the lambda for that column. The fitted lambdas are stored in
        ``self.boxcox_dict`` as one ``{column: lambda}`` dict per dataframe, in
        argument order, so that ``boxcox_inverse`` can undo the transformation.

        Args:
            *args: Exactly two dataframes.

        Returns:
            list: Transformed copies of the inputs, in argument order. The
            inputs themselves are not modified.

        Raises:
            ValueError: If not exactly two dataframes are passed, or if a
                column still contains non-positive values after the shift
                (Box-Cox is only defined for strictly positive data).
        """
        if len(args) != 2:
            raise ValueError("Only two dataframes are allowed for boxcox transformation")

        # Constant offset added before transforming; boxcox_inverse subtracts
        # the same value again.
        shift = 1
        transformed_frames = []
        lambdas_per_frame = []
        for frame in args:
            result = frame.copy()
            lambdas = {}
            for column in result.columns:
                if "Class" in column:
                    continue
                shifted = result[column] + shift
                # Fail early with the offending column name instead of the
                # generic "Data must be positive." raised inside scipy.
                if (shifted <= 0).any():
                    raise ValueError(
                        f"Box-Cox requires strictly positive data, but column "
                        f"{column!r} contains values <= {-shift}"
                    )
                values, fitted_lambda = boxcox(shifted)
                result[column] = values
                lambdas[column] = fitted_lambda
            transformed_frames.append(result)
            lambdas_per_frame.append(lambdas)

        # Only publish the lambdas once every column was transformed
        # successfully, so a failure never leaves a half-updated state behind.
        self.boxcox_dict = lambdas_per_frame
        return transformed_frames
    def boxcox_inverse(self, *args):
        """Undo the Box-Cox transformation applied by ``boxcox_transform``.

        Every column whose name does not contain ``"Class"`` is passed through
        ``scipy.special.inv_boxcox`` using the lambda stored for that column in
        ``self.boxcox_dict``, and the constant shift of 1 is subtracted again.
        The dataframes must be given in the same order as they were passed to
        ``boxcox_transform``, because the lambdas are looked up by position.

        Args:
            *args: Exactly two dataframes.

        Returns:
            list: Back-transformed copies of the inputs, in argument order.
            The inputs themselves are not modified.

        Raises:
            ValueError: If not exactly two dataframes are passed, or if the
                number of stored lambda dicts does not match.
            RuntimeError: If no lambdas are stored yet, i.e.
                ``self.boxcox_dict`` is ``None``.
            KeyError: If a column has no stored lambda.
        """
        if len(args) != 2:
            raise ValueError("Only two dataframes are allowed for boxcox transformation")
        if self.boxcox_dict is None:
            # Without this guard the lookup below fails with an opaque
            # "'NoneType' object is not subscriptable".
            raise RuntimeError("boxcox_transform must be called before boxcox_inverse")
        if len(self.boxcox_dict) != len(args):
            raise ValueError(
                f"Expected {len(self.boxcox_dict)} dataframes to match the stored "
                f"Box-Cox lambdas, got {len(args)}"
            )

        # Same constant offset that boxcox_transform adds before transforming.
        shift = 1
        restored_frames = []
        for frame, lambdas in zip(args, self.boxcox_dict):
            restored = frame.copy()
            for column in restored.columns:
                if "Class" in column:
                    continue
                # NOTE: inv_boxcox yields NaN where lambda * y + 1 is negative,
                # e.g. for values outside the range of the forward transform.
                restored[column] = inv_boxcox(restored[column], lambdas[column]) - shift
            restored_frames.append(restored)
        return restored_frames
    def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
        """Apply k-means clustering to the data to differentiate betweeen reactive and non-reactive cells.