tests with invalid box-cox transformation

2025-12-13 12:18:22 +01:00 · 2025-02-28 14:50:25 +01:00 · 2025-02-28 14:50:25 +01:00 · b8147d006b
commit b8147d006b
parent 79e50f47a2
2 changed files with 1601 additions and 57 deletions
--- a/src/POET_Training.ipynb
+++ b/src/POET_Training.ipynb
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@ -1,5 +1,5 @@
 import keras
-from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
+from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LayerNormalization, LeakyReLU
 import tensorflow as tf
 import h5py
 import numpy as np
@ -17,6 +17,8 @@ from collections import Counter
 import os
 from sklearn import set_config
 from importlib import reload
+from scipy.stats import boxcox
+from scipy.special import inv_boxcox

 set_config(transform_output="pandas")

@ -354,14 +356,14 @@ def mass_balance_evaluation(model, X, preprocess):
    # backtransform min/max or standard scaler
    
    
-    if preprocess.scaler_X is None:
+    if preprocess.scaler_X is not None:
        X = pd.DataFrame(
        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
        columns=columns,
        )
        prediction = pd.DataFrame(
            preprocess.scaler_y.inverse_transform(prediction), columns=columns
-        )      
+            )      

    # apply backtransformation if log transformation was applied
    if preprocess.func_dict_out is not None:
@ -424,6 +426,7 @@ class preprocessing:
        self.func_dict_out = func_dict_out if func_dict_out is not None else None
        self.state = {"cluster": False, "log": False,
                      "balance": False, "scale": False}
+        self.boxcox_dict = None

    def funcTranform(self, *args):
        """Apply the transformation function to the data columnwise.
@ -434,7 +437,7 @@ class preprocessing:
        for i in args:
            for key in i.keys():
                if "Class" not in key:
-                    i[key] = i[key].apply(self.func_dict_in)
+                    i.loc[:, key] = i[key].apply(self.func_dict_in)
        self.state["log"] = True
        return args

@ -447,10 +450,51 @@ class preprocessing:
        for i in args:
            for key in i.keys():
                if "Class" not in key:
-                    i[key] = i[key].apply(self.func_dict_out)
+                    i.loc[:, key] = i[key].apply(self.func_dict_out)
        self.state["log"] = False
        return args

+    def boxcox_transform(self, *args):
+        c = 1
+        transformed_dataframes = []
+        dict_boxcox_list = []
+        if (len(args) != 2):
+            raise Exception("Only two dataframes are allowed for boxcox transformation")
+        
+        for i in args:
+            dict_boxcox = {}
+            transformed_df = i.copy()
+            
+            for key in transformed_df.columns:
+                if "Class" not in key:
+                    transformed, lambda_i = boxcox(transformed_df[key] + c)
+                    transformed_df[key] = transformed 
+                    dict_boxcox[key] = lambda_i
+            
+            transformed_dataframes.append(transformed_df) 
+            dict_boxcox_list.append(dict_boxcox)
+        
+        self.boxcox_dict = dict_boxcox_list
+        return transformed_dataframes  
+    
+    def boxcox_inverse(self, *args):
+        c = 1
+        inverse_dataframes = []
+        if(len(args) != 2):
+            raise Exception("Only two dataframes are allowed for boxcox transformation")
+        j = 0
+        for i in args:    
+            inverse_df = i.copy()
+            
+            for key in inverse_df.columns: 
+                if "Class" not in key:
+                    
+                    inverse_df[key] = inv_boxcox(inverse_df[key], self.boxcox_dict[j][key]) - c
+            j += 1
+            inverse_dataframes.append(inverse_df) 
+        return inverse_dataframes
+
+
    def cluster(self, X, y, species="Barite", n_clusters=2, x_length=50, y_length=50):
        """Apply k-means clustering to the data to differentiate betweeen reactive and non-reactive cells.