update mass_balance_evaluation

2025-12-13 10:38:22 +01:00 · 2025-02-26 18:32:11 +01:00 · 2025-02-26 18:32:11 +01:00 · 331306c141
commit 331306c141
parent cce2f696a0
1 changed files with 52 additions and 28 deletions
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@ -116,30 +116,34 @@ def custom_loss(

    # as far as I know tensorflow does not directly support the use of scaler objects
    # therefore, the backtransformation is done manually
-    if scaler_type == "minmax":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_range_, dtype=tf.float32
-        )
-        min_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_min_, dtype=tf.float32)
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_range_, dtype=tf.float32
-        )
-        min_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_min_, dtype=tf.float32)
+    try:
+        if scaler_type == "minmax":
+            scale_X = tf.convert_to_tensor(
+                preprocess.scaler_X.data_range_, dtype=tf.float32
+            )
+            min_X = tf.convert_to_tensor(
+                preprocess.scaler_X.data_min_, dtype=tf.float32)
+            scale_y = tf.convert_to_tensor(
+                preprocess.scaler_y.data_range_, dtype=tf.float32
+            )
+            min_y = tf.convert_to_tensor(
+                preprocess.scaler_y.data_min_, dtype=tf.float32)

-    elif scaler_type == "standard":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.scale_, dtype=tf.float32)
-        mean_X = tf.convert_to_tensor(
-            preprocess.scaler_X.mean_, dtype=tf.float32)
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.scale_, dtype=tf.float32)
-        mean_y = tf.convert_to_tensor(
-            preprocess.scaler_y.mean_, dtype=tf.float32)
-        
-    else:
-        raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
+        elif scaler_type == "standard":
+            scale_X = tf.convert_to_tensor(
+                preprocess.scaler_X.scale_, dtype=tf.float32)
+            mean_X = tf.convert_to_tensor(
+                preprocess.scaler_X.mean_, dtype=tf.float32)
+            scale_y = tf.convert_to_tensor(
+                preprocess.scaler_y.scale_, dtype=tf.float32)
+            mean_y = tf.convert_to_tensor(
+                preprocess.scaler_y.mean_, dtype=tf.float32)
+
+        else:
+            raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
+    
+    except AttributeError:
+        raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.") 

    def loss(results, predicted):
        # inverse min/max scaling
@ -302,8 +306,9 @@ def mass_balance_evaluation(model, X, preprocess):
    
    # predict the chemistry
    columns = X.iloc[:, X.columns != "Class"].columns
+    classes = X["Class"]
+    classes.reset_index(drop=True, inplace=True)
    prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
-
    # backtransform min/max or standard scaler
    X = pd.DataFrame(
        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
@ -313,20 +318,39 @@ def mass_balance_evaluation(model, X, preprocess):
        preprocess.scaler_y.inverse_transform(prediction), columns=columns
    )

-    # apply exp1m on the columns of predicted_inverse and results_inverse if log transformation was applied
+    # apply backtransformation if log transformation was applied
    if preprocess.func_dict_out is not None:
-        X = preprocess.funcInverse(X)
+        X = preprocess.funcInverse(X)[0]
+        prediction = preprocess.funcInverse(prediction)[0]

    # calculate mass balance
    dBa = np.abs(
        (prediction["Ba"] + prediction["Barite"]) - (X["Ba"] + X["Barite"]))
-    print(dBa.min())
    dSr = np.abs(
        (prediction["Sr"] + prediction["Celestite"]) -
        (X["Sr"] + X["Celestite"])
    )
+    
+    mass_balance_result = pd.DataFrame(
+        {"dBa":dBa, "dSr":dSr, "mass_balance":dBa+dSr, "Class": classes}
+    )

-    return dBa + dSr
+    return mass_balance_result
+
+def mass_balance_ratio(results, threshold=1e-5):
+    """Return the share of rows whose ``mass_balance`` is <= ``threshold``.
+
+    The result maps "overall", "class_0" and "class_1" to a float ratio; a
+    group without rows yields NaN instead of raising ZeroDivisionError.
+    """
+    within = results["mass_balance"] <= threshold
+    groups = {
+        "overall": within,
+        "class_0": within[results["Class"] == 0],
+        "class_1": within[results["Class"] == 1],
+    }
+    # an empty group has no defined ratio; report NaN rather than dividing by 0
+    return {k: (float(v.mean()) if len(v) else float("nan")) for k, v in groups.items()}


 class preprocessing: