update linting

2025-12-15 19:38:21 +01:00 · 2025-02-26 18:51:40 +01:00 · 2025-02-26 18:51:40 +01:00 · 471038de50
commit 471038de50
parent 5f4c863b39
1 changed files with 53 additions and 40 deletions
--- a/src/preprocessing.py
+++ b/src/preprocessing.py
@@ -95,14 +95,16 @@ def custom_loss(
    loss_variant="huber",
    delta=1.0,
 ):
-    """Custom tensorflow loss function to combine Huber Loss with mass balance.
+    """
+    Custom tensorflow loss function to combine Huber Loss with mass balance.
    This is inspired by PINN (Physics Informed Neural Networks) where the loss function is a combination of the physics-based loss and the data-driven loss.
    The mass balance is a physics-based loss that ensures the conservation of mass in the system.
    A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.

    Args:
        preprocess: preprocessing object
-           column_dict: dictionary with the column names as keys and the corresponding index as values. (i.e {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
+        column_dict: dictionary with the column names as keys and the corresponding index as values. 
+        (i.e {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
        h1: hyperparameter for the importance of the huber loss
        h2: hyperparameter for the importance of the Barium mass balance term
        h3: hyperparameter for the importance of the Strontium mass balance term
@@ -122,12 +124,14 @@ def custom_loss(
                preprocess.scaler_X.data_range_, dtype=tf.float32
            )
            min_X = tf.convert_to_tensor(
-                preprocess.scaler_X.data_min_, dtype=tf.float32)
+                preprocess.scaler_X.data_min_, dtype=tf.float32
+            )
            scale_y = tf.convert_to_tensor(
                preprocess.scaler_y.data_range_, dtype=tf.float32
            )
            min_y = tf.convert_to_tensor(
-                preprocess.scaler_y.data_min_, dtype=tf.float32)
+                preprocess.scaler_y.data_min_, dtype=tf.float32
+            )

        elif scaler_type == "standard":
            scale_X = tf.convert_to_tensor(
@@ -140,10 +144,14 @@ def custom_loss(
                preprocess.scaler_y.mean_, dtype=tf.float32)

        else:
-            raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
+            raise Exception(
+                "No valid scaler type found. Choose between 'standard' and 'minmax'."
+            )

    except AttributeError:
-        raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.") 
+        raise Exception(
+            "Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
+        )

    def loss(results, predicted):
        # inverse min/max scaling
@@ -194,7 +202,8 @@ def custom_loss(
            total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
        else:
            raise Exception(
-                "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'.")
+                "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."
+            )

        return total_loss

@@ -213,7 +222,6 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
        mean of both mass balance terms
    """

-    
    if scaler_type == "minmax":
        scale_X = tf.convert_to_tensor(
            preprocess.scaler_X.data_range_, dtype=tf.float32
@@ -337,18 +345,23 @@ def mass_balance_evaluation(model, X, preprocess):

    return mass_balance_result

+
 def mass_balance_ratio(results, threshold=1e-5):
    proportion = {}

    mass_balance_threshold = results[results["mass_balance"] <= threshold]

    overall = len(mass_balance_threshold)
-    class_0_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 0])
-    class_1_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 1])
+    class_0_amount = len(
+        mass_balance_threshold[mass_balance_threshold["Class"] == 0])
+    class_1_amount = len(
+        mass_balance_threshold[mass_balance_threshold["Class"] == 1])

    proportion["overall"] = overall / len(results)
-    proportion["class_0"] = class_0_amount / len(results[results["Class"] == 0])
-    proportion["class_1"] = class_1_amount / len(results[results["Class"] == 1])
+    proportion["class_0"] = class_0_amount / \
+        len(results[results["Class"] == 0])
+    proportion["class_1"] = class_1_amount / \
+        len(results[results["Class"] == 1])

    return proportion