update linting

This commit is contained in:
Hannes Signer 2025-02-26 18:51:40 +01:00
parent 5f4c863b39
commit 471038de50

View File

@ -95,23 +95,25 @@ def custom_loss(
loss_variant="huber",
delta=1.0,
):
"""Custom tensorflow loss function to combine Huber Loss with mass balance.
This is inspired by PINN (Physics Informed Neural Networks) where the loss function is a combination of the physics-based loss and the data-driven loss.
The mass balance is a physics-based loss that ensures the conservation of mass in the system.
A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.
"""
Custom tensorflow loss function to combine Huber Loss with mass balance.
This is inspired by PINN (Physics Informed Neural Networks) where the loss function is a combination of the physics-based loss and the data-driven loss.
The mass balance is a physics-based loss that ensures the conservation of mass in the system.
A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.
Args:
preprocess: preprocessing object
column_dict: dictionary with the column names as keys and the corresponding index as values. (i.e {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
h1: hyperparameter for the importance of the huber loss
h2: hyperparameter for the importance of the Barium mass balance term
h3: hyperparameter for the importance of the Strontium mass balance term
scaler_type: Normalization approach. Choose between "standard" and "minmax". Defaults to "minmax".
loss_variant: Loss function approach. Choose between "huber and "huber_mass_balance". Defaults to "huber".
delta: Hyperparameter for the Huber function threshold. Defaults to 1.0.
Args:
preprocess: preprocessing object
column_dict: dictionary with the column names as keys and the corresponding index as values.
(i.e {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
h1: hyperparameter for the importance of the huber loss
h2: hyperparameter for the importance of the Barium mass balance term
h3: hyperparameter for the importance of the Strontium mass balance term
scaler_type: Normalization approach. Choose between "standard" and "minmax". Defaults to "minmax".
loss_variant: Loss function approach. Choose between "huber" and "huber_mass_balance". Defaults to "huber".
delta: Hyperparameter for the Huber function threshold. Defaults to 1.0.
Returns:
loss function
Returns:
loss function
"""
# as far as I know tensorflow does not directly support the use of scaler objects
@ -122,12 +124,14 @@ def custom_loss(
preprocess.scaler_X.data_range_, dtype=tf.float32
)
min_X = tf.convert_to_tensor(
preprocess.scaler_X.data_min_, dtype=tf.float32)
preprocess.scaler_X.data_min_, dtype=tf.float32
)
scale_y = tf.convert_to_tensor(
preprocess.scaler_y.data_range_, dtype=tf.float32
)
min_y = tf.convert_to_tensor(
preprocess.scaler_y.data_min_, dtype=tf.float32)
preprocess.scaler_y.data_min_, dtype=tf.float32
)
elif scaler_type == "standard":
scale_X = tf.convert_to_tensor(
@ -140,10 +144,14 @@ def custom_loss(
preprocess.scaler_y.mean_, dtype=tf.float32)
else:
raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
raise Exception(
"No valid scaler type found. Choose between 'standard' and 'minmax'."
)
except AttributeError:
raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.")
raise Exception(
"Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
)
def loss(results, predicted):
# inverse min/max scaling
@ -194,7 +202,8 @@ def custom_loss(
total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
else:
raise Exception(
"No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'.")
"No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."
)
return total_loss
@ -213,7 +222,6 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
mean of both mass balance terms
"""
if scaler_type == "minmax":
scale_X = tf.convert_to_tensor(
preprocess.scaler_X.data_range_, dtype=tf.float32
@ -332,23 +340,28 @@ def mass_balance_evaluation(model, X, preprocess):
)
mass_balance_result = pd.DataFrame(
{"dBa":dBa, "dSr":dSr, "mass_balance":dBa+dSr, "Class": classes}
{"dBa": dBa, "dSr": dSr, "mass_balance": dBa + dSr, "Class": classes}
)
return mass_balance_result
def mass_balance_ratio(results, threshold=1e-5):
    """Compute the fraction of samples whose mass-balance error is within a threshold.

    The fraction is reported overall and separately per class, each relative to
    the total number of samples in that group.

    Args:
        results: DataFrame with at least the columns "mass_balance" (float error
            per sample) and "Class" (0 or 1), e.g. the output of
            mass_balance_evaluation.
        threshold: maximum mass-balance error for a sample to count as
            satisfying the balance. Defaults to 1e-5.

    Returns:
        dict with keys "overall", "class_0", "class_1" mapping to the
        proportion (0..1) of samples at or below the threshold in each group.

    Raises:
        ZeroDivisionError: if ``results`` is empty or one of the classes is
            absent (the corresponding denominator is then zero).
    """
    proportion = {}
    # Samples that satisfy the mass balance up to the tolerance.
    within = results[results["mass_balance"] <= threshold]
    class_0_within = len(within[within["Class"] == 0])
    class_1_within = len(within[within["Class"] == 1])

    proportion["overall"] = len(within) / len(results)
    proportion["class_0"] = class_0_within / len(results[results["Class"] == 0])
    proportion["class_1"] = class_1_within / len(results[results["Class"] == 1])
    return proportion
@ -552,7 +565,7 @@ class preprocessing:
self.state["scale"] = True
def scale_transform(self, X_train, X_test, y_train, y_test):
""" Apply learned scaler on datasets.
"""Apply learned scaler on datasets.
Args:
X_train: design training data