update linting

This commit is contained in:
Hannes Signer 2025-02-26 18:51:40 +01:00
parent 5f4c863b39
commit 471038de50

View File

@ -95,23 +95,25 @@ def custom_loss(
loss_variant="huber",
delta=1.0,
):
"""Custom tensorflow loss function to combine Huber Loss with mass balance.
This is inspired by PINN (Physics Informed Neural Networks) where the loss function is a combination of the physics-based loss and the data-driven loss.
The mass balance is a physics-based loss that ensures the conservation of mass in the system.
A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.
"""
Custom tensorflow loss function to combine Huber Loss with mass balance.
This is inspired by PINN (Physics Informed Neural Networks) where the loss function is a combination of the physics-based loss and the data-driven loss.
The mass balance is a physics-based loss that ensures the conservation of mass in the system.
A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.
Args:
preprocess: preprocessing object
column_dict: dictionary with the column names as keys and the corresponding index as values. (e.g. {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
h1: hyperparameter for the importance of the huber loss
h2: hyperparameter for the importance of the Barium mass balance term
h3: hyperparameter for the importance of the Strontium mass balance term
scaler_type: Normalization approach. Choose between "standard" and "minmax". Defaults to "minmax".
loss_variant: Loss function approach. Choose between "huber" and "huber_mass_balance". Defaults to "huber".
delta: Hyperparameter for the Huber function threshold. Defaults to 1.0.
Args:
preprocess: preprocessing object
column_dict: dictionary with the column names as keys and the corresponding index as values.
(e.g. {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
h1: hyperparameter for the importance of the huber loss
h2: hyperparameter for the importance of the Barium mass balance term
h3: hyperparameter for the importance of the Strontium mass balance term
scaler_type: Normalization approach. Choose between "standard" and "minmax". Defaults to "minmax".
loss_variant: Loss function approach. Choose between "huber" and "huber_mass_balance". Defaults to "huber".
delta: Hyperparameter for the Huber function threshold. Defaults to 1.0.
Returns:
loss function
Returns:
loss function
"""
# as far as I know tensorflow does not directly support the use of scaler objects
@ -122,12 +124,14 @@ def custom_loss(
preprocess.scaler_X.data_range_, dtype=tf.float32
)
min_X = tf.convert_to_tensor(
preprocess.scaler_X.data_min_, dtype=tf.float32)
preprocess.scaler_X.data_min_, dtype=tf.float32
)
scale_y = tf.convert_to_tensor(
preprocess.scaler_y.data_range_, dtype=tf.float32
)
min_y = tf.convert_to_tensor(
preprocess.scaler_y.data_min_, dtype=tf.float32)
preprocess.scaler_y.data_min_, dtype=tf.float32
)
elif scaler_type == "standard":
scale_X = tf.convert_to_tensor(
@ -140,10 +144,14 @@ def custom_loss(
preprocess.scaler_y.mean_, dtype=tf.float32)
else:
raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
raise Exception(
"No valid scaler type found. Choose between 'standard' and 'minmax'."
)
except AttributeError:
raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.")
raise Exception(
"Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
)
def loss(results, predicted):
# inverse min/max scaling
@ -194,7 +202,8 @@ def custom_loss(
total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
else:
raise Exception(
"No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'.")
"No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."
)
return total_loss
@ -212,8 +221,7 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
Returns:
mean of both mass balance terms
"""
if scaler_type == "minmax":
scale_X = tf.convert_to_tensor(
preprocess.scaler_X.data_range_, dtype=tf.float32
@ -284,7 +292,7 @@ def huber_metric(delta=1.0):
scaler_type (str, optional): _description_. Defaults to "minmax".
delta (float, optional): Threshold passed to tf.keras.losses.Huber. Defaults to 1.0.
"""
def huber(results, predicted):
huber_loss = tf.keras.losses.Huber(delta)(results, predicted)
return huber_loss
@ -303,7 +311,7 @@ def mass_balance_evaluation(model, X, preprocess):
Returns:
vector with the mass balance difference for each cell
"""
# predict the chemistry
columns = X.iloc[:, X.columns != "Class"].columns
classes = X["Class"]
@ -330,26 +338,31 @@ def mass_balance_evaluation(model, X, preprocess):
(prediction["Sr"] + prediction["Celestite"]) -
(X["Sr"] + X["Celestite"])
)
mass_balance_result = pd.DataFrame(
{"dBa":dBa, "dSr":dSr, "mass_balance":dBa+dSr, "Class": classes}
{"dBa": dBa, "dSr": dSr, "mass_balance": dBa + dSr, "Class": classes}
)
return mass_balance_result
def mass_balance_ratio(results, threshold=1e-5):
    """Compute the proportion of rows whose mass balance is within a threshold.

    Args:
        results: data frame with at least the columns "mass_balance" and "Class".
        threshold: largest "mass_balance" value that still counts as fulfilled.
            Defaults to 1e-5.

    Returns:
        dictionary with the keys "overall", "class_0" and "class_1". Each value
        is the share of rows (all rows, rows with Class == 0, rows with
        Class == 1) whose "mass_balance" is <= threshold. A share is NaN when
        its denominator is zero (empty input or a class that does not occur)
        instead of raising a ZeroDivisionError.
    """

    def _share(hits, total):
        # An absent class (or an empty frame) has no defined proportion.
        return hits / total if total else float("nan")

    within_threshold = results[results["mass_balance"] <= threshold]

    proportion = {"overall": _share(len(within_threshold), len(results))}
    for label in (0, 1):
        hits = len(within_threshold[within_threshold["Class"] == label])
        total = len(results[results["Class"] == label])
        proportion[f"class_{label}"] = _share(hits, total)

    return proportion
@ -358,7 +371,7 @@ class preprocessing:
A class used to preprocess data for model training.
Attributes
"""
def __init__(self, func_dict_in=None, func_dict_out=None, random_state=42):
"""Initialization of the preprocessing object.
@ -524,7 +537,7 @@ class preprocessing:
scaling: learn individual scaler for X and y when "individual" is selected or one global scaler on all data in X and y if "global" is selected (scaler_X and scaler_y are equal)
type (str, optional): Using MinMax Scaling or Standardization. Defaults to "Standard".
"""
if type == "minmax":
self.scaler_X = MinMaxScaler()
self.scaler_y = MinMaxScaler()
@ -552,7 +565,7 @@ class preprocessing:
self.state["scale"] = True
def scale_transform(self, X_train, X_test, y_train, y_test):
""" Apply learned scaler on datasets.
"""Apply learned scaler on datasets.
Args:
X_train: design training data
@ -563,7 +576,7 @@ class preprocessing:
Returns:
transformed dataframes
"""
X_train = pd.concat(
[
self.scaler_X.transform(
@ -608,7 +621,7 @@ class preprocessing:
Returns:
Backtransformed data frames
"""
result = []
for i in args:
if "Class" in i.columns: