mirror of https://git.gfz-potsdam.de/naaice/model-training.git (synced 2025-12-15 19:38:21 +01:00)

commit 471038de50 (parent 5f4c863b39): update linting
@@ -95,23 +95,25 @@ def custom_loss(
     loss_variant="huber",
     delta=1.0,
 ):
-    """Custom tensorflow loss function to combine Huber Loss with mass balance.
+    """
+    Custom tensorflow loss function to combine Huber Loss with mass balance.
     This is inspired by PINN (Physics Informed Neural Networks), where the loss function is a combination of the physics-based loss and the data-driven loss.
     The mass balance is a physics-based loss that ensures the conservation of mass in the system.
     A tensorflow loss function accepts only the two arguments y_true and y_pred. Therefore, a nested function is used to pass the additional arguments.

     Args:
         preprocess: preprocessing object
-        column_dict: dictionary with the column names as keys and the corresponding index as values. (i.e. {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
+        column_dict: dictionary with the column names as keys and the corresponding index as values.
+            (i.e. {'H': 0, 'O': 1, 'Ba': 2, 'Cl': 3, 'S': 4, 'Sr': 5, 'Barite': 6, 'Celestite': 7})
         h1: hyperparameter for the importance of the Huber loss
         h2: hyperparameter for the importance of the Barium mass balance term
         h3: hyperparameter for the importance of the Strontium mass balance term
         scaler_type: Normalization approach. Choose between "standard" and "minmax". Defaults to "minmax".
         loss_variant: Loss function approach. Choose between "huber" and "huber_mass_balance". Defaults to "huber".
         delta: Hyperparameter for the Huber function threshold. Defaults to 1.0.

     Returns:
         loss function
     """

     # as far as I know tensorflow does not directly support the use of scaler objects
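The closure pattern this docstring describes is worth a standalone illustration: Keras only ever calls a loss with (y_true, y_pred), so any extra configuration must be captured by an enclosing factory. A minimal sketch, with a placeholder penalty standing in for the mass balance term:

import tensorflow as tf


def make_loss(weight=0.5, delta=1.0):
    # weight and delta are captured by the closure; Keras never sees them
    huber = tf.keras.losses.Huber(delta)

    def loss(y_true, y_pred):
        # placeholder physics term, not the repo's mass balance computation
        penalty = tf.reduce_mean(tf.abs(y_pred))
        return huber(y_true, y_pred) + weight * penalty

    return loss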
@@ -122,12 +124,14 @@ def custom_loss(
                 preprocess.scaler_X.data_range_, dtype=tf.float32
             )
             min_X = tf.convert_to_tensor(
-                preprocess.scaler_X.data_min_, dtype=tf.float32)
+                preprocess.scaler_X.data_min_, dtype=tf.float32
+            )
             scale_y = tf.convert_to_tensor(
                 preprocess.scaler_y.data_range_, dtype=tf.float32
             )
             min_y = tf.convert_to_tensor(
-                preprocess.scaler_y.data_min_, dtype=tf.float32)
+                preprocess.scaler_y.data_min_, dtype=tf.float32
+            )

         elif scaler_type == "standard":
             scale_X = tf.convert_to_tensor(
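For context on why data_range_ and data_min_ are extracted here: with sklearn's default feature_range=(0, 1), MinMaxScaler computes x_scaled = (x - data_min_) / data_range_, so the inverse the nested loss needs is plain tensor arithmetic. A self-contained sketch with illustrative values:

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler().fit(np.array([[0.0], [2.0], [4.0]]))
scale = tf.convert_to_tensor(scaler.data_range_, dtype=tf.float32)    # [4.0]
data_min = tf.convert_to_tensor(scaler.data_min_, dtype=tf.float32)  # [0.0]

scaled = tf.constant([[0.5]])
unscaled = scaled * scale + data_min  # inverse transform -> [[2.0]]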
@@ -140,10 +144,14 @@ def custom_loss(
                 preprocess.scaler_y.mean_, dtype=tf.float32)

         else:
-            raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.")
+            raise Exception(
+                "No valid scaler type found. Choose between 'standard' and 'minmax'."
+            )

     except AttributeError:
-        raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.")
+        raise Exception(
+            "Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
+        )

     def loss(results, predicted):
         # inverse min/max scaling
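The standard branch is analogous: StandardScaler stores mean_ and scale_, with x_scaled = (x - mean_) / scale_ and inverse x_scaled * scale_ + mean_. Only the mean_ conversion is visible in this hunk; the scale_ handling is assumed to mirror it:

import numpy as np
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(np.array([[1.0], [3.0]]))  # mean_=[2.], scale_=[1.]
x_scaled = scaler.transform(np.array([[3.0]]))
x_back = x_scaled * scaler.scale_ + scaler.mean_  # recovers [[3.0]]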
@@ -194,7 +202,8 @@ def custom_loss(
             total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
         else:
             raise Exception(
-                "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'.")
+                "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."
+            )

         return total_loss

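Since custom_loss(...) returns the inner loss function, it plugs directly into model.compile. A hypothetical call, with keyword arguments taken from the docstring above and the weight values purely illustrative:

model.compile(
    optimizer="adam",
    loss=custom_loss(
        preprocess,
        column_dict,
        h1=1.0,
        h2=0.5,
        h3=0.5,
        scaler_type="minmax",
        loss_variant="huber_mass_balance",
        delta=1.0,
    ),
)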
@@ -212,8 +221,7 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
     Returns:
         mean of both mass balance terms
     """
-
-
+
     if scaler_type == "minmax":
         scale_X = tf.convert_to_tensor(
             preprocess.scaler_X.data_range_, dtype=tf.float32
@@ -284,7 +292,7 @@ def huber_metric(delta=1.0):
         scaler_type (str, optional): _description_. Defaults to "minmax".
         delta (float, optional): _description_. Defaults to 1.0.
     """
-
+
     def huber(results, predicted):
         huber_loss = tf.keras.losses.Huber(delta)(results, predicted)
         return huber_loss
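huber_metric uses the same factory pattern: it returns the inner huber callable, which can be passed in a metrics list or invoked directly. A quick sketch of what the returned function computes (values illustrative):

import tensorflow as tf

metric_fn = huber_metric(delta=1.0)
# |error| = 0.5 <= delta, so Huber gives 0.5 * 0.5**2 = 0.125
value = metric_fn(tf.constant([[1.0]]), tf.constant([[1.5]]))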
@@ -303,7 +311,7 @@ def mass_balance_evaluation(model, X, preprocess):
     Returns:
         vector with the mass balance difference for each cell
     """
-
+
     # predict the chemistry
     columns = X.iloc[:, X.columns != "Class"].columns
     classes = X["Class"]
@@ -330,26 +338,31 @@ def mass_balance_evaluation(model, X, preprocess):
         (prediction["Sr"] + prediction["Celestite"]) -
         (X["Sr"] + X["Celestite"])
     )

     mass_balance_result = pd.DataFrame(
-        {"dBa":dBa, "dSr":dSr, "mass_balance":dBa+dSr, "Class": classes}
+        {"dBa": dBa, "dSr": dSr, "mass_balance": dBa + dSr, "Class": classes}
     )

     return mass_balance_result


 def mass_balance_ratio(results, threshold=1e-5):
     proportion = {}

     mass_balance_threshold = results[results["mass_balance"] <= threshold]

     overall = len(mass_balance_threshold)
-    class_0_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 0])
-    class_1_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 1])
+    class_0_amount = len(
+        mass_balance_threshold[mass_balance_threshold["Class"] == 0])
+    class_1_amount = len(
+        mass_balance_threshold[mass_balance_threshold["Class"] == 1])

     proportion["overall"] = overall / len(results)
-    proportion["class_0"] = class_0_amount / len(results[results["Class"] == 0])
-    proportion["class_1"] = class_1_amount / len(results[results["Class"] == 1])
+    proportion["class_0"] = class_0_amount / \
+        len(results[results["Class"] == 0])
+    proportion["class_1"] = class_1_amount / \
+        len(results[results["Class"] == 1])

     return proportion

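Taken together, mass_balance_evaluation builds the per-cell frame with dBa, dSr, their sum, and the class label, while mass_balance_ratio reports the fraction of cells whose imbalance stays below the threshold, overall and per class. A hypothetical end-to-end use:

results = mass_balance_evaluation(model, X_test, preprocess)
proportion = mass_balance_ratio(results, threshold=1e-5)
# e.g. {'overall': 0.97, 'class_0': 0.99, 'class_1': 0.94}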
@@ -358,7 +371,7 @@ class preprocessing:
     A class used to preprocess data for model training.
     Attributes
     """
-
+
     def __init__(self, func_dict_in=None, func_dict_out=None, random_state=42):
         """Initialization of the preprocessing object.

@@ -524,7 +537,7 @@ class preprocessing:
             scaling: learn individual scaler for X and y when "individual" is selected or one global scaler on all data in X and y if "global" is selected (scaler_X and scaler_y are equal)
             type (str, optional): Using MinMax Scaling or Standardization. Defaults to "Standard".
         """
-
+
         if type == "minmax":
             self.scaler_X = MinMaxScaler()
             self.scaler_y = MinMaxScaler()
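The individual/global distinction in this docstring comes down to where the scaler statistics are fitted. A rough sklearn sketch of the two options; this is not the class's actual code, and flattening all values into one feature for the global variant is an assumption:

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

X = pd.DataFrame({"Ba": [0.0, 2.0]})
y = pd.DataFrame({"Barite": [1.0, 5.0]})

# "individual": X and y each get their own ranges
scaler_X = MinMaxScaler().fit(X)
scaler_y = MinMaxScaler().fit(y)

# "global": one scaler over all values, so scaler_X and scaler_y are equal
all_values = np.concatenate([X.to_numpy().ravel(), y.to_numpy().ravel()])
scaler_shared = MinMaxScaler().fit(all_values.reshape(-1, 1))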
@@ -552,7 +565,7 @@ class preprocessing:
         self.state["scale"] = True

     def scale_transform(self, X_train, X_test, y_train, y_test):
-        """ Apply learned scaler on datasets.
+        """Apply learned scaler on datasets.

         Args:
             X_train: design training data
@@ -563,7 +576,7 @@ class preprocessing:
         Returns:
             transformed dataframes
         """
-
+
         X_train = pd.concat(
             [
                 self.scaler_X.transform(
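The pd.concat wrapped around scaler_X.transform suggests the usual pattern of scaling the numeric block and re-attaching untouched columns such as "Class". A generic sketch of that idea; the helper name and column handling are assumptions, not the class's exact code:

import pandas as pd


def scale_keep_class(df, fitted_scaler):
    # scale every column except "Class", then re-attach it untouched;
    # fitted_scaler must have been fit on the same feature columns
    features = df.drop(columns="Class")
    scaled = pd.DataFrame(
        fitted_scaler.transform(features),
        columns=features.columns,
        index=df.index,
    )
    return pd.concat([scaled, df["Class"]], axis=1)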
@@ -608,7 +621,7 @@ class preprocessing:
         Returns:
             Backtransformed data frames
         """
-
+
         result = []
         for i in args:
             if "Class" in i.columns: