mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-16 08:08:22 +01:00
correction of scaling error in mass balance loss
This commit is contained in:
parent
8051eb3c3d
commit
0f7ee78a8a
@ -93,10 +93,10 @@ def custom_loss(preprocess, column_dict, h1, h2, h3, scaler_type="minmax", loss_
|
|||||||
# extract the scaling parameters
|
# extract the scaling parameters
|
||||||
|
|
||||||
if scaler_type == "minmax":
|
if scaler_type == "minmax":
|
||||||
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
scale_X = tf.convert_to_tensor(preprocess.scaler_X.data_range_, dtype=tf.float32)
|
||||||
min_X = tf.convert_to_tensor(preprocess.scaler_X.min_, dtype=tf.float32)
|
min_X = tf.convert_to_tensor(preprocess.scaler_X.data_min_, dtype=tf.float32)
|
||||||
scale_y = tf.convert_to_tensor(preprocess.scaler_y.scale_, dtype=tf.float32)
|
scale_y = tf.convert_to_tensor(preprocess.scaler_y.data_range_, dtype=tf.float32)
|
||||||
min_y = tf.convert_to_tensor(preprocess.scaler_y.min_, dtype=tf.float32)
|
min_y = tf.convert_to_tensor(preprocess.scaler_y.data_min_, dtype=tf.float32)
|
||||||
|
|
||||||
elif scaler_type == "standard":
|
elif scaler_type == "standard":
|
||||||
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
||||||
@ -117,11 +117,11 @@ def custom_loss(preprocess, column_dict, h1, h2, h3, scaler_type="minmax", loss_
|
|||||||
results_inverse = results * scale_X + mean_X
|
results_inverse = results * scale_X + mean_X
|
||||||
|
|
||||||
|
|
||||||
# apply expm1 on the columns of predicted_inverse and results_inverse
|
# apply expm1 on the columns of predicted_inverse and results_inverse if log transformation was applied
|
||||||
predicted_inverse = tf.math.expm1(predicted_inverse)
|
if preprocess.func_dict_out is not None:
|
||||||
results_inverse = tf.math.expm1(results_inverse)
|
predicted_inverse = tf.math.expm1(predicted_inverse)
|
||||||
print(predicted_inverse)
|
results_inverse = tf.math.expm1(results_inverse)
|
||||||
|
|
||||||
# mass balance
|
# mass balance
|
||||||
dBa = tf.keras.backend.abs(
|
dBa = tf.keras.backend.abs(
|
||||||
(predicted_inverse[:, column_dict["Ba"]] + predicted_inverse[:, column_dict["Barite"]]) -
|
(predicted_inverse[:, column_dict["Ba"]] + predicted_inverse[:, column_dict["Barite"]]) -
|
||||||
@ -148,10 +148,10 @@ def custom_loss(preprocess, column_dict, h1, h2, h3, scaler_type="minmax", loss_
|
|||||||
def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
|
def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
|
||||||
|
|
||||||
if scaler_type == "minmax":
|
if scaler_type == "minmax":
|
||||||
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
scale_X = tf.convert_to_tensor(preprocess.scaler_X.data_range_, dtype=tf.float32)
|
||||||
min_X = tf.convert_to_tensor(preprocess.scaler_X.min_, dtype=tf.float32)
|
min_X = tf.convert_to_tensor(preprocess.scaler_X.data_min_, dtype=tf.float32)
|
||||||
scale_y = tf.convert_to_tensor(preprocess.scaler_y.scale_, dtype=tf.float32)
|
scale_y = tf.convert_to_tensor(preprocess.scaler_y.data_range_, dtype=tf.float32)
|
||||||
min_y = tf.convert_to_tensor(preprocess.scaler_y.min_, dtype=tf.float32)
|
min_y = tf.convert_to_tensor(preprocess.scaler_y.data_min_, dtype=tf.float32)
|
||||||
|
|
||||||
elif scaler_type == "standard":
|
elif scaler_type == "standard":
|
||||||
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
|
||||||
@ -169,6 +169,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
|
|||||||
elif scaler_type == "standard":
|
elif scaler_type == "standard":
|
||||||
predicted_inverse = predicted * scale_y + mean_y
|
predicted_inverse = predicted * scale_y + mean_y
|
||||||
results_inverse = results * scale_X + mean_X
|
results_inverse = results * scale_X + mean_X
|
||||||
|
|
||||||
|
if preprocess.func_dict_out is not None:
|
||||||
|
predicted_inverse = tf.math.expm1(predicted_inverse)
|
||||||
|
results_inverse = tf.math.expm1(results_inverse)
|
||||||
|
|
||||||
# mass balance
|
# mass balance
|
||||||
dBa = tf.keras.backend.abs(
|
dBa = tf.keras.backend.abs(
|
||||||
@ -201,6 +205,10 @@ def mass_balance_evaluation(model, X, preprocess):
|
|||||||
# backtransform min/max or standard scaler
|
# backtransform min/max or standard scaler
|
||||||
X = pd.DataFrame(preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]), columns=columns)
|
X = pd.DataFrame(preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]), columns=columns)
|
||||||
prediction = pd.DataFrame(preprocess.scaler_y.inverse_transform(prediction), columns=columns)
|
prediction = pd.DataFrame(preprocess.scaler_y.inverse_transform(prediction), columns=columns)
|
||||||
|
|
||||||
|
# apply expm1 on the columns of predicted_inverse and results_inverse if log transformation was applied
|
||||||
|
if preprocess.func_dict_out is not None:
|
||||||
|
X = preprocess.funcInverse(X)
|
||||||
|
|
||||||
# calculate mass balance
|
# calculate mass balance
|
||||||
dBa = np.abs((prediction["Ba"] + prediction["Barite"]) - (X["Ba"] + X["Barite"]))
|
dBa = np.abs((prediction["Ba"] + prediction["Barite"]) - (X["Ba"] + X["Barite"]))
|
||||||
@ -216,28 +224,27 @@ class preprocessing:
|
|||||||
self.random_state = random_state
|
self.random_state = random_state
|
||||||
self.scaler_X = None
|
self.scaler_X = None
|
||||||
self.scaler_y = None
|
self.scaler_y = None
|
||||||
self.func_dict_in = None
|
|
||||||
self.func_dict_in = func_dict_in if func_dict_in is not None else None
|
self.func_dict_in = func_dict_in if func_dict_in is not None else None
|
||||||
self.func_dict_out = func_dict_out if func_dict_out is not None else None
|
self.func_dict_out = func_dict_out if func_dict_out is not None else None
|
||||||
self.state = {"cluster": False, "log": False, "balance": False, "scale": False}
|
self.state = {"cluster": False, "log": False, "balance": False, "scale": False}
|
||||||
|
|
||||||
def funcTranform(self, X, y):
|
def funcTranform(self, *args):
|
||||||
for key in X.keys():
|
|
||||||
if "Class" not in key:
|
for i in args:
|
||||||
X[key] = X[key].apply(self.func_dict_in)
|
for key in i.keys():
|
||||||
y[key] = y[key].apply(self.func_dict_in)
|
if "Class" not in key:
|
||||||
|
i[key] = i[key].apply(self.func_dict_in)
|
||||||
self.state["log"] = True
|
self.state["log"] = True
|
||||||
|
return args
|
||||||
|
|
||||||
return X, y
|
def funcInverse(self, *args):
|
||||||
|
|
||||||
def funcInverse(self, X, y):
|
|
||||||
|
|
||||||
for key in X.keys():
|
for i in args:
|
||||||
if "Class" not in key:
|
for key in i.keys():
|
||||||
X[key] = X[key].apply(self.func_dict_out)
|
if "Class" not in key:
|
||||||
y[key] = y[key].apply(self.func_dict_out)
|
i[key] = i[key].apply(self.func_dict_out)
|
||||||
self.state["log"] = False
|
self.state["log"] = False
|
||||||
return X, y
|
return args
|
||||||
|
|
||||||
def cluster(self, X, y, species='Barite', n_clusters=2, x_length=50, y_length=50):
|
def cluster(self, X, y, species='Barite', n_clusters=2, x_length=50, y_length=50):
|
||||||
|
|
||||||
@ -339,26 +346,29 @@ class preprocessing:
|
|||||||
|
|
||||||
return X_train, X_test, y_train, y_test
|
return X_train, X_test, y_train, y_test
|
||||||
|
|
||||||
def scale_inverse(self, X):
|
def scale_inverse(self, *args):
|
||||||
|
result = []
|
||||||
if("Class" in X.columns):
|
for i in args:
|
||||||
print("Class column found")
|
if "Class" in i.columns:
|
||||||
X = pd.concat([pd.DataFrame(self.scaler_X.inverse_transform(X.loc[:, X.columns != "Class"]), columns=X.columns[:-1]), X.loc[:, "Class"]], axis=1)
|
inversed = pd.DataFrame(self.scaler_X.inverse_transform(i.loc[:, i.columns != "Class"]), columns=i.columns[:-1])
|
||||||
else:
|
class_column = i.loc[:, "Class"].reset_index(drop=True)
|
||||||
X = self.scaler_X.inverse_transform(X)
|
i = pd.concat([inversed, class_column], axis=1)
|
||||||
|
else:
|
||||||
return X
|
i = pd.DataFrame(self.scaler_X.inverse_transform(i), columns=i.columns)
|
||||||
|
result.append(i)
|
||||||
|
return result
|
||||||
|
|
||||||
def split(self, X, y, ratio=0.8):
|
def split(self, X, y, ratio=0.8):
|
||||||
X_train, y_train, X_test, y_test = sk.train_test_split(X, y, test_size = ratio, random_state=self.random_state)
|
X_train, y_train, X_test, y_test = sk.train_test_split(X, y, test_size = ratio, random_state=self.random_state)
|
||||||
|
|
||||||
return X_train, y_train, X_test, y_test
|
return X_train, y_train, X_test, y_test
|
||||||
|
|
||||||
def class_selection(self, X, y, class_label):
|
def class_selection(self, *args, class_label=0):
|
||||||
X = X[X['Class'] == class_label]
|
|
||||||
y = y[y['Class'] == class_label]
|
|
||||||
|
|
||||||
return X, y
|
for i in args:
|
||||||
|
i = i[i['Class'] == class_label]
|
||||||
|
|
||||||
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user