From 331306c141a43f045b2fb8242d5e834460800730 Mon Sep 17 00:00:00 2001 From: Hannes Signer Date: Wed, 26 Feb 2025 18:32:11 +0100 Subject: [PATCH] update mass_balance_evaluation --- src/preprocessing.py | 80 ++++++++++++++++++++++++++++---------------- 1 file changed, 52 insertions(+), 28 deletions(-) diff --git a/src/preprocessing.py b/src/preprocessing.py index 61ba0bc..2f44d5c 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -116,30 +116,34 @@ def custom_loss( # as far as I know tensorflow does not directly support the use of scaler objects # therefore, the backtransformation is done manually - if scaler_type == "minmax": - scale_X = tf.convert_to_tensor( - preprocess.scaler_X.data_range_, dtype=tf.float32 - ) - min_X = tf.convert_to_tensor( - preprocess.scaler_X.data_min_, dtype=tf.float32) - scale_y = tf.convert_to_tensor( - preprocess.scaler_y.data_range_, dtype=tf.float32 - ) - min_y = tf.convert_to_tensor( - preprocess.scaler_y.data_min_, dtype=tf.float32) + try: + if scaler_type == "minmax": + scale_X = tf.convert_to_tensor( + preprocess.scaler_X.data_range_, dtype=tf.float32 + ) + min_X = tf.convert_to_tensor( + preprocess.scaler_X.data_min_, dtype=tf.float32) + scale_y = tf.convert_to_tensor( + preprocess.scaler_y.data_range_, dtype=tf.float32 + ) + min_y = tf.convert_to_tensor( + preprocess.scaler_y.data_min_, dtype=tf.float32) - elif scaler_type == "standard": - scale_X = tf.convert_to_tensor( - preprocess.scaler_X.scale_, dtype=tf.float32) - mean_X = tf.convert_to_tensor( - preprocess.scaler_X.mean_, dtype=tf.float32) - scale_y = tf.convert_to_tensor( - preprocess.scaler_y.scale_, dtype=tf.float32) - mean_y = tf.convert_to_tensor( - preprocess.scaler_y.mean_, dtype=tf.float32) - - else: - raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.") + elif scaler_type == "standard": + scale_X = tf.convert_to_tensor( + preprocess.scaler_X.scale_, dtype=tf.float32) + mean_X = tf.convert_to_tensor( + preprocess.scaler_X.mean_, dtype=tf.float32) + scale_y = tf.convert_to_tensor( + preprocess.scaler_y.scale_, dtype=tf.float32) + mean_y = tf.convert_to_tensor( + preprocess.scaler_y.mean_, dtype=tf.float32) + + else: + raise Exception("No valid scaler type found. Choose between 'standard' and 'minmax'.") + + except AttributeError: + raise Exception("Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training.") def loss(results, predicted): # inverse min/max scaling @@ -302,8 +306,9 @@ def mass_balance_evaluation(model, X, preprocess): # predict the chemistry columns = X.iloc[:, X.columns != "Class"].columns + classes = X["Class"] + classes.reset_index(drop=True, inplace=True) prediction = pd.DataFrame(model.predict(X[columns]), columns=columns) - # backtransform min/max or standard scaler X = pd.DataFrame( preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]), @@ -313,20 +318,39 @@ def mass_balance_evaluation(model, X, preprocess): preprocess.scaler_y.inverse_transform(prediction), columns=columns ) - # apply exp1m on the columns of predicted_inverse and results_inverse if log transformation was applied + # apply backtransformation if log transformation was applied if preprocess.func_dict_out is not None: - X = preprocess.funcInverse(X) + X = preprocess.funcInverse(X)[0] + prediction = preprocess.funcInverse(prediction)[0] # calculate mass balance dBa = np.abs( (prediction["Ba"] + prediction["Barite"]) - (X["Ba"] + X["Barite"])) - print(dBa.min()) dSr = np.abs( (prediction["Sr"] + prediction["Celestite"]) - (X["Sr"] + X["Celestite"]) ) + + mass_balance_result = pd.DataFrame( + {"dBa":dBa, "dSr":dSr, "mass_balance":dBa+dSr, "Class": classes} + ) - return dBa + dSr + return mass_balance_result + +def mass_balance_ratio(results, threshold=1e-5): + proportion = {} + + mass_balance_threshold = results[results["mass_balance"] <= threshold] + + overall = len(mass_balance_threshold) + class_0_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 0]) + class_1_amount = len(mass_balance_threshold[mass_balance_threshold["Class"] == 1]) + + proportion["overall"] = overall / len(results) + proportion["class_0"] = class_0_amount / len(results[results["Class"] == 0]) + proportion["class_1"] = class_1_amount / len(results[results["Class"] == 1]) + + return proportion class preprocessing: