# model-training/optuna_runs.py
import keras
from keras.layers import Dense, Dropout, Input, BatchNormalization
import tensorflow as tf
import h5py
import numpy as np
import pandas as pd
import time
import sklearn.model_selection as sk
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from collections import Counter
import os
from preprocessing import *
from sklearn import set_config
from importlib import reload
set_config(transform_output = "pandas")
dtype = "float32"
activation = "relu"
lr = 0.001
batch_size = 512
epochs = 50 # default 400 epochs
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=lr,
    decay_steps=2000,
    decay_rate=0.9,
    staircase=True,
)
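# With staircase decay the rate is lr * 0.9**floor(step / 2000):
# 9.0e-4 after 2,000 steps, ~3.5e-4 (0.001 * 0.9**10) after 20,000 steps.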
optimizer_simple = keras.optimizers.Adam(learning_rate=lr_schedule)
optimizer_large = keras.optimizers.Adam(learning_rate=lr_schedule)
optimizer_paper = keras.optimizers.Adam(learning_rate=lr_schedule)
sample_fraction = 0.8
# small model
model_simple = keras.Sequential(
    [
        keras.Input(shape=(9,), dtype="float32"),
        keras.layers.Dense(units=128, activation="linear", dtype="float32"),
        # Dropout(0.2),
        keras.layers.Dense(units=128, activation="elu", dtype="float32"),
        keras.layers.Dense(units=9, dtype="float32"),
    ]
)
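# 9 inputs and 9 outputs, matching the `species_columns` selected below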
def Safelog(val):
    # signed log10: preserves the sign of val so negative values
    # (e.g. charge imbalances) stay representable
    if val > 0:
        return np.log10(val)
    elif val < 0:
        return -np.log10(-val)
    else:
        return 0
def Safeexp(val):
    # inverse of Safelog, assuming the original magnitude was below 1:
    # log10 of a value in (0, 1) is negative, so the sign encodes the branch
    if val > 0:
        return -10 ** -val
    elif val < 0:
        return 10 ** val
    else:
        return 0
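# Round-trip sanity check (a small sketch; the inverse assumption above
# holds for magnitudes below 1, as is typical for charge imbalances):
assert np.isclose(Safeexp(Safelog(1e-6)), 1e-6)
assert np.isclose(Safeexp(Safelog(-1e-6)), -1e-6)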
# ? Why does Charge use a different logarithm than the other species?
func_dict_in = {
    "H": np.log1p,
    "O": np.log1p,
    "Charge": Safelog,
    "H_0_": np.log1p,
    "O_0_": np.log1p,
    "Ba": np.log1p,
    "Cl": np.log1p,
    "S_2_": np.log1p,
    "S_6_": np.log1p,
    "Sr": np.log1p,
    "Barite": np.log1p,
    "Celestite": np.log1p,
}
func_dict_out = {
    "H": np.expm1,
    "O": np.expm1,
    "Charge": Safeexp,
    "H_0_": np.expm1,
    "O_0_": np.expm1,
    "Ba": np.expm1,
    "Cl": np.expm1,
    "S_2_": np.expm1,
    "S_6_": np.expm1,
    "Sr": np.expm1,
    "Barite": np.expm1,
    "Celestite": np.expm1,
}
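# A minimal sketch of applying these per-column maps by hand (hypothetical;
# the actual application lives in the local `preprocessing` helper, cf. the
# commented-out funcTranform call below):
# df_log = df_design[list(func_dict_in)].apply(lambda col: col.map(func_dict_in[col.name]))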
# os.chdir('/mnt/beegfs/home/signer/projects/model-training')
data_file = h5py.File("barite_50_4_corner.h5", "r")
design = data_file["design"]
results = data_file["result"]
df_design = pd.DataFrame(
    np.array(design["data"]).transpose(), columns=np.array(design["names"].asstr())
)
df_results = pd.DataFrame(
    np.array(results["data"]).transpose(), columns=np.array(results["names"].asstr())
)
data_file.close()
species_columns = ['H', 'O', 'Charge', 'Ba', 'Cl', 'S', 'Sr', 'Barite', 'Celestite']
preprocess = preprocessing(func_dict_in=func_dict_in, func_dict_out=func_dict_out)
X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])
# X, y = preprocess.funcTranform(X, y)
X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio = 0.2)
X_train, y_train = preprocess.balancer(X_train, y_train, strategy = "over")
preprocess.scale_fit(X_train, y_train, scaling = "individual")
X_train, X_test, y_train, y_test = preprocess.scale_transform(X_train, X_test, y_train, y_test)
X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio = 0.1)
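# State of the splits at this point:
#   X_test / y_test   : 20% hold-out, split off before balancing and scaler fitting
#   X_train / y_train : oversampled ("over"), then min/max scaled
#   X_val / y_val     : 10% carved out of the scaled training data for validation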
column_dict = {"Ba": X.columns.get_loc("Ba"), "Barite":X.columns.get_loc("Barite"), "Sr":X.columns.get_loc("Sr"), "Celestite":X.columns.get_loc("Celestite"), "H":X.columns.get_loc("H"), "H":X.columns.get_loc("H"), "O":X.columns.get_loc("O")}
def custom_loss(preprocess, column_dict, h1, h2, h3, h4):
    # extract the scaling parameters
    scale_X = tf.convert_to_tensor(preprocess.scaler_X.scale_, dtype=tf.float32)
    min_X = tf.convert_to_tensor(preprocess.scaler_X.min_, dtype=tf.float32)
    scale_y = tf.convert_to_tensor(preprocess.scaler_y.scale_, dtype=tf.float32)
    min_y = tf.convert_to_tensor(preprocess.scaler_y.min_, dtype=tf.float32)

    def loss(results, predicted):
        # invert the min/max scaling (MinMaxScaler: x_scaled = x * scale_ + min_,
        # hence x = (x_scaled - min_) / scale_)
        # ? both tensors live in y-space; inverting the prediction with the X
        # scaler assumes X and y were scaled identically
        predicted_inverse = (predicted - min_X) / scale_X
        results_inverse = (results - min_y) / scale_y
        # mass balance: total Ba (solute + barite) must be conserved
        dBa = tf.keras.backend.abs(
            (predicted_inverse[:, column_dict["Ba"]] + predicted_inverse[:, column_dict["Barite"]]) -
            (results_inverse[:, column_dict["Ba"]] + results_inverse[:, column_dict["Barite"]])
        )
        # likewise for total Sr (solute + celestite)
        dSr = tf.keras.backend.abs(
            (predicted_inverse[:, column_dict["Sr"]] + predicted_inverse[:, column_dict["Celestite"]]) -
            (results_inverse[:, column_dict["Sr"]] + results_inverse[:, column_dict["Celestite"]])
        )
        # H/O ratio has to be 2 (water stoichiometry)
        h2o_ratio = tf.keras.backend.abs(
            (predicted_inverse[:, column_dict["H"]] / predicted_inverse[:, column_dict["O"]]) - 2
        )
        # huber loss on the scaled values
        huber_loss = tf.keras.losses.Huber()(results, predicted)
        # weighted total loss (the h4 / H2O-ratio term is currently disabled)
        total_loss = h1 * huber_loss + h2 * dBa**2 + h3 * dSr**2  # + h4 * h2o_ratio**2
        return total_loss

    return loss
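# Smoke test of the loss closure (hypothetical shapes; left commented out
# because the fitted scalers may carry an extra cluster "Class" column):
# loss_fn = custom_loss(preprocess, column_dict, 1.0, 1.0, 1.0, 1.0)
# loss_fn(tf.ones((4, 9)), tf.ones((4, 9)))  # -> scalar tf.Tensor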
def mass_balance(model, X, preprocess):
    # predict the chemistry
    columns = X.columns[X.columns != "Class"]
    prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
    # backtransform min/max
    X = pd.DataFrame(preprocess.scaler_X.inverse_transform(X.loc[:, X.columns != "Class"]), columns=columns)
    prediction = pd.DataFrame(preprocess.scaler_y.inverse_transform(prediction), columns=columns)
    # calculate mass balance
    dBa = np.abs((prediction["Ba"] + prediction["Barite"]) - (X["Ba"] + X["Barite"]))
    dSr = np.abs((prediction["Sr"] + prediction["Celestite"]) - (X["Sr"] + X["Celestite"]))
    return dBa + dSr
import optuna
def create_model(model, preprocess, h1, h2, h3, h4):
    # clone the architecture so every trial starts from freshly initialised
    # weights instead of continuing to train the shared global model
    model = keras.models.clone_model(model)
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
                  loss=custom_loss(preprocess, column_dict, h1, h2, h3, h4))
    return model
def objective(trial, preprocess, X_train, y_train, X_val, y_val, X_test, y_test):
    # loss weights for the Huber term and the mass-balance penalties
    h1 = trial.suggest_float("h1", 0.1, 10)
    h2 = trial.suggest_float("h2", 0.1, 10)
    h3 = trial.suggest_float("h3", 0.1, 10)
    h4 = trial.suggest_float("h4", 0.1, 10)
    model = create_model(model_simple, preprocess, h1, h2, h3, h4)
    callback = keras.callbacks.EarlyStopping(monitor="loss", patience=3)
    history = model.fit(
        X_train.loc[:, X_train.columns != "Class"],
        y_train.loc[:, y_train.columns != "Class"],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(X_val.loc[:, X_val.columns != "Class"],
                         y_val.loc[:, y_val.columns != "Class"]),
        callbacks=[callback],
    )
    prediction_loss = model.evaluate(X_test.loc[:, X_test.columns != "Class"],
                                     y_test.loc[:, y_test.columns != "Class"])
    # fraction of test rows whose combined Ba/Sr imbalance stays below 1e-5
    mass_balance_results = mass_balance(model, X_test, preprocess)
    mass_balance_ratio = len(mass_balance_results[mass_balance_results < 1e-5]) / len(mass_balance_results)
    return prediction_loss, mass_balance_ratio
if __name__ == "__main__":
    study = optuna.create_study(
        storage="sqlite:///model_optimization.db",
        study_name="model_optimization",
        directions=["minimize", "maximize"],
    )
    study.optimize(
        lambda trial: objective(trial, preprocess, X_train, y_train, X_val, y_val, X_test, y_test),
        n_trials=1000,
    )
    print("Number of finished trials: ", len(study.trials))
    # with two objectives there is a Pareto front, not a single best trial
    print("Pareto-optimal trials:")
    for trial in study.best_trials:
        print("  Values: ", trial.values)
        print("  Params: ")
        for key, value in trial.params.items():
            print("    {}: {}".format(key, value))