Mirror of https://git.gfz-potsdam.de/naaice/model-training.git, synced 2025-12-13 10:38:22 +01:00
Compare commits: 5 commits, 04f5c40b29...69355a1e4e
| Author | SHA1 | Date |
|---|---|---|
| | 69355a1e4e | |
| | d678c0bfde | |
| | 4f954cbc84 | |
| | 5175e3d6fa | |
| | e1227775e9 | |
BIN
results/history_minmax_feature_engineering_false_adam_huber_mass_balance
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
results/history_standard_feature_engineering_false_adam_huber_mass_balance
(Stored with Git LFS)
Normal file
Binary file not shown.
1
results/models/.gitattributes
vendored
Normal file
@@ -0,0 +1 @@
+*.h5 filter=lfs diff=lfs merge=lfs -text
BIN
results/models/model_large_minmax.keras
(Stored with Git LFS)
Binary file not shown.
BIN
results/models/model_large_standardization.keras
(Stored with Git LFS)
Binary file not shown.
BIN
results/models/model_minmax_feature_engineering_false_adam_huber_mass_balance.weights.h5
(Stored with Git LFS)
Normal file
Binary file not shown.
BIN
results/models/model_standard_feature_engineering_false_adam_huber_mass_balance.weights.h5
(Stored with Git LFS)
Normal file
Binary file not shown.
File diff suppressed because one or more lines are too long
@@ -1,9 +1,175 @@
from preprocessing import *
import csv
import pickle
import time

import h5py
import keras
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


scaler_experiments = ["none", "minmax", "standard"]

###### Experimental parameters
scaler_type = "minmax"
feature_engineering = False
optimizer_type = "adam"
loss_variant = "huber_mass_balance"


###### load dataset
data_file = h5py.File("../datasets/Barite_4c_mdl.h5")

design = data_file["design"]
results = data_file["result"]

df_design = pd.DataFrame(
    np.array(design["data"]).transpose(), columns=np.array(design["names"].asstr())
)
df_results = pd.DataFrame(
    np.array(results["data"]).transpose(), columns=np.array(results["names"].asstr())
)

data_file.close()

# remove charge as species
df_design.drop("Charge", axis=1, inplace=True, errors="ignore")
df_results.drop("Charge", axis=1, inplace=True, errors="ignore")
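
Aside (not part of the commit): the HDF5 layout assumed above, two groups "design" and "result" that each hold a "data" matrix and a "names" vector, can be checked quickly. A minimal sketch:

# Sketch: inspect the expected HDF5 layout before loading.
with h5py.File("../datasets/Barite_4c_mdl.h5", "r") as f:
    for group_name in ("design", "result"):
        group = f[group_name]
        print(group_name, list(group.keys()))  # expected: ['data', 'names']
        print("  data shape:", group["data"].shape)  # features x samples, transposed above
        print("  names:", np.array(group["names"].asstr()))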


###### preprocessing

if feature_engineering:
    df_design["Ba/Sr"] = df_design["Ba"] / df_design["Sr"]
    df_design["BaxS"] = df_design["Ba"] * df_design["S"]

preprocess = preprocessing()
X, y = preprocess.cluster_manual(df_design[df_design.columns], df_results[df_results.columns], "Cl")

X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)
X_train, y_train = preprocess.balancer(X_train, y_train, strategy="off")

# train only on reactive cells
X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=1.0)

preprocess.scale_fit(X_train, y_train, type=scaler_type)
X_train, X_test, y_train, y_test = preprocess.scale_transform(
    X_train, X_test, y_train, y_test
)
X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)


###### create and compile model


def model_training(model, batch_size=512, epochs=100):
    start = time.time()
    callback = keras.callbacks.EarlyStopping(monitor="loss", patience=30)
    history = model.fit(
        X_train.loc[:, X_train.columns != "Class"],
        y_train.loc[:, y_train.columns != "Class"],
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(
            X_val.loc[:, X_val.columns != "Class"],
            y_val.loc[:, y_val.columns != "Class"],
        ),
        callbacks=[callback],
    )

    end = time.time()
    print("Training took {} seconds".format(end - start))

    return history


# mapping of column names to column index
column_dict = {}
for i in df_results.columns:
    column_dict[i] = y.columns.get_loc(i)

# select model architecture
model = model_definition("large", len(df_design.columns), len(df_results.columns))

# define learning rate adaptation
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True
)
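
Aside: with staircase=True this schedule keeps the rate constant for 2000 optimizer steps, then multiplies it by 0.9, i.e. lr(step) = 0.01 * 0.9 ** (step // 2000). A quick check (not part of the commit):

# Sketch: evaluate the decay schedule at a few steps.
for step in (0, 1999, 2000, 10000):
    print(step, float(lr_schedule(step)))  # 0.01, 0.01, 0.009, ~0.0059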

# hyperparameters that are determined by hyperparameter optimization
h1 = 0.16726490480995826
h2 = 0.5283208497548787
h3 = 0.5099528144902471
h4 = h3

delta = 1.7642791340966357

match optimizer_type:
    case "adam":
        optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    case "sgd":
        optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)
    case "rmsprop":
        optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)
    case _:
        raise ValueError("No valid optimizer_type found: " + optimizer_type)

model.compile(
    optimizer=optimizer,
    loss=custom_loss(preprocess, column_dict, h1, h2, h3, h4, scaler_type, loss_variant, 1),
    metrics=[
        huber_metric(delta),
        mass_balance_metric(preprocess, column_dict, scaler_type, loss_variant),
    ],
)


###### train model

epochs = 3

history = model_training(model, epochs=epochs)


###### save model and history

delimiter = "_"
idx_string = (
    scaler_type
    + delimiter
    + "feature_engineering_"
    + str(feature_engineering).lower()
    + delimiter
    + optimizer_type
    + delimiter
    + loss_variant
)
file_name = "history_" + idx_string
with open("../results/" + file_name, "wb") as file_pi:
    pickle.dump(history.history, file_pi)

model.save_weights("../results/models/model_"+idx_string + ".weights.h5")
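
Aside (not part of the commit): a saved run can be restored from these two artifacts. A minimal sketch, assuming the same model_definition architecture and idx_string as above:

# Sketch: reload the pickled history and the saved weights.
with open("../results/" + file_name, "rb") as file_pi:
    saved_history = pickle.load(file_pi)
plt.plot(saved_history["loss"], label="train")
plt.plot(saved_history["val_loss"], label="validation")
plt.legend()
plt.show()

restored = model_definition("large", len(df_design.columns), len(df_results.columns))
restored.load_weights("../results/models/model_" + idx_string + ".weights.h5")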


###### evaluate model

results = mass_balance_evaluation(model, X_test, preprocess)
proportion = mass_balance_ratio(results, threshold=1e-5)

X_test.reset_index(inplace=True, drop=True)
y_test.reset_index(inplace=True, drop=True)
all_classes = model.evaluate(
    X_test.loc[:, X_test.columns != "Class"],
    y_test.loc[:, y_test.columns != "Class"],
)
class_0 = model.evaluate(
    X_test[X_test["Class"] == 0].loc[:, X_test.columns != "Class"],
    y_test[X_test["Class"] == 0].loc[:, y_test.columns != "Class"],
)
class_1 = model.evaluate(
    X_test[X_test["Class"] == 1].loc[:, X_test.columns != "Class"],
    y_test[X_test["Class"] == 1].loc[:, y_test.columns != "Class"],
)

print("metric all data: ", all_classes)
print("metric class 0: ", class_0)
print("metric class 1: ", class_1)


# Save evaluation results to a file (the with-block closes the file automatically)
results_file_name = "../results/evaluation_" + idx_string + ".csv"
with open(results_file_name, mode="w", newline="") as results_file:
    writer = csv.writer(results_file)
    writer.writerow(["Metric", "Value"])
    writer.writerow(["Mass balance fulfilled (all classes)", proportion["overall"]])
    writer.writerow(["Mass balance fulfilled (class 0)", proportion["class_0"]])
    writer.writerow(["Mass balance fulfilled (class 1)", proportion["class_1"]])
    writer.writerow(["Metrics (all classes)", all_classes])
    writer.writerow(["Metrics (class 0)", class_0])
    writer.writerow(["Metrics (class 1)", class_1])
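
Aside (not part of the commit): the two-column CSV can be read back for a quick report, e.g.:

# Sketch: print the saved evaluation results.
with open(results_file_name) as f:
    for metric, value in csv.reader(f):
        print(metric + ":", value)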


for i in scaler_experiments:
@@ -126,6 +126,7 @@ def custom_loss(
     h1,
     h2,
     h3,
+    h4,
     scaler_type="minmax",
     loss_variant="huber",
     delta=1.0,
@@ -175,6 +176,8 @@ def custom_loss(
 
     def loss(results, predicted):
+        # inverse min/max scaling
+        preprocess.scaler_input(results)
 
         if scaler_type == "minmax":
             predicted_inverse = predicted * data_range + min_values
             results_inverse = results * data_range + min_values
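
Aside: the inversion used in this hunk is the standard min-max identity, x = x_scaled * (max_values - min_values) + min_values. A standalone check with hypothetical values (not part of the commit):

# Sketch: forward and inverse min-max scaling round-trip.
import numpy as np
min_values = np.array([0.0, 1.0])
data_range = np.array([2.0, 4.0])  # max_values - min_values
x = np.array([[0.5, 3.0]])
x_scaled = (x - min_values) / data_range
assert np.allclose(x_scaled * data_range + min_values, x)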
@@ -240,7 +243,7 @@ def custom_loss(
         elif loss_variant == "huber_mass_balance":
             total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
         elif "huber_mass_balance_extended":
-            total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr + h3 * dS
+            total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr + h4 * dS
         else:
             raise Exception(
                 "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."