custom loss function

This commit is contained in:
Hannes Signer 2025-02-14 16:33:32 +01:00
parent e9f49308f2
commit 3171dd3643
6 changed files with 637 additions and 1264 deletions

File diff suppressed because one or more lines are too long

Binary file not shown.

BIN
loss_1_to_end.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 85 KiB

BIN
loss_all.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

BIN
model_large.keras Normal file

Binary file not shown.

View File

@ -190,50 +190,143 @@ def preprocessing_training(df_design, df_targets, func_dict_in, func_dict_out, s
class preprocessing:
    """Preprocessing pipeline for a design DataFrame ``X`` and a target DataFrame ``y``.

    The instance stores configuration only (transform dictionaries, random
    seed, fitted scalers and a ``state`` dict of flags); the data frames are
    passed to every method and handed back to the caller.

    A column named ``"Class"`` is treated as a label column: it is skipped by
    the value transforms and by scaling, and is carried through unchanged.
    """

    def __init__(self, func_dict_in, func_dict_out, random_state=42):
        """Store the transform dictionaries and the random seed.

        Args:
            func_dict_in: mapping ``column name -> callable`` applied
                element-wise by :meth:`funcTranform`.
            func_dict_out: mapping ``column name -> callable`` applied
                element-wise by :meth:`funcInverse`.
            random_state: seed handed to KMeans, the samplers and the
                train/test split.
        """
        self.random_state = random_state
        self.scaler_X = None
        self.scaler_y = None
        self.func_dict_in = func_dict_in
        self.func_dict_out = func_dict_out
        self.state = {"cluster": False, "log": False, "balance": False, "scale": False}

    @staticmethod
    def _apply_columnwise(X, y, func_dict):
        """Apply ``func_dict[key]`` element-wise to column ``key`` of X and y (in place)."""
        for key in X.keys():
            if "Class" not in key:
                X[key] = X[key].apply(func_dict[key])
                y[key] = y[key].apply(func_dict[key])
        return X, y

    @staticmethod
    def _scale_features(scaler, frame, method="transform"):
        """Run ``scaler.<method>`` on the non-Class columns and re-attach ``Class``.

        The scaler output is wrapped in a DataFrame that reuses the original
        column names and index: ``pd.concat`` only accepts pandas objects and
        aligns on the index, so the row labels must survive the scaling step.
        """
        if scaler is None:
            raise RuntimeError("Scaler has to be fitted first")
        features = frame.loc[:, frame.columns != "Class"]
        scaled = pd.DataFrame(
            getattr(scaler, method)(features),
            columns=features.columns,
            index=features.index,
        )
        if "Class" not in frame.columns:
            return scaled
        return pd.concat([scaled, frame.loc[:, "Class"]], axis=1)

    def funcTranform(self, X, y):
        """Apply the ``func_dict_in`` callables to every non-Class column.

        The columns of ``X`` drive the loop and the same keys are used on
        ``y``. Both frames are modified in place and also returned. Sets
        ``state["log"]`` to True.
        """
        self._apply_columnwise(X, y, self.func_dict_in)
        self.state["log"] = True
        return X, y

    def funcInverse(self, X, y):
        """Apply the ``func_dict_out`` callables to every non-Class column.

        Both frames are modified in place and also returned. Sets
        ``state["log"]`` to False.
        """
        self._apply_columnwise(X, y, self.func_dict_out)
        self.state["log"] = False
        return X, y

    def cluster(self, X, y, species='Barite', n_clusters=2, x_length=50, y_length=50):
        """Label every row by running KMeans per grid on the ``species`` column.

        The rows of ``X`` are consumed in consecutive chunks of
        ``x_length * y_length`` rows; each chunk is clustered on its own and
        the labels are concatenated into a new ``Class`` column appended to
        both ``X`` and ``y``.

        Raises:
            Exception: if :meth:`funcTranform` has not been applied
                (``state["log"]`` is False).

        Returns:
            The (possibly extended) ``X`` and ``y``.
        """
        if not self.state["log"]:
            raise Exception("Data has to be transformed first")

        # Nothing to compute when both frames already carry labels.
        if "Class" in X.columns and "Class" in y.columns:
            print("Class column already exists")
            self.state["cluster"] = True
            return X, y

        grid_length = x_length * y_length
        iterations = int(len(X) / grid_length)

        labels_per_field = []
        for i in range(iterations):
            start = i * grid_length
            field = np.array(X[species].iloc[start:start + grid_length]).reshape(-1, 1)
            kmeans = KMeans(n_clusters=n_clusters, random_state=self.random_state).fit(field)
            labels_per_field.append(kmeans.labels_)
        # NOTE(review): each field is clustered independently, so the meaning
        # of label 0/1 is presumably not guaranteed to match between fields.

        if labels_per_field:
            class_labels = np.concatenate(labels_per_field).astype(int)
        else:
            class_labels = np.array([], dtype=int)

        # The label frame has a default integer index; concat aligns on the
        # index, so X and y are assumed to use a default index as well.
        class_labels_df = pd.DataFrame(class_labels, columns=['Class'])
        X = pd.concat([X, class_labels_df], axis=1)
        y = pd.concat([y, class_labels_df], axis=1)
        self.state["cluster"] = True
        return X, y

    def balancer(self, X, y, strategy, sample_fraction=0.5):
        """Resample design and target rows jointly to balance the two classes.

        Args:
            X, y: design and target frames; at least one must contain a
                ``Class`` column (``X`` takes precedence).
            strategy: ``'smote'``, ``'over'`` or ``'under'``. Any other value
                returns ``X`` and ``y`` unchanged.
            sample_fraction: ``sampling_strategy`` for SMOTE only.

        Raises:
            Exception: if neither frame has a ``Class`` column.

        Returns:
            ``(design_resampled, target_resampled)``, each with the resampled
            ``Class`` column appended.
        """
        number_features = (X.columns != "Class").sum()
        if "Class" in X.columns:
            classes = X['Class']
        elif "Class" in y.columns:
            classes = y['Class']
        else:
            raise Exception("No class column found")

        counter = classes.value_counts()
        print("Amount class 0 before:", counter[0] / (counter[0] + counter[1]))
        print("Amount class 1 before:", counter[1] / (counter[0] + counter[1]))

        # Design and target columns are resampled together so rows stay paired.
        df = pd.concat([X.loc[:, X.columns != "Class"], y.loc[:, y.columns != "Class"], classes], axis=1)

        # Samplers are built lazily; the seed makes resampling reproducible.
        samplers = {
            'smote': ("Using SMOTE strategy",
                      lambda: SMOTE(sampling_strategy=sample_fraction, random_state=self.random_state)),
            'over': ("Using Oversampling",
                     lambda: RandomOverSampler(random_state=self.random_state)),
            'under': ("Using Undersampling",
                      lambda: RandomUnderSampler(random_state=self.random_state)),
        }
        if strategy not in samplers:
            return X, y

        message, make_sampler = samplers[strategy]
        print(message)
        df_resampled, classes_resampled = make_sampler().fit_resample(
            df.loc[:, df.columns != "Class"], df.loc[:, df.columns == "Class"])

        counter = classes_resampled["Class"].value_counts()
        print("Amount class 0 after:", counter[0] / (counter[0] + counter[1]))
        print("Amount class 1 after:", counter[1] / (counter[0] + counter[1]))

        # The first `number_features` columns came from X, the rest from y.
        design_resampled = pd.concat([df_resampled.iloc[:, 0:number_features], classes_resampled], axis=1)
        target_resampled = pd.concat([df_resampled.iloc[:, number_features:], classes_resampled], axis=1)
        self.state['balance'] = True
        return design_resampled, target_resampled

    def scale_fit(self, X, y, scaling):
        """Fit MinMax scalers on the non-Class columns.

        Args:
            scaling: ``'individual'`` fits separate scalers for ``X`` and
                ``y``; ``'global'`` fits one scaler on the rows of both
                frames stacked together and uses it for both. Any other
                value leaves the scalers and the state untouched.
        """
        X_features = X.loc[:, X.columns != "Class"]
        y_features = y.loc[:, y.columns != "Class"]

        if scaling == 'individual':
            self.scaler_X = MinMaxScaler().fit(X_features)
            self.scaler_y = MinMaxScaler().fit(y_features)
        elif scaling == 'global':
            # Rows are stacked, so X and y are assumed to share column names.
            self.scaler_X = MinMaxScaler().fit(pd.concat([X_features, y_features], axis=0))
            self.scaler_y = self.scaler_X
        else:
            return
        self.state['scale'] = True

    def scale_transform(self, X_train, X_test, y_train, y_test):
        """Scale the non-Class columns of all four frames with the fitted scalers.

        Raises:
            RuntimeError: if :meth:`scale_fit` has not fitted the scalers.

        Returns:
            ``(X_train, X_test, y_train, y_test)`` as DataFrames keeping the
            original index and column names.
        """
        X_train = self._scale_features(self.scaler_X, X_train)
        X_test = self._scale_features(self.scaler_X, X_test)
        y_train = self._scale_features(self.scaler_y, y_train)
        y_test = self._scale_features(self.scaler_y, y_test)
        return X_train, X_test, y_train, y_test

    def scale_inverse(self, X):
        """Undo the scaling of ``X`` using ``scaler_X``.

        With a ``Class`` column the result is a DataFrame (features restored,
        ``Class`` re-attached); without one the raw output of
        ``inverse_transform`` is returned.

        Raises:
            RuntimeError: if :meth:`scale_fit` has not fitted the scalers.
        """
        if "Class" in X.columns:
            return self._scale_features(self.scaler_X, X, method="inverse_transform")
        if self.scaler_X is None:
            raise RuntimeError("Scaler has to be fitted first")
        return self.scaler_X.inverse_transform(X)

    def split(self, X, y, ratio=0.8):
        """Split ``X`` and ``y`` into train and test parts.

        Returns:
            ``(X_train, X_test, y_train, y_test)``, the order produced by
            ``train_test_split``.
        """
        # NOTE(review): `ratio` is passed as test_size, so the default 0.8
        # puts 80 % of the rows into the test part - confirm this is intended.
        X_train, X_test, y_train, y_test = sk.train_test_split(
            X, y, test_size=ratio, random_state=self.random_state)
        return X_train, X_test, y_train, y_test