Mirror of https://git.gfz-potsdam.de/naaice/model-training.git, synced 2025-12-13 10:38:22 +01:00
custom loss function
This commit is contained in: parent e9f49308f2, commit 3171dd3643
POET_Training.ipynb, 1756 lines changed: file diff suppressed because one or more lines are too long.
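Since the notebook diff is suppressed, the custom loss named in the commit message is not visible on this page. Purely as an illustration of the pattern (none of the names or the weighting below come from this commit), a custom loss in Keras is a callable on (y_true, y_pred) passed to model.compile:

    import tensorflow as tf

    # Hypothetical sketch only; NOT the loss committed in POET_Training.ipynb.
    def weighted_mse(y_true, y_pred):
        weights = 1.0 + tf.abs(y_true)  # assumed up-weighting of large targets
        return tf.reduce_mean(weights * tf.square(y_true - y_pred))

    # model.compile(optimizer="adam", loss=weighted_mse)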
BIN __pycache__/preprocessing.cpython-311.pyc, new file: binary file not shown.
BIN loss_1_to_end.png, new file, 85 KiB: binary file not shown.
BIN loss_all.png, new file, 62 KiB: binary file not shown.
BIN model_large.keras, new file: binary file not shown.
preprocessing.py, 145 lines changed:

@@ -190,50 +190,143 @@ def preprocessing_training(df_design, df_targets, func_dict_in, func_dict_out, s
 class preprocessing:
 
-    def __init__(self, df_design, df_targets, random_state=42):
-        self.X = df_design
-        self.y = df_targets
+    def __init__(self, func_dict_in, func_dict_out, random_state=42):
         self.random_state = random_state
         self.scaler_X = None
         self.scaler_y = None
+        self.func_dict_in = func_dict_in
+        self.func_dict_out = func_dict_out
         self.state = {"cluster": False, "log": False, "balance": False, "scale": False}
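The constructor no longer takes the data frames; it now holds only the transform dictionaries, the scalers, and the pipeline state, while every method receives X and y explicitly. A minimal construction sketch (the column name and log transforms below are illustrative assumptions, not taken from the repository):

    import numpy as np

    # Hypothetical per-column transforms; the real func_dict_in/func_dict_out
    # are built elsewhere in the training notebook.
    func_dict_in = {"Barite": np.log10}
    func_dict_out = {"Barite": lambda v: 10.0 ** v}

    prep = preprocessing(func_dict_in, func_dict_out, random_state=42)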
-    def funcTranform(self, func_dict_in):
-        for key in self.X.keys():
+    def funcTranform(self, X, y):
+        for key in X.keys():
             if "Class" not in key:
-                self.X[key] = self.X[key].apply(func_dict_in[key])
-                self.y[key] = self.y[key].apply(func_dict_in[key])
+                X[key] = X[key].apply(self.func_dict_in[key])
+                y[key] = y[key].apply(self.func_dict_in[key])
         self.state["log"] = True
+        return X, y
-    def funcInverse(self, func_dict_out):
-        if(self.state["log"] == False):
-            raise Exception("Data has to be transformed first")
-        for key in self.X.keys():
-            if "Class" not in key:
-                self.X[key] = self.X[key].apply(func_dict_out[key])
-                self.y[key] = self.y[key].apply(func_dict_out[key])
+    def funcInverse(self, X, y):
+        for key in X.keys():
+            if "Class" not in key:
+                X[key] = X[key].apply(self.func_dict_out[key])
+                y[key] = y[key].apply(self.func_dict_out[key])
+        self.state["log"] = False
+        return X, y
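funcTranform and funcInverse now act on the frames passed in, apply the stored dictionaries column-wise, toggle state["log"], and return the result; the "transformed first" guard moved from funcInverse into cluster. A round-trip sketch under the assumptions above:

    import pandas as pd

    # Toy frames standing in for df_design / df_targets.
    X = pd.DataFrame({"Barite": [1.0, 10.0, 100.0]})
    y = pd.DataFrame({"Barite": [0.1, 1.0, 10.0]})

    X_t, y_t = prep.funcTranform(X, y)     # element-wise log10, state["log"] -> True
    X_r, y_r = prep.funcInverse(X_t, y_t)  # back-transform, state["log"] -> False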
-    def cluster(self, species='Barite', n_clusters=2, x_length=50, y_length=50):
+    def cluster(self, X, y, species='Barite', n_clusters=2, x_length=50, y_length=50):
+        if(self.state["log"] == False):
+            raise Exception("Data has to be transformed first")
         class_labels = np.array([])
         grid_length = x_length * y_length
-        iterations = int(len(self.X) / grid_length)
+        iterations = int(len(X) / grid_length)
 
         for i in range(0, iterations):
-            field = np.array(self.X['Barite'][(i*grid_length):(i*grid_length+grid_length)]
+            field = np.array(X[species][(i*grid_length):(i*grid_length+grid_length)]
                              ).reshape(x_length, y_length)
             kmeans = KMeans(n_clusters=n_clusters, random_state=self.random_state).fit(field.reshape(-1, 1))
             class_labels = np.append(class_labels.astype(int), kmeans.labels_)
 
-        if ("Class" in self.X.columns and "Class" in self.y.columns):
+        if ("Class" in X.columns and "Class" in y.columns):
             print("Class column already exists")
         else:
             class_labels_df = pd.DataFrame(class_labels, columns=['Class'])
-            self.X = pd.concat([self.X, class_labels_df], axis=1)
-            self.y = pd.concat([self.y, class_labels_df], axis=1)
+            X = pd.concat([X, class_labels_df], axis=1)
+            y = pd.concat([y, class_labels_df], axis=1)
         self.state["cluster"] = True
 
+        return X, y
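cluster also takes the frames explicitly now, no longer hardcodes the Barite column, and segments each x_length by y_length snapshot of the chosen species with KMeans, appending the flat labels as a shared 'Class' column. Note that len(X) should be a whole multiple of x_length * y_length, since the integer division silently drops a trailing partial snapshot. Call sketch (grid shape assumed):

    # Assumes X_t/y_t hold N stacked 50x50 fields (N * 2500 rows) and that
    # funcTranform ran first; otherwise cluster raises.
    X_c, y_c = prep.cluster(X_t, y_t, species="Barite", n_clusters=2,
                            x_length=50, y_length=50)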
+    def balancer(self, X, y, strategy, sample_fraction=0.5):
+        number_features = (X.columns != "Class").sum()
+        if("Class" not in X.columns):
+            if("Class" in y.columns):
+                classes = y['Class']
+            else:
+                raise Exception("No class column found")
+        else:
+            classes = X['Class']
+        counter = classes.value_counts()
+        print("Amount class 0 before:", counter[0] / (counter[0] + counter[1]))
+        print("Amount class 1 before:", counter[1] / (counter[0] + counter[1]))
+        df = pd.concat([X.loc[:, X.columns != "Class"], y.loc[:, y.columns != "Class"], classes], axis=1)
+
+        if strategy == 'smote':
+            print("Using SMOTE strategy")
+            smote = SMOTE(sampling_strategy=sample_fraction)
+            df_resampled, classes_resampled = smote.fit_resample(df.loc[:, df.columns != "Class"], df.loc[:, df.columns == "Class"])
+
+        elif strategy == 'over':
+            print("Using Oversampling")
+            over = RandomOverSampler()
+            df_resampled, classes_resampled = over.fit_resample(df.loc[:, df.columns != "Class"], df.loc[:, df.columns == "Class"])
+
+        elif strategy == 'under':
+            print("Using Undersampling")
+            under = RandomUnderSampler()
+            df_resampled, classes_resampled = under.fit_resample(df.loc[:, df.columns != "Class"], df.loc[:, df.columns == "Class"])
+
+        else:
+            return X, y
+
+        counter = classes_resampled["Class"].value_counts()
+        print("Amount class 0 after:", counter[0] / (counter[0] + counter[1]))
+        print("Amount class 1 after:", counter[1] / (counter[0] + counter[1]))
+
+        design_resampled = pd.concat([df_resampled.iloc[:, 0:number_features], classes_resampled], axis=1)
+        target_resampled = pd.concat([df_resampled.iloc[:, number_features:], classes_resampled], axis=1)
+
+        self.state['balance'] = True
+        return design_resampled, target_resampled
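balancer rebalances the two KMeans classes with imbalanced-learn: 'smote', 'over' (RandomOverSampler), or 'under' (RandomUnderSampler); any other strategy returns the inputs unchanged. It assumes exactly two classes (counter[0] and counter[1]) and prints the class ratio before and after resampling. Usage sketch:

    # Requires imbalanced-learn and a 'Class' column (run cluster first).
    X_bal, y_bal = prep.balancer(X_c, y_c, strategy="smote", sample_fraction=0.5)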
+    def scale_fit(self, X, y, scaling):
+        if scaling == 'individual':
+            self.scaler_X = MinMaxScaler()
+            self.scaler_y = MinMaxScaler()
+            self.scaler_X.fit(X.iloc[:, X.columns != "Class"])
+            self.scaler_y.fit(y.iloc[:, y.columns != "Class"])
+
+        elif scaling == 'global':
+            self.scaler_X = MinMaxScaler()
+            self.scaler_X.fit(pd.concat([X.iloc[:, X.columns != "Class"], y.iloc[:, y.columns != "Class"]], axis=0))
+            self.scaler_y = self.scaler_X
+
+        self.state['scale'] = True
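scale_fit supports two MinMaxScaler modes: 'individual' fits separate scalers for the design and target columns, while 'global' stacks both frames row-wise and fits one shared scaler, so inputs and outputs end up on a common range. Only the fitting happens here; the transformation is applied in scale_transform. Sketch:

    prep.scale_fit(X_bal, y_bal, scaling="global")  # or scaling="individual"

Note that the 'global' mode concatenates along axis=0, so X and y need matching column names for the stacked fit to line up without NaNs.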
+    def scale_transform(self, X_train, X_test, y_train, y_test):
+        X_train = pd.concat([self.scaler_X.transform(X_train.loc[:, X_train.columns != "Class"]), X_train.loc[:, "Class"]], axis=1)
+        X_test = pd.concat([self.scaler_X.transform(X_test.loc[:, X_test.columns != "Class"]), X_test.loc[:, "Class"]], axis=1)
+        y_train = pd.concat([self.scaler_y.transform(y_train.loc[:, y_train.columns != "Class"]), y_train.loc[:, "Class"]], axis=1)
+        y_test = pd.concat([self.scaler_y.transform(y_test.loc[:, y_test.columns != "Class"]), y_test.loc[:, "Class"]], axis=1)
+
+        return X_train, X_test, y_train, y_test
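One caveat in scale_transform as committed: MinMaxScaler.transform returns a NumPy array, and pd.concat only accepts pandas objects, so each of these four lines would raise a TypeError at run time (the 'Class' branch of scale_inverse below has the same issue). A hedged fix, not the committed code, is to wrap the scaled block back into a DataFrame first, shown here for the first line:

    # Inside scale_transform; the same pattern applies to the other three frames.
    cols = X_train.columns[X_train.columns != "Class"]
    scaled = pd.DataFrame(self.scaler_X.transform(X_train.loc[:, cols]),
                          columns=cols, index=X_train.index)
    X_train = pd.concat([scaled, X_train.loc[:, "Class"]], axis=1)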
+    def scale_inverse(self, X):
+        if("Class" in X.columns):
+            X = pd.concat([self.scaler_X.inverse_transform(X.loc[:, X.columns != "Class"]), X.loc[:, "Class"]], axis=1)
+        else:
+            X = self.scaler_X.inverse_transform(X)
+
+        return X
+    def split(self, X, y, ratio=0.8):
+        X_train, y_train, X_test, y_test = sk.train_test_split(X, y, test_size=ratio, random_state=self.random_state)
+
+        return X_train, y_train, X_test, y_test
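Two things worth flagging in split: scikit-learn's train_test_split returns (X_train, X_test, y_train, y_test), so the unpacking order here silently swaps y_train and X_test, and test_size=ratio with the default ratio=0.8 sends 80% of the rows to the test set, which looks like the opposite of the intent. A corrected sketch, assuming ratio is meant as the training fraction:

    def split(self, X, y, ratio=0.8):
        # train_test_split returns X_train, X_test, y_train, y_test in that order.
        X_train, X_test, y_train, y_test = sk.train_test_split(
            X, y, train_size=ratio, random_state=self.random_state)
        return X_train, y_train, X_test, y_test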