Mirror of https://git.gfz-potsdam.de/naaice/model-training.git, synced 2025-12-13 10:38:22 +01:00
Compare commits: 4 commits, f2c89e0b83...09a5687580
| Author | SHA1 | Date |
|---|---|---|
| | 09a5687580 | |
| | bbccd1444d | |
| | e21c7bede8 | |
| | 5b520c368d | |
- BIN results/adam_history.pkl (Stored with Git LFS; binary file not shown)
- BIN results/rmsprop_history.pkl (Stored with Git LFS; binary file not shown)
- BIN results/sgd_history.pkl (Stored with Git LFS; binary file not shown)
File diff suppressed because one or more lines are too long
```diff
@@ -160,38 +160,29 @@ def custom_loss(
             preprocess.scaler_type, scaler_type))

     if scaler_type == "minmax":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_range_, dtype=tf.float32
+        data_range = tf.convert_to_tensor(
+            preprocess.scaler_output.data_range_, dtype=tf.float32
         )
-        min_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_min_, dtype=tf.float32
-        )
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_range_, dtype=tf.float32
-        )
-        min_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_min_, dtype=tf.float32
+        min_values = tf.convert_to_tensor(
+            preprocess.scaler_output.data_min_, dtype=tf.float32
         )

     elif scaler_type == "standard":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.scale_, dtype=tf.float32)
-        mean_X = tf.convert_to_tensor(
-            preprocess.scaler_X.mean_, dtype=tf.float32)
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.scale_, dtype=tf.float32)
-        mean_y = tf.convert_to_tensor(
-            preprocess.scaler_y.mean_, dtype=tf.float32)
+        scale_output = tf.convert_to_tensor(
+            preprocess.scaler_output.scale_, dtype=tf.float32)
+        mean_output = tf.convert_to_tensor(
+            preprocess.scaler_output.mean_, dtype=tf.float32)

     def loss(results, predicted):
         # inverse min/max scaling
         if scaler_type == "minmax":
-            predicted_inverse = predicted * scale_y + min_y
-            results_inverse = results * scale_X + min_X
+            predicted_inverse = predicted * data_range + min_values
+            results_inverse = results * data_range + min_values

         # inverse standard scaling
         elif scaler_type == "standard":
-            predicted_inverse = predicted * scale_y + mean_y
-            results_inverse = results * scale_X + mean_X
+            predicted_inverse = predicted * scale_output + mean_output
+            results_inverse = results * scale_output + mean_output

         elif scaler_type == "none":
             predicted_inverse = predicted
```
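The min/max branch above relies on scikit-learn's MinMaxScaler attributes: with the default feature_range=(0, 1), a scaled value maps back as x = x_scaled * data_range_ + data_min_, which is exactly what the loss closure now computes with the tensors built from scaler_output. A self-contained check of that identity (toy data, illustrative only):

```python
import numpy as np
from sklearn.preprocessing import MinMaxScaler

X = np.random.default_rng(0).uniform(0.0, 5.0, size=(10, 3))

scaler = MinMaxScaler()  # default feature_range=(0, 1)
X_scaled = scaler.fit_transform(X)

# Manual inversion, mirroring the tensors used in the loss closure:
X_manual = X_scaled * scaler.data_range_ + scaler.data_min_

assert np.allclose(X_manual, scaler.inverse_transform(X_scaled))
```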
```diff
@@ -204,6 +195,8 @@ def custom_loss(

         # mass balance
         # in total no Barium and Strontium should be lost in one simulation step
+
+        # TODO: encapsulate the mass balance terms in a function
         dBa = tf.keras.backend.abs(
             (
                 predicted_inverse[:, column_dict["Ba"]]
```
```diff
@@ -224,6 +217,19 @@ def custom_loss(
                 + results_inverse[:, column_dict["Celestite"]]
             )
         )
+
+        dS = tf.keras.backend.abs(
+            (
+                predicted_inverse[:, column_dict["S"]]
+                + predicted_inverse[:, column_dict["Celestite"]]
+                + predicted_inverse[:, column_dict["Barite"]]
+            )
+            - (
+                results_inverse[:, column_dict["S"]]
+                + results_inverse[:, column_dict["Celestite"]]
+                + results_inverse[:, column_dict["Barite"]]
+            )
+        )

         # huber loss
         huber_loss = tf.keras.losses.Huber(delta)(results, predicted)
```
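The new dS term repeats the dBa/dSr pattern with a third column set, which is what the TODO above flags. A hypothetical encapsulation (the helper name `mass_balance_term` and its `names` argument are illustrative, not part of the commit):

```python
import tensorflow as tf

def mass_balance_term(predicted_inverse, results_inverse, column_dict, names):
    """Hypothetical helper: |sum(predicted cols) - sum(result cols)| per row."""
    idx = [column_dict[n] for n in names]
    pred_sum = tf.add_n([predicted_inverse[:, i] for i in idx])
    res_sum = tf.add_n([results_inverse[:, i] for i in idx])
    return tf.abs(pred_sum - res_sum)

# dBa = mass_balance_term(pred_inv, res_inv, column_dict, ["Ba", "Barite"])
# dSr = mass_balance_term(pred_inv, res_inv, column_dict, ["Sr", "Celestite"])
# dS  = mass_balance_term(pred_inv, res_inv, column_dict, ["S", "Celestite", "Barite"])
```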
```diff
@@ -233,6 +239,8 @@ def custom_loss(
             total_loss = huber_loss
         elif loss_variant == "huber_mass_balance":
             total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr
+        elif loss_variant == "huber_mass_balance_extended":
+            total_loss = h1 * huber_loss + h2 * dBa + h3 * dSr + h3 * dS
         else:
             raise Exception(
                 "No valid loss variant found. Choose between 'huber' and 'huber_mass_balance'."
```
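One pitfall worth noting in the variant dispatch: writing `elif "huber_mass_balance_extended":` instead of an equality test (as the viewer renders the original hunk) makes the branch unconditionally truthy, so a misspelled loss_variant would silently select the extended variant rather than reaching the Exception. A self-contained illustration:

```python
loss_variant = "hubber"  # typo that should trigger the error branch

if loss_variant == "huber":
    print("plain huber")
elif "huber_mass_balance_extended":  # truthy for ANY loss_variant value
    print("silently selects the extended variant")
else:
    print("never reached")
```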
```diff
@@ -243,7 +251,7 @@ def custom_loss(
     return loss


-def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
+def mass_balance_metric(preprocess, column_dict, scaler_type="minmax", loss_variant="huber_mass_balance"):
     """Auxiliary function to calculate the mass balance during training.

     Args:
```
```diff
@@ -256,36 +264,29 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
     """

     if scaler_type == "minmax":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_range_, dtype=tf.float32
+        data_range = tf.convert_to_tensor(
+            preprocess.scaler_output.data_range_, dtype=tf.float32
         )
-        min_X = tf.convert_to_tensor(
-            preprocess.scaler_X.data_min_, dtype=tf.float32)
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_range_, dtype=tf.float32
+        min_values = tf.convert_to_tensor(
+            preprocess.scaler_output.data_min_, dtype=tf.float32
         )
-        min_y = tf.convert_to_tensor(
-            preprocess.scaler_y.data_min_, dtype=tf.float32)

     elif scaler_type == "standard":
-        scale_X = tf.convert_to_tensor(
-            preprocess.scaler_X.scale_, dtype=tf.float32)
-        mean_X = tf.convert_to_tensor(
-            preprocess.scaler_X.mean_, dtype=tf.float32)
-        scale_y = tf.convert_to_tensor(
-            preprocess.scaler_y.scale_, dtype=tf.float32)
-        mean_y = tf.convert_to_tensor(
-            preprocess.scaler_y.mean_, dtype=tf.float32)
+        scale_output = tf.convert_to_tensor(
+            preprocess.scaler_output.scale_, dtype=tf.float32)
+        mean_output = tf.convert_to_tensor(
+            preprocess.scaler_output.mean_, dtype=tf.float32)

     def mass_balance(results, predicted):
         # inverse min/max scaling
         if scaler_type == "minmax":
-            predicted_inverse = predicted * scale_y + min_y
-            results_inverse = results * scale_X + min_X
+            predicted_inverse = predicted * data_range + min_values
+            results_inverse = results * data_range + min_values

         # inverse standard scaling
         elif scaler_type == "standard":
-            predicted_inverse = predicted * scale_y + mean_y
-            results_inverse = results * scale_X + mean_X
+            predicted_inverse = predicted * scale_output + mean_output
+            results_inverse = results * scale_output + mean_output

         elif scaler_type == "none":
             predicted_inverse = predicted
```
```diff
@@ -306,6 +307,7 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
                 + results_inverse[:, column_dict["Barite"]]
             )
         )
+
         dSr = tf.keras.backend.abs(
             (
                 predicted_inverse[:, column_dict["Sr"]]
```
```diff
@@ -316,11 +318,74 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
                 + results_inverse[:, column_dict["Celestite"]]
             )
         )
-        return tf.reduce_mean(dBa + dSr)
+
+        dS = tf.keras.backend.abs(
+            (
+                predicted_inverse[:, column_dict["S"]]
+                + predicted_inverse[:, column_dict["Celestite"]]
+                + predicted_inverse[:, column_dict["Barite"]]
+            )
+            - (
+                results_inverse[:, column_dict["S"]]
+                + results_inverse[:, column_dict["Celestite"]]
+                + results_inverse[:, column_dict["Barite"]]
+            )
+        )
+
+        if loss_variant == "huber_mass_balance":
+            return tf.reduce_mean(dBa + dSr)
+        elif loss_variant == "huber_mass_balance_extended":
+            return tf.reduce_mean(dBa + dSr + dS)

     return mass_balance


+# def mass_balance_barium(predicted_inverse, results_inverse, column_dict):
+#     dBa = tf.keras.backend.abs(
+#         (
+#             predicted_inverse[:, column_dict["Ba"]]
+#             + predicted_inverse[:, column_dict["Barite"]]
+#         )
+#         - (
+#             results_inverse[:, column_dict["Ba"]]
+#             + results_inverse[:, column_dict["Barite"]]
+#         )
+#     )
+
+#     return dBa
+
+
+# def mass_balance_strontium(predicted_inverse, results_inverse, column_dict):
+#     dSr = tf.keras.backend.abs(
+#         (
+#             predicted_inverse[:, column_dict["Sr"]]
+#             + predicted_inverse[:, column_dict["Celestite"]]
+#         )
+#         - (
+#             results_inverse[:, column_dict["Sr"]]
+#             + results_inverse[:, column_dict["Celestite"]]
+#         )
+#     )
+
+#     return dSr
+
+
+# def mass_balance_sulfur(predicted_inverse, results_inverse, column_dict):
+#     dS = tf.keras.backend.abs(
+#         (
+#             predicted_inverse[:, column_dict["S"]]
+#             + predicted_inverse[:, column_dict["Celestite"]]
+#             + predicted_inverse[:, column_dict["Barite"]]
+#         )
+#         - (
+#             results_inverse[:, column_dict["S"]]
+#             + results_inverse[:, column_dict["Celestite"]]
+#             + results_inverse[:, column_dict["Barite"]]
+#         )
+#     )
+
+#     return dS
+
+
 def huber_metric(delta=1.0):
     """Auxiliary function to calculate the Huber loss during training.
```
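mass_balance_metric follows Keras's closure-as-metric pattern: an outer function captures configuration and returns a `(y_true, y_pred)` callable that can be passed in `metrics`. A minimal self-contained sketch of the same pattern (stand-in metric, not the project's code):

```python
import tensorflow as tf

def scaled_mae_metric(scale):
    # The outer function captures configuration, like mass_balance_metric
    # captures preprocess/column_dict/loss_variant above.
    def scaled_mae(y_true, y_pred):
        return tf.reduce_mean(tf.abs(y_true - y_pred)) * scale
    return scaled_mae

model = tf.keras.Sequential([tf.keras.layers.Dense(4, input_shape=(3,))])
model.compile(optimizer="adam", loss="mse", metrics=[scaled_mae_metric(2.0)])
```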
```diff
@@ -337,8 +402,9 @@ def huber_metric(delta=1.0):
     return huber


-def mass_balance_evaluation(model, X, y, preprocess):
-    """Calculates the mass balance difference for each cell.
+def mass_balance_evaluation(model, X, preprocess):
+    """Calculates the mass balance difference for each cell
+    between the predicted values and the design dataset.

     Args:
         model: trained model
```
```diff
@@ -353,18 +419,12 @@ def mass_balance_evaluation(model, X, y, preprocess):
    columns = X.iloc[:, X.columns != "Class"].columns
    classes = X["Class"]
    classes.reset_index(drop=True, inplace=True)
-    prediction = pd.DataFrame(model.predict(X[columns]), columns=y.columns)
+    prediction = pd.DataFrame(model.predict(X[columns]), columns=preprocess.scaler_output.feature_names_in_)

    # backtransform min/max or standard scaler
-    if preprocess.scaler_X is not None:
-        X = pd.DataFrame(
-            preprocess.scaler_X.inverse_transform(
-                X.iloc[:, X.columns != "Class"]),
-            columns=columns,
-        )
-        prediction = pd.DataFrame(
-            preprocess.scaler_y.inverse_transform(prediction), columns=columns
-        )
+    if preprocess.scaler_input is not None:
+        X = preprocess.scale_inverse(X)[0]
+        prediction = preprocess.scale_inverse(prediction)[0]

    # apply backtransformation if log transformation was applied
    if preprocess.func_dict_out is not None:
```
```diff
@@ -378,9 +438,12 @@ def mass_balance_evaluation(model, X, y, preprocess):
        (prediction["Sr"] + prediction["Celestite"]) -
        (X["Sr"] + X["Celestite"])
    )
+    dS = np.abs(
+        (prediction["S"] + prediction["Celestite"] + prediction["Barite"]) -
+        (X["S"] + X["Celestite"] + X["Barite"]))

    mass_balance_result = pd.DataFrame(
-        {"dBa": dBa, "dSr": dSr, "mass_balance": dBa + dSr, "Class": classes}
+        {"dBa": dBa, "dSr": dSr, "dS": dS, "mass_balance": dBa + dSr, "mass_balance_extended": dBa + dSr + dS, "Class": classes}
    )

    return mass_balance_result
```
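dS on the evaluation side is plain pandas/numpy arithmetic: if total sulfur (S + Celestite + Barite) is conserved between design data and prediction, each row of dS is near zero. A toy check with made-up numbers:

```python
import numpy as np
import pandas as pd

X = pd.DataFrame({"S": [1.0, 2.0], "Celestite": [0.5, 0.2], "Barite": [0.1, 0.3]})
prediction = pd.DataFrame({"S": [0.8, 2.1], "Celestite": [0.7, 0.1], "Barite": [0.1, 0.3]})

dS = np.abs(
    (prediction["S"] + prediction["Celestite"] + prediction["Barite"]) -
    (X["S"] + X["Celestite"] + X["Barite"])
)
print(dS)  # both rows conserve the sums (1.6 and 2.5), so dS is 0.0
```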
```diff
@@ -421,8 +484,8 @@ class preprocessing:
             random_state (int, optional): Seed for reproducibility. Defaults to 42.
         """
         self.random_state = random_state
-        self.scaler_X = None
-        self.scaler_y = None
+        self.scaler_input = None
+        self.scaler_output = None
         self.func_dict_in = func_dict_in if func_dict_in is not None else None
         self.func_dict_out = func_dict_out if func_dict_out is not None else None
         self.state = {"cluster": False, "log": False,
```
```diff
@@ -500,8 +563,10 @@ class preprocessing:

         label = np.zeros(len(X))
         label[X[species] > threshold] = 1
-        X["Class"] = label
-        y["Class"] = label
+        X = X.copy()
+        y = y.copy()
+        X.loc[:, "Class"] = label
+        y.loc[:, "Class"] = label

         return X, y
```
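The switch to `.copy()` plus `.loc` assignment is the standard way to avoid pandas' SettingWithCopyWarning when X or y arrived as a slice of a larger frame. A self-contained illustration:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
subset = df[df["a"] > 1]   # slice of df

# subset["Class"] = 0      # may raise SettingWithCopyWarning
safe = subset.copy()       # explicit copy breaks the chained reference
safe.loc[:, "Class"] = 0   # unambiguous assignment on the copy
```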
```diff
@@ -584,52 +649,47 @@ class preprocessing:
         self.state["balance"] = True
         return design_resampled, target_resampled

-    def scale_fit(self, X, y, scaling, type="standard"):
+    def scale_fit(self, X, y, type="standard"):
         self.scaler_type = type
-        self.scaler_scope = scaling
         """Fit a scaler for data preprocessing.

         Args:
             X: design dataset
             y: target dataset
-            scaling: learn individual scaler for X and y when "individual" is selected or one global scaler on all data in X and y if "global" is selected (scaler_X and scaler_y are equal)
+            scaling: fit a scaler on all data in X and y. If X and y have different dimensions
+                input and output scaler are trained for the specific columns.
             type (str, optional): Using MinMax Scaling or Standardization. Defaults to "standard".
         """

         if type == "minmax":
-            self.scaler_X = MinMaxScaler()
-            self.scaler_y = MinMaxScaler()
+            self.scaler_input = MinMaxScaler()
+            self.scaler_output = MinMaxScaler()
         elif type == "standard":
-            self.scaler_X = StandardScaler()
-            self.scaler_y = StandardScaler()
+            self.scaler_input = StandardScaler()
+            self.scaler_output = StandardScaler()

         else:
             raise Exception("No valid scaler type found")

-        if scaling == "individual":
-            self.scaler_X.fit(X.iloc[:, X.columns != "Class"])
-            self.scaler_y.fit(y.iloc[:, y.columns != "Class"])
-
-        elif scaling == "global":
-            self.scaler_X.fit(
-                pd.concat(
-                    [X.iloc[:, X.columns != "Class"],
-                     y.iloc[:, y.columns != "Class"]],
-                    axis=0,
-                )
-            )
-            self.scaler_y = self.scaler_X
+        all_data = pd.concat([X, y], axis=0)
+
+        if len(X.columns) == len(y.columns):
+            self.scaler_input.fit(all_data.loc[:, X.columns != "Class"])
+            self.scaler_output = self.scaler_input
+
+        else:
+            self.scaler_input.fit(all_data[X.columns[X.columns != "Class"]])
+            self.scaler_output.fit(all_data[y.columns[y.columns != "Class"]])

         self.state["scale"] = True

         return pd.concat(
             [X.iloc[:, X.columns != "Class"],
              y.iloc[:, y.columns != "Class"]],
             axis=0,
         )

-    def scale_transform(self, X_train, X_test, y_train, y_test):
+    def scale_transform(self, *args):
         """Apply learned scaler on datasets.

         Args:
```
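The refactored scale_fit drops the individual/global switch: when X and y have the same number of columns, one scaler is fitted on the stacked data and shared; otherwise separate input and output scalers are fitted on their respective column sets. The shared case with plain sklearn objects (toy columns, illustrative only):

```python
import pandas as pd
from sklearn.preprocessing import StandardScaler

X = pd.DataFrame({"Ba": [0.1, 0.2], "Sr": [0.3, 0.4]})
y = pd.DataFrame({"Ba": [0.15, 0.25], "Sr": [0.35, 0.45]})

scaler_input = StandardScaler()
if len(X.columns) == len(y.columns):
    all_data = pd.concat([X, y], axis=0)  # stack design and target rows
    scaler_input.fit(all_data)
    scaler_output = scaler_input          # shared scaler, as in the diff
```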
```diff
@@ -641,82 +701,31 @@ class preprocessing:
         Returns:
             transformed dataframes
         """

-        if self.scaler_scope == "global":
-            if len(X_train.columns) > len(y_train.columns):
-                y_train_modified = X_train.copy()
-                y_test_modified = X_test.copy()
-
-                for i in y_train_modified.columns:
-                    if i in y_train.columns:
-                        y_train_modified[i] = y_train[i]
-                        y_test_modified[i] = y_test[i]
-                    else:
-                        y_train_modified[i] = np.nan
-                        y_test_modified[i] = np.nan
-
-                y_train = y_train_modified
-                y_test = y_test_modified
+        results = []
+        for i in args:
+            # check which scaler should be used depending on the columns
+            if len(i.columns[i.columns != "Class"]) == len(self.scaler_input.feature_names_in_):
+                scaler = self.scaler_input
             else:
-                X_train_modified = y_train.copy()
-                X_test_modified = y_test.copy()
-
-                for i in X_train_modified.columns:
-                    if i in X_train.columns:
-                        X_train_modified[i] = X_train[i]
-                        X_test_modified[i] = X_test[i]
-                    else:
-                        X_train_modified[i] = np.nan
-                        X_test_modified[i] = np.nan
-
-                X_train = X_train_modified
-                X_test = X_test_modified
-
-        X_train = pd.concat(
-            [
-                self.scaler_X.transform(
-                    X_train.loc[:, X_train.columns != "Class"]),
-                X_train.loc[:, "Class"],
-            ],
-            axis=1,
-        )
-
-        X_test = pd.concat(
-            [
-                self.scaler_X.transform(
-                    X_test.loc[:, X_test.columns != "Class"]),
-                X_test.loc[:, "Class"],
-            ],
-            axis=1,
-        )
-
-        y_train = pd.concat(
-            [
-                self.scaler_y.transform(
-                    y_train.loc[:, y_train.columns != "Class"]),
-                y_train.loc[:, "Class"],
-            ],
-            axis=1,
-        )
-
-        y_test = pd.concat(
-            [
-                self.scaler_y.transform(
-                    y_test.loc[:, y_test.columns != "Class"]),
-                y_test.loc[:, "Class"],
-            ],
-            axis=1,
-        )
-
-        X_train.dropna(axis=1, inplace=True)
-        X_test.dropna(axis=1, inplace=True)
-        y_train.dropna(axis=1, inplace=True)
-        y_test.dropna(axis=1, inplace=True)
-
-        return X_train, X_test, y_train, y_test
+                scaler = self.scaler_output
+
+            if "Class" in i.columns:
+                i = pd.concat(
+                    [
+                        scaler.transform(i.loc[:, i.columns != "Class"]),
+                        i.loc[:, "Class"],
+                    ],
+                    axis=1,
+                )
+            else:
+                i = scaler.transform(i)
+
+            results.append(i)
+
+        return results

     def scale_inverse(self, *args):
         """Backtransform the dataset
```
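scale_transform now accepts any number of frames and picks the scaler per frame by matching the column count against the fitted feature names. The dispatch in isolation (toy frames; assumes sklearn >= 1.0 for feature_names_in_):

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

X = pd.DataFrame({"Ba": [0.1, 0.2], "Sr": [0.3, 0.4], "pH": [7.0, 7.5]})
y = pd.DataFrame({"Ba": [0.15, 0.25], "Sr": [0.35, 0.45]})

scaler_input = MinMaxScaler().fit(X)
scaler_output = MinMaxScaler().fit(y)

for frame in (X, y):
    if len(frame.columns) == len(scaler_input.feature_names_in_):
        scaler = scaler_input    # frame has the input layout
    else:
        scaler = scaler_output   # otherwise treat it as output-shaped
    print(scaler.transform(frame))
```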
```diff
@@ -725,65 +734,28 @@ class preprocessing:
         Returns:
             Backtransformed data frames
         """

         result = []

-        if self.scaler_scope == "individual":
-            for i in args:
-                if(len(i.columns) == len(self.scaler_X.feature_names_in_)):
-                    scaler = self.scaler_X
-                else:
-                    scaler = self.scaler_y
-                if "Class" in i.columns:
-                    inversed = pd.DataFrame(
-                        scaler.inverse_transform(
-                            i.loc[:, i.columns != "Class"]),
-                        columns=i.columns[:-1],
-                    )
-                    class_column = i.loc[:, "Class"].reset_index(drop=True)
-                    i = pd.concat([inversed, class_column], axis=1)
-                else:
+        for i in args:
+            # check which scaler should be used depending on the columns
+            if len(i.columns[i.columns != "Class"]) == len(self.scaler_input.feature_names_in_):
+                scaler = self.scaler_input
+            else:
+                scaler = self.scaler_output
+
+            if "Class" in i.columns:
+                inversed = pd.DataFrame(
+                    scaler.inverse_transform(
+                        i.loc[:, i.columns != "Class"]),
+                    columns=i.columns[:-1],
+                )
+                class_column = i.loc[:, "Class"].reset_index(drop=True)
+                i = pd.concat([inversed, class_column], axis=1)
+            else:
+                i = pd.DataFrame(
+                    scaler.inverse_transform(i), columns=i.columns)
+            result.append(i)
-
-        elif self.scaler_scope == "global":
-
-            for i in args:
-                if (len(i.columns) == len(self.preprocess.scaler_X.feature_names_in_)):
-                    if "Class" in i.columns:
-                        inversed = pd.DataFrame(
-                            self.scaler_X.inverse_transform(
-                                i.loc[:, i.columns != "Class"]),
-                            columns=i.columns[:-1],
-                        )
-                        class_column = i.loc[:, "Class"].reset_index(drop=True)
-                        i = pd.concat([inversed, class_column], axis=1)
-                    else:
-                        i = pd.DataFrame(
-                            self.scaler_X.inverse_transform(i), columns=i.columns)
-                    result.append(i)
-
-                else:
-                    df = pd.DataFrame()
-                    for j in self.scaler_X.feature_names_in_:
-                        if j in i.columns:
-                            df[j] = i[j]
-                        else:
-                            df[j] = np.nan
-                    if "Class" in i.columns:
-                        inversed = pd.DataFrame(
-                            self.scaler_X.inverse_transform(
-                                df.loc[:, df.columns != "Class"]),
-                            columns=df.columns[:-1],
-                        )
-                    else:
-                        i = pd.DataFrame(
-                            self.scaler_X.inverse_transform(df), columns=df.columns)
-                    result.append(i)
-
-            result.append(i)

         return result
```
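scale_inverse is the mirror of scale_transform, so the pair should round-trip. A quick property check with one scaler (toy columns):

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

X = pd.DataFrame({"Ba": [0.1, 0.2, 0.4], "Sr": [0.3, 0.4, 0.9]})

scaler = MinMaxScaler().fit(X)
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)
X_back = pd.DataFrame(scaler.inverse_transform(X_scaled), columns=X.columns)

pd.testing.assert_frame_equal(X_back, X)  # raises if the roundtrip drifts
```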
tests/test_scaler.py (new file, 14 lines)

```diff
@@ -0,0 +1,14 @@
+# import unittest
+# import os
+
+# os.chdir("../src/")
+# print(os.getcwd())
+
+# from preprocessing import *
+
+# class TestScaler(unittest.TestCase):
+#     def sample_test(self):
+#         self.assertEqual(1, 1)
+
+# if __name__ == '__main__':
+#     unittest.main()
```
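The committed test file is still a commented-out placeholder. A runnable shape it could take, exercising the scaler roundtrip directly (the test name is hypothetical, not from the commit):

```python
import unittest

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler


class TestScaler(unittest.TestCase):
    def test_minmax_roundtrip(self):
        X = pd.DataFrame({"Ba": [0.1, 0.2, 0.4], "Sr": [0.3, 0.4, 0.9]})
        scaler = MinMaxScaler().fit(X)
        X_back = scaler.inverse_transform(scaler.transform(X))
        np.testing.assert_allclose(X_back, X.to_numpy())


if __name__ == "__main__":
    unittest.main()
```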