mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-13 10:38:22 +01:00
Compare commits
2 Commits
52940efdb9
...
f2c89e0b83
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f2c89e0b83 | ||
|
|
f321a5604b |
File diff suppressed because one or more lines are too long
@ -337,7 +337,7 @@ def huber_metric(delta=1.0):
|
||||
return huber
|
||||
|
||||
|
||||
def mass_balance_evaluation(model, X, preprocess):
|
||||
def mass_balance_evaluation(model, X, y, preprocess):
|
||||
"""Calculates the mass balance difference for each cell.
|
||||
|
||||
Args:
|
||||
@ -353,10 +353,10 @@ def mass_balance_evaluation(model, X, preprocess):
|
||||
columns = X.iloc[:, X.columns != "Class"].columns
|
||||
classes = X["Class"]
|
||||
classes.reset_index(drop=True, inplace=True)
|
||||
prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
|
||||
prediction = pd.DataFrame(model.predict(X[columns]), columns=y.columns)
|
||||
# backtransform min/max or standard scaler
|
||||
|
||||
if preprocess.scaler_X is None:
|
||||
if preprocess.scaler_X is not None:
|
||||
X = pd.DataFrame(
|
||||
preprocess.scaler_X.inverse_transform(
|
||||
X.iloc[:, X.columns != "Class"]),
|
||||
@ -586,6 +586,7 @@ class preprocessing:
|
||||
|
||||
def scale_fit(self, X, y, scaling, type="standard"):
|
||||
self.scaler_type = type
|
||||
self.scaler_scope = scaling
|
||||
"""Fit a scaler for data preprocessing.
|
||||
|
||||
Args:
|
||||
@ -620,6 +621,13 @@ class preprocessing:
|
||||
self.scaler_y = self.scaler_X
|
||||
|
||||
self.state["scale"] = True
|
||||
|
||||
|
||||
return pd.concat(
|
||||
[X.iloc[:, X.columns != "Class"],
|
||||
y.iloc[:, y.columns != "Class"]],
|
||||
axis=0,
|
||||
)
|
||||
|
||||
def scale_transform(self, X_train, X_test, y_train, y_test):
|
||||
"""Apply learned scaler on datasets.
|
||||
@ -634,6 +642,39 @@ class preprocessing:
|
||||
transformed dataframes
|
||||
"""
|
||||
|
||||
if self.scaler_scope == "global":
|
||||
if len(X_train.columns) > len(y_train.columns):
|
||||
y_train_modified = X_train.copy()
|
||||
y_test_modified = X_test.copy()
|
||||
|
||||
for i in y_train_modified.columns:
|
||||
if i in y_train.columns:
|
||||
y_train_modified[i] = y_train[i]
|
||||
y_test_modified[i] = y_test[i]
|
||||
else:
|
||||
y_train_modified[i] = np.nan
|
||||
y_test_modified[i] = np.nan
|
||||
|
||||
y_train = y_train_modified
|
||||
y_test = y_test_modified
|
||||
|
||||
|
||||
else:
|
||||
X_train_modified = y_train.copy()
|
||||
X_test_modified = y_test.copy()
|
||||
|
||||
for i in X_train_modified.columns:
|
||||
if i in X_train.columns:
|
||||
X_train_modified[i] = X_train[i]
|
||||
X_test_modified[i] = X_test[i]
|
||||
else:
|
||||
X_train_modified[i] = np.nan
|
||||
X_test_modified[i] = np.nan
|
||||
|
||||
X_train = X_train_modified
|
||||
X_test = X_test_modified
|
||||
|
||||
|
||||
X_train = pd.concat(
|
||||
[
|
||||
self.scaler_X.transform(
|
||||
@ -669,7 +710,12 @@ class preprocessing:
|
||||
],
|
||||
axis=1,
|
||||
)
|
||||
|
||||
|
||||
X_train.dropna(axis=1, inplace=True)
|
||||
X_test.dropna(axis=1, inplace=True)
|
||||
y_train.dropna(axis=1, inplace=True)
|
||||
y_test.dropna(axis=1, inplace=True)
|
||||
|
||||
return X_train, X_test, y_train, y_test
|
||||
|
||||
def scale_inverse(self, *args):
|
||||
@ -679,20 +725,66 @@ class preprocessing:
|
||||
Backtransformed data frames
|
||||
"""
|
||||
|
||||
|
||||
|
||||
result = []
|
||||
for i in args:
|
||||
if "Class" in i.columns:
|
||||
inversed = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(
|
||||
i.loc[:, i.columns != "Class"]),
|
||||
columns=i.columns[:-1],
|
||||
)
|
||||
class_column = i.loc[:, "Class"].reset_index(drop=True)
|
||||
i = pd.concat([inversed, class_column], axis=1)
|
||||
else:
|
||||
i = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(i), columns=i.columns)
|
||||
result.append(i)
|
||||
|
||||
|
||||
if self.scaler_scope == "individual":
|
||||
for i in args:
|
||||
if(len(i.columns) == len(self.scaler_X.feature_names_in_)):
|
||||
scaler = self.scaler_X
|
||||
else:
|
||||
scaler = self.scaler_y
|
||||
if "Class" in i.columns:
|
||||
inversed = pd.DataFrame(
|
||||
scaler.inverse_transform(
|
||||
i.loc[:, i.columns != "Class"]),
|
||||
columns=i.columns[:-1],
|
||||
)
|
||||
class_column = i.loc[:, "Class"].reset_index(drop=True)
|
||||
i = pd.concat([inversed, class_column], axis=1)
|
||||
else:
|
||||
i = pd.DataFrame(
|
||||
scaler.inverse_transform(i), columns=i.columns)
|
||||
result.append(i)
|
||||
|
||||
elif self.scaler_scope == "global":
|
||||
|
||||
for i in args:
|
||||
if (len(i.columns) == len(self.preprocess.scaler_X.feature_names_in_)):
|
||||
if "Class" in i.columns:
|
||||
inversed = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(
|
||||
i.loc[:, i.columns != "Class"]),
|
||||
columns=i.columns[:-1],
|
||||
)
|
||||
class_column = i.loc[:, "Class"].reset_index(drop=True)
|
||||
i = pd.concat([inversed, class_column], axis=1)
|
||||
else:
|
||||
i = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(i), columns=i.columns)
|
||||
result.append(i)
|
||||
|
||||
else:
|
||||
df = pd.DataFrame()
|
||||
for j in self.scaler_X.feature_names_in_:
|
||||
if j in i.columns:
|
||||
df[j] = i[j]
|
||||
else:
|
||||
df[j] = np.nan
|
||||
if "Class" in i.columns:
|
||||
inversed = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(
|
||||
df.loc[:, df.columns != "Class"]),
|
||||
columns=df.columns[:-1],
|
||||
)
|
||||
else:
|
||||
i = pd.DataFrame(
|
||||
self.scaler_X.inverse_transform(df), columns=df.columns)
|
||||
result.append(i)
|
||||
|
||||
|
||||
return result
|
||||
|
||||
def split(self, X, y, ratio=0.8):
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user