Compare commits

...

2 Commits

2 changed files with 1014 additions and 37 deletions

File diff suppressed because one or more lines are too long

View File

@ -337,7 +337,7 @@ def huber_metric(delta=1.0):
return huber
def mass_balance_evaluation(model, X, preprocess):
def mass_balance_evaluation(model, X, y, preprocess):
"""Calculates the mass balance difference for each cell.
Args:
@ -353,10 +353,10 @@ def mass_balance_evaluation(model, X, preprocess):
columns = X.iloc[:, X.columns != "Class"].columns
classes = X["Class"]
classes.reset_index(drop=True, inplace=True)
prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
prediction = pd.DataFrame(model.predict(X[columns]), columns=y.columns)
# backtransform min/max or standard scaler
if preprocess.scaler_X is None:
if preprocess.scaler_X is not None:
X = pd.DataFrame(
preprocess.scaler_X.inverse_transform(
X.iloc[:, X.columns != "Class"]),
@ -586,6 +586,7 @@ class preprocessing:
def scale_fit(self, X, y, scaling, type="standard"):
self.scaler_type = type
self.scaler_scope = scaling
"""Fit a scaler for data preprocessing.
Args:
@ -620,6 +621,13 @@ class preprocessing:
self.scaler_y = self.scaler_X
self.state["scale"] = True
return pd.concat(
[X.iloc[:, X.columns != "Class"],
y.iloc[:, y.columns != "Class"]],
axis=0,
)
def scale_transform(self, X_train, X_test, y_train, y_test):
"""Apply learned scaler on datasets.
@ -634,6 +642,39 @@ class preprocessing:
transformed dataframes
"""
if self.scaler_scope == "global":
if len(X_train.columns) > len(y_train.columns):
y_train_modified = X_train.copy()
y_test_modified = X_test.copy()
for i in y_train_modified.columns:
if i in y_train.columns:
y_train_modified[i] = y_train[i]
y_test_modified[i] = y_test[i]
else:
y_train_modified[i] = np.nan
y_test_modified[i] = np.nan
y_train = y_train_modified
y_test = y_test_modified
else:
X_train_modified = y_train.copy()
X_test_modified = y_test.copy()
for i in X_train_modified.columns:
if i in X_train.columns:
X_train_modified[i] = X_train[i]
X_test_modified[i] = X_test[i]
else:
X_train_modified[i] = np.nan
X_test_modified[i] = np.nan
X_train = X_train_modified
X_test = X_test_modified
X_train = pd.concat(
[
self.scaler_X.transform(
@ -669,7 +710,12 @@ class preprocessing:
],
axis=1,
)
X_train.dropna(axis=1, inplace=True)
X_test.dropna(axis=1, inplace=True)
y_train.dropna(axis=1, inplace=True)
y_test.dropna(axis=1, inplace=True)
return X_train, X_test, y_train, y_test
def scale_inverse(self, *args):
    """Backtransform scaled data frames to their original value range.

    With ``scaler_scope == "individual"`` a frame whose number of feature
    columns equals the number of features seen by ``scaler_X`` is inverted
    with ``scaler_X``; every other frame is inverted with ``scaler_y``.

    With ``scaler_scope == "global"`` every frame is inverted with
    ``scaler_X``. A frame whose feature count differs from the scaler's is
    first aligned to ``scaler_X.feature_names_in_``: missing columns are
    filled with NaN and columns unknown to the scaler are dropped, so the
    returned frame then carries all scaler columns.

    A "Class" column is never passed to the scaler; it is re-attached
    unchanged as the last column of the result. The row index of every
    returned frame is reset to a default integer index.

    Args:
        *args: pandas data frames to backtransform.

    Returns:
        List of backtransformed data frames in the order they were passed.
        The list is empty if ``scaler_scope`` is neither "individual" nor
        "global".
    """

    def invert(scaler, features, original):
        # Rebuild a frame from the scaler's array output and re-attach the
        # untouched "Class" labels (if any) by position.
        inversed = pd.DataFrame(
            scaler.inverse_transform(features), columns=features.columns
        )
        if "Class" not in original.columns:
            return inversed
        class_column = original.loc[:, "Class"].reset_index(drop=True)
        return pd.concat([inversed, class_column], axis=1)

    result = []
    if self.scaler_scope == "individual":
        n_features_x = len(self.scaler_X.feature_names_in_)
        for frame in args:
            # "Class" is not a scaled feature, so it must not take part in
            # the column count that selects the scaler.
            features = frame.loc[:, frame.columns != "Class"]
            if features.shape[1] == n_features_x:
                scaler = self.scaler_X
            else:
                scaler = self.scaler_y
            result.append(invert(scaler, features, frame))
    elif self.scaler_scope == "global":
        feature_names = list(self.scaler_X.feature_names_in_)
        for frame in args:
            features = frame.loc[:, frame.columns != "Class"]
            if features.shape[1] != len(feature_names):
                # The shared scaler expects all of its columns: pad the
                # missing ones with NaN (reindex also fixes the order).
                features = features.reindex(columns=feature_names)
            result.append(invert(self.scaler_X, features, frame))
    return result
def split(self, X, y, ratio=0.8):