mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-16 03:08:22 +01:00
add custom scaler steps
This commit is contained in:
parent
f0a4ddddc6
commit
81d35796f8
1086
POET_Training.ipynb
1086
POET_Training.ipynb
File diff suppressed because one or more lines are too long
87
preprocessing.py
Normal file
87
preprocessing.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
import keras
|
||||||
|
# Report the installed Keras version as soon as this module is imported.
print("Running Keras in version {}".format(keras.__version__))
|
||||||
|
|
||||||
|
import h5py
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
import sklearn.model_selection as sk
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
from imblearn.over_sampling import SMOTE
|
||||||
|
from imblearn.under_sampling import RandomUnderSampler
|
||||||
|
from imblearn.over_sampling import RandomOverSampler
|
||||||
|
from collections import Counter
|
||||||
|
import os
|
||||||
|
|
||||||
|
# preprocessing pipeline
|
||||||
|
#
|
||||||
|
|
||||||
|
def Safelog(val):
    '''
    Sign-preserving base-10 logarithm of a single value.

    Returns log10(val) when val is greater than zero, -log10(-val) when val
    is less than zero, and 0 for anything else (e.g. exactly zero).
    '''
    if val > 0:
        return np.log10(val)
    if val < 0:
        # Take the logarithm of the magnitude, then restore the sign.
        magnitude = -val
        return -np.log10(magnitude)
    # Neither positive nor negative: no logarithm is taken.
    return 0
|
||||||
|
|
||||||
|
def Safeexp(val):
    '''
    Signed power-of-ten mapping of a single value.

    Returns -(10 ** -val) when val is greater than zero, 10 ** val when val
    is less than zero, and 0 for anything else (e.g. exactly zero).

    NOTE: this undoes Safelog only for original values whose magnitude lies
    strictly between 0 and 1 (Safelog maps those in (0, 1) to negative
    numbers and those in (-1, 0) to positive numbers). For magnitudes of 1 or
    more the round trip does not return the original value.
    '''
    if val > 0:
        # Positive input yields a negative result in (-1, 0).
        return -(10 ** -val)
    if val < 0:
        # Negative input yields a positive result in (0, 1).
        return 10 ** val
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
class FuncTransform():
    '''
    Class to transform and inverse transform data with given functions.

    Transform and inverse transform functions have to be given as
    dictionaries in the following format:
    {'key1': function1, 'key2': function2, ...}

    The data passed to transform / inverse_transform must provide .copy(),
    .keys() and item access, and each item must provide .apply(func)
    (e.g. a pandas DataFrame). Keys containing the substring "Class" are
    passed through unchanged; every other key is looked up in the respective
    function dictionary, so a missing entry raises a KeyError.
    '''

    def __init__(self, func_transform, func_inverse):
        '''
        Store the per-key transform and inverse-transform functions.

        func_transform: mapping from key to the function used by transform.
        func_inverse: mapping from key to the function used by
            inverse_transform.
        '''
        self.func_transform = func_transform
        self.func_inverse = func_inverse

    def fit(self, X, y=None):
        '''
        Do nothing and return self; the transform has no fitted state.

        y is accepted (and ignored) so that the object can be called with
        the (X, y) signature used by scikit-learn style pipelines.
        '''
        return self

    def transform(self, X):
        '''Return a copy of X with func_transform[key] applied to each key.'''
        return self._apply(X, self.func_transform)

    def fit_transform(self, X, y=None):
        '''
        Fit (a no-op) and return the transformed copy of X.

        y is accepted and ignored, see fit.
        '''
        return self.fit(X).transform(X)

    def inverse_transform(self, X_log):
        '''Return a copy of X_log with func_inverse[key] applied to each key.'''
        return self._apply(X_log, self.func_inverse)

    @staticmethod
    def _apply(data, funcs):
        '''Apply funcs[key] element-wise to every non-"Class" key of a copy of data.'''
        result = data.copy()
        for key in result.keys():
            if "Class" in key:
                # Class/label columns are left untouched.
                continue
            result[key] = result[key].apply(funcs[key])
        return result
|
||||||
|
|
||||||
|
class DataSetSampling():
    '''
    Holder for a data set together with a sampling strategy.

    Only stores its constructor arguments for now: fit and transform perform
    no sampling and simply return the instance itself.
    '''

    def __init__(self, X, y, sampling_strategy):
        '''Keep references to X, y and sampling_strategy on the instance.'''
        self.X, self.y = X, y
        self.sampling_strategy = sampling_strategy

    def fit(self, X):
        '''Ignore X and return self.'''
        return self

    def transform(self):
        '''Return self unchanged.'''
        return self
|
||||||
|
|
||||||
|
|
||||||
|
class Scaling():
    '''
    Placeholder for a scaling step; no behaviour is implemented yet.

    The docstring doubles as the class body: a class header without any body
    is a syntax error and would prevent the whole module from being imported.
    '''
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user