From 79e50f47a27212767587b06cc0acaf349101ec50 Mon Sep 17 00:00:00 2001 From: Hannes Signer Date: Fri, 28 Feb 2025 10:44:47 +0100 Subject: [PATCH] add batch normalization and self normalizing models --- src/POET_Training.ipynb | 108 +++++++--------------------------------- src/preprocessing.py | 49 ++++++++++++------ 2 files changed, 50 insertions(+), 107 deletions(-) diff --git a/src/POET_Training.ipynb b/src/POET_Training.ipynb index 5b9acd9..47d6b4b 100644 --- a/src/POET_Training.ipynb +++ b/src/POET_Training.ipynb @@ -59,8 +59,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-27 16:53:02.063625: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2025-02-27 16:53:02.082519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2025-02-28 10:22:00.281793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2025-02-28 10:22:00.302002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] } @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -180,20 +180,20 @@ } ], "source": [ - "preprocess = preprocessing(np.log1p, np.expm1) #np.log1p, np.expm1\n", + "preprocess = preprocessing() #np.log1p, np.expm1\n", "X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])\n", "\n", "# optional: perform log transformation\n", - "X, y = preprocess.funcTranform(X, y)\n", + "# X, y = preprocess.funcTranform(X, y)\n", "\n", "X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)\n", "X_train_origin = X_train.copy()\n", "X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n", "X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=0)\n", - "preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"minmax\")\n", - "X_train, X_test, y_train, y_test = preprocess.scale_transform(\n", - " X_train, X_test, y_train, y_test\n", - ")\n", + "# preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"standard\")\n", + "# X_train, X_test, y_train, y_test = preprocess.scale_transform(\n", + "# X_train, X_test, y_train, y_test\n", + "# )\n", "X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)" ] }, @@ -257,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -303,16 +303,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# select model architecture\n", - "model = model_definition(\"large\")\n", + "model = 
model_definition(\"large_batch_normalization\")\n", "\n", "# define learning rate adaptation\n", "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n", - " initial_learning_rate=0.001, decay_steps=2000, decay_rate=0.9, staircase=True\n", + " initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True\n", ")\n", "\n", "# hyperparameters that are determined by hyperparameter optimization\n", @@ -321,7 +321,7 @@ "h3 = 0.5099528144902471\n", "\n", "\n", - "scaler_type = \"minmax\"\n", + "scaler_type = \"none\"\n", "loss_variant = \"huber_mass_balance\"\n", "delta = 1.7642791340966357\n", "\n", @@ -347,83 +347,9 @@ "output_type": "stream", "text": [ "Epoch 1/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 2.0302e-06 - loss: 9.1101e-05 - mass_balance: 1.7680e-04 - val_huber: 1.6816e-06 - val_loss: 8.5608e-05 - val_mass_balance: 1.6647e-04\n", + "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 13ms/step - huber: 2.7347e-04 - loss: 0.0051 - mass_balance: 0.0098 - val_huber: 2.8895e-05 - val_loss: 0.0032 - val_mass_balance: 0.0062\n", "Epoch 2/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.9164e-06 - loss: 9.1236e-05 - mass_balance: 1.7711e-04 - val_huber: 1.6744e-06 - val_loss: 8.8582e-05 - val_mass_balance: 1.7216e-04\n", - "Epoch 3/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 13ms/step - huber: 1.9028e-06 - loss: 8.8366e-05 - mass_balance: 1.7163e-04 - val_huber: 1.6736e-06 - val_loss: 8.9763e-05 - val_mass_balance: 1.7434e-04\n", - "Epoch 4/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 14ms/step - huber: 1.9853e-06 - loss: 9.0045e-05 - mass_balance: 1.7478e-04 - val_huber: 1.6635e-06 - val_loss: 9.0544e-05 - val_mass_balance: 1.7591e-04\n", - "Epoch 5/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 21ms/step - huber: 1.9494e-06 - loss: 8.7276e-05 - mass_balance: 1.6948e-04 - val_huber: 1.6741e-06 - val_loss: 8.6483e-05 - val_mass_balance: 1.6809e-04\n", - "Epoch 6/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 22ms/step - huber: 1.7979e-06 - loss: 8.7661e-05 - mass_balance: 1.7025e-04 - val_huber: 1.6537e-06 - val_loss: 8.6392e-05 - val_mass_balance: 1.6783e-04\n", - "Epoch 7/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 2.0609e-06 - loss: 8.4980e-05 - mass_balance: 1.6501e-04 - val_huber: 1.6581e-06 - val_loss: 8.4510e-05 - val_mass_balance: 1.6422e-04\n", - "Epoch 8/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 19ms/step - huber: 2.1306e-06 - loss: 8.5293e-05 - mass_balance: 1.6556e-04 - val_huber: 1.6407e-06 - val_loss: 8.3087e-05 - val_mass_balance: 1.6164e-04\n", - "Epoch 9/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7904e-06 - loss: 8.4495e-05 - mass_balance: 1.6414e-04 - val_huber: 1.6346e-06 - val_loss: 8.2466e-05 - val_mass_balance: 1.6029e-04\n", - "Epoch 10/50\n", - "\u001b[1m886/886\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 13ms/step - huber: 1.7857e-06 - loss: 8.3844e-05 - mass_balance: 1.6288e-04 - val_huber: 1.6314e-06 - val_loss: 8.4307e-05 - val_mass_balance: 1.6374e-04\n", - "Epoch 11/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 20ms/step - huber: 2.0917e-06 - loss: 8.3925e-05 - mass_balance: 1.6292e-04 - val_huber: 1.6253e-06 - val_loss: 8.5509e-05 - val_mass_balance: 1.6608e-04\n", - "Epoch 12/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 2.1661e-06 - loss: 8.2850e-05 - mass_balance: 1.6084e-04 - val_huber: 1.6364e-06 - val_loss: 8.2984e-05 - val_mass_balance: 1.6121e-04\n", - "Epoch 13/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 24ms/step - huber: 1.6631e-06 - loss: 8.3116e-05 - mass_balance: 1.6148e-04 - val_huber: 1.6294e-06 - val_loss: 8.0669e-05 - val_mass_balance: 1.5678e-04\n", - "Epoch 14/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 1.7664e-06 - loss: 8.1699e-05 - mass_balance: 1.5876e-04 - val_huber: 1.6272e-06 - val_loss: 8.0021e-05 - val_mass_balance: 1.5553e-04\n", - "Epoch 15/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.8568e-06 - loss: 8.0615e-05 - mass_balance: 1.5659e-04 - val_huber: 1.6175e-06 - val_loss: 8.0275e-05 - val_mass_balance: 1.5602e-04\n", - "Epoch 16/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7830e-06 - loss: 8.0901e-05 - mass_balance: 1.5717e-04 - val_huber: 1.6209e-06 - val_loss: 7.8130e-05 - val_mass_balance: 1.5187e-04\n", - "Epoch 17/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7357e-06 - loss: 7.9596e-05 - mass_balance: 1.5464e-04 - val_huber: 1.6223e-06 - val_loss: 8.7514e-05 - val_mass_balance: 1.7020e-04\n", - "Epoch 18/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 20ms/step - huber: 1.8609e-06 - loss: 7.9730e-05 - mass_balance: 1.5490e-04 - val_huber: 1.6178e-06 - val_loss: 7.8086e-05 - val_mass_balance: 1.5175e-04\n", - "Epoch 19/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m24s\u001b[0m 27ms/step - huber: 1.8063e-06 - loss: 7.8635e-05 - mass_balance: 1.5278e-04 - val_huber: 1.6157e-06 - val_loss: 7.7298e-05 - val_mass_balance: 1.5027e-04\n", - "Epoch 20/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 23ms/step - huber: 1.9740e-06 - loss: 7.8909e-05 - mass_balance: 1.5325e-04 - val_huber: 1.6102e-06 - val_loss: 7.8826e-05 - val_mass_balance: 1.5312e-04\n", - "Epoch 21/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 1.8796e-06 - loss: 7.7536e-05 - mass_balance: 1.5063e-04 - val_huber: 1.6175e-06 - val_loss: 7.9814e-05 - val_mass_balance: 1.5504e-04\n", - "Epoch 22/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.7153e-06 
- loss: 7.7000e-05 - mass_balance: 1.4963e-04 - val_huber: 1.6125e-06 - val_loss: 7.5456e-05 - val_mass_balance: 1.4669e-04\n", - "Epoch 23/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 15ms/step - huber: 2.0066e-06 - loss: 7.7276e-05 - mass_balance: 1.5009e-04 - val_huber: 1.6136e-06 - val_loss: 7.6164e-05 - val_mass_balance: 1.4803e-04\n", - "Epoch 24/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 15ms/step - huber: 1.9103e-06 - loss: 7.6767e-05 - mass_balance: 1.4911e-04 - val_huber: 1.6051e-06 - val_loss: 7.6967e-05 - val_mass_balance: 1.4963e-04\n", - "Epoch 25/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 19ms/step - huber: 1.7270e-06 - loss: 7.5980e-05 - mass_balance: 1.4763e-04 - val_huber: 1.6134e-06 - val_loss: 7.4603e-05 - val_mass_balance: 1.4503e-04\n", - "Epoch 26/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 24ms/step - huber: 1.9137e-06 - loss: 7.5943e-05 - mass_balance: 1.4750e-04 - val_huber: 1.6108e-06 - val_loss: 7.6177e-05 - val_mass_balance: 1.4806e-04\n", - "Epoch 27/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 22ms/step - huber: 2.1447e-06 - loss: 7.5024e-05 - mass_balance: 1.4564e-04 - val_huber: 1.6123e-06 - val_loss: 7.5187e-05 - val_mass_balance: 1.4617e-04\n", - "Epoch 28/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 22ms/step - huber: 2.0587e-06 - loss: 7.4814e-05 - mass_balance: 1.4526e-04 - val_huber: 1.6096e-06 - val_loss: 7.4630e-05 - val_mass_balance: 1.4500e-04\n", - "Epoch 29/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.8129e-06 - loss: 7.3951e-05 - mass_balance: 1.4368e-04 - val_huber: 1.6081e-06 - val_loss: 7.4177e-05 - val_mass_balance: 1.4413e-04\n", - "Epoch 30/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 2.0191e-06 - loss: 7.4011e-05 - mass_balance: 1.4372e-04 - val_huber: 1.6102e-06 - val_loss: 7.4224e-05 - val_mass_balance: 1.4424e-04\n", - "Epoch 31/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7857e-06 - loss: 7.3585e-05 - mass_balance: 1.4296e-04 - val_huber: 1.6118e-06 - val_loss: 7.3740e-05 - val_mass_balance: 1.4335e-04\n", - "Epoch 32/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.9859e-06 - loss: 7.2959e-05 - mass_balance: 1.4168e-04 - val_huber: 1.6117e-06 - val_loss: 7.2533e-05 - val_mass_balance: 1.4098e-04\n", - "Epoch 33/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 25ms/step - huber: 1.9710e-06 - loss: 7.2820e-05 - mass_balance: 1.4141e-04 - val_huber: 1.6038e-06 - val_loss: 7.4178e-05 - val_mass_balance: 1.4417e-04\n", - "Epoch 34/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m24s\u001b[0m 27ms/step - huber: 1.8785e-06 - loss: 7.2509e-05 - mass_balance: 1.4084e-04 - val_huber: 1.6048e-06 - val_loss: 7.2866e-05 - 
val_mass_balance: 1.4163e-04\n", - "Epoch 35/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 24ms/step - huber: 2.0480e-06 - loss: 7.2435e-05 - mass_balance: 1.4063e-04 - val_huber: 1.6072e-06 - val_loss: 7.1991e-05 - val_mass_balance: 1.3992e-04\n", - "Epoch 36/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 2.0031e-06 - loss: 7.1794e-05 - mass_balance: 1.3939e-04 - val_huber: 1.6078e-06 - val_loss: 7.1452e-05 - val_mass_balance: 1.3888e-04\n", - "Epoch 37/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 16ms/step - huber: 1.9493e-06 - loss: 7.1358e-05 - mass_balance: 1.3857e-04 - val_huber: 1.6105e-06 - val_loss: 7.1551e-05 - val_mass_balance: 1.3904e-04\n", - "Epoch 38/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 14ms/step - huber: 1.9237e-06 - loss: 7.1155e-05 - mass_balance: 1.3818e-04 - val_huber: 1.6065e-06 - val_loss: 7.1723e-05 - val_mass_balance: 1.3939e-04\n", - "Epoch 39/50\n", - "\u001b[1m556/886\u001b[0m \u001b[32m━━━━━━━━━━━━\u001b[0m\u001b[37m━━━━━━━━\u001b[0m \u001b[1m5s\u001b[0m 16ms/step - huber: 1.9354e-06 - loss: 7.1366e-05 - mass_balance: 1.3857e-04" + "\u001b[1m647/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━\u001b[0m\u001b[37m━━━━━━\u001b[0m \u001b[1m3s\u001b[0m 13ms/step - huber: 3.2162e-04 - loss: 0.0052 - mass_balance: 0.0098" ] } ], diff --git a/src/preprocessing.py b/src/preprocessing.py index bfd5cf6..be783d6 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -1,5 +1,5 @@ import keras -from keras.layers import Dense, Dropout, Input, BatchNormalization, LeakyReLU +from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU import tensorflow as tf import h5py import numpy as np @@ -15,7 +15,6 @@ from imblearn.under_sampling import RandomUnderSampler from imblearn.over_sampling import RandomOverSampler from collections import Counter import os -from preprocessing import * from sklearn import set_config from importlib import reload @@ -62,22 +61,34 @@ def model_definition(architecture): elif architecture == "large_batch_normalization": model = keras.Sequential([ keras.layers.Input(shape=(8,), dtype=dtype), - BatchNormalization(), # Normalisierung der Eingabedaten + BatchNormalization(), Dense(512, dtype=dtype), - BatchNormalization(), # Nach der Dense-Schicht für stabilere Verteilungen LeakyReLU(negative_slope=0.01), + # BatchNormalization(), Dense(1024, dtype=dtype), - BatchNormalization(), LeakyReLU(negative_slope=0.01), + # BatchNormalization(), Dense(512, dtype=dtype), - BatchNormalization(), LeakyReLU(negative_slope=0.01), Dense(8, dtype=dtype), ]) + + + elif architecture == "large_self_normalization": + model = keras.Sequential([ + keras.layers.Input(shape=(8,), dtype=dtype), + Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype), + AlphaDropout(0.05), + Dense(1024, activation='selu', kernel_initializer='lecun_normal',dtype=dtype), + AlphaDropout(0.05), + Dense(512, activation='selu', kernel_initializer='lecun_normal',dtype=dtype), + AlphaDropout(0.05), + Dense(8, dtype=dtype), + ]) elif architecture == "paper": model = keras.Sequential( @@ -163,11 +174,6 @@ def custom_loss( mean_y = tf.convert_to_tensor( preprocess.scaler_y.mean_, dtype=tf.float32) - else: - raise Exception( - "No valid scaler type 
-            )
-
     except AttributeError:
         raise Exception(
             "Data was normalized with a different scaler than the one specified for training. Compare the scaling approach between preprocessing and training."
         )
@@ -183,6 +189,10 @@ def custom_loss(
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results

     # apply expm1 on the columns of predicted_inverse and results_inverse if a log transformation was used
     if preprocess.func_dict_out is not None:
@@ -273,6 +283,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results

     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
@@ -338,13 +352,14 @@ def mass_balance_evaluation(model, X, preprocess):
     classes.reset_index(drop=True, inplace=True)
     prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
-    # backtransform min/max or standard scaler
-    X = pd.DataFrame(
-        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
-        columns=columns,
-    )
-    prediction = pd.DataFrame(
-        preprocess.scaler_y.inverse_transform(prediction), columns=columns
-    )
+    # backtransform only if a min/max or standard scaler was fitted
+    if preprocess.scaler_X is not None:
+        X = pd.DataFrame(
+            preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
+            columns=columns,
+        )
+        prediction = pd.DataFrame(
+            preprocess.scaler_y.inverse_transform(prediction), columns=columns
+        )

     # apply backtransformation if log transformation was applied
     if preprocess.func_dict_out is not None:
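
Note on the batch-normalization variant: placing BatchNormalization directly after the
Input layer standardizes the raw features with learned batch statistics, which is
presumably why the explicit scale_fit/scale_transform calls in the notebook stay
commented out and the loss runs with scaler_type = "none". A minimal standalone sketch
of that effect (the uniform random data is an illustrative stand-in, not the POET
dataset):

    import numpy as np
    import keras

    # Features on a raw scale, far from zero mean / unit variance.
    x = np.random.uniform(0.0, 100.0, size=(256, 8)).astype("float32")
    bn = keras.layers.BatchNormalization()
    h = bn(x, training=True)  # training=True -> normalize with batch statistics
    print(float(np.mean(h)), float(np.var(h)))  # close to 0.0 and 1.0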
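The self-normalizing variant leans on the SELU fixed point instead: with lecun_normal
initialization, and with AlphaDropout in place of plain Dropout so that the dropout
step preserves the activation mean and variance, activations stay near zero mean and
unit variance through the stack without any normalization layers. A quick check of
that property (layer widths follow the patch; the standard-normal input is an
illustrative assumption):

    import numpy as np
    import keras

    h = np.random.randn(1024, 8).astype("float32")
    for units in (512, 1024, 512):
        # selu + lecun_normal is the self-normalizing configuration
        h = keras.layers.Dense(
            units, activation="selu", kernel_initializer="lecun_normal"
        )(h)
    print(float(np.mean(h)), float(np.var(h)))  # should stay close to (0, 1)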
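For reference, the staircase schedule in the training cell keeps the learning rate
constant for each block of 2000 steps and then cuts it by 10%, i.e.
lr(step) = 0.01 * 0.9 ** (step // 2000):

    import keras

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True
    )
    print(float(lr_schedule(0)))     # 0.01
    print(float(lr_schedule(1999)))  # 0.01 (still in the first block)
    print(float(lr_schedule(2000)))  # 0.009
    print(float(lr_schedule(4000)))  # 0.0081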