add batch normalization and self normalizing models

Hannes Signer 2025-02-28 10:44:47 +01:00
parent e9baf132b4
commit 79e50f47a2
2 changed files with 50 additions and 107 deletions

File 1 of 2: Jupyter notebook (.ipynb)

@@ -59,8 +59,8 @@
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "2025-02-27 16:53:02.063625: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
- "2025-02-27 16:53:02.082519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "2025-02-28 10:22:00.281793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "2025-02-28 10:22:00.302002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
  "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
  ]
 }
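The notice in this stderr cell documents its own fix: oneDNN custom ops can be disabled for bit-reproducible results by setting the environment variable the message names, before TensorFlow is first imported. A minimal sketch, not part of the commit:

```python
import os

# Must run before the first TensorFlow import, otherwise the flag is ignored.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import tensorflow as tf  # noqa: E402 (deliberately imported after setting the flag)
```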
@@ -158,7 +158,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 14,
+ "execution_count": 4,
  "metadata": {},
  "outputs": [
  {
@@ -180,20 +180,20 @@
 }
 ],
 "source": [
- "preprocess = preprocessing(np.log1p, np.expm1) #np.log1p, np.expm1\n",
+ "preprocess = preprocessing() #np.log1p, np.expm1\n",
 "X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])\n",
 "\n",
 "# optional: perform log transformation\n",
- "X, y = preprocess.funcTranform(X, y)\n",
+ "# X, y = preprocess.funcTranform(X, y)\n",
 "\n",
 "X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)\n",
 "X_train_origin = X_train.copy()\n",
 "X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n",
 "X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=0)\n",
- "preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"minmax\")\n",
- "X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
- "    X_train, X_test, y_train, y_test\n",
- ")\n",
+ "# preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"standard\")\n",
+ "# X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
+ "#     X_train, X_test, y_train, y_test\n",
+ "# )\n",
 "X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)"
]
},
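The log transformation stays available but is switched off here, so the network now trains on untransformed concentrations. For reference, a minimal NumPy sketch (independent of the notebook's preprocessing class) showing that the np.log1p/np.expm1 pair named in the comment is an exact forward/inverse round trip:

```python
import numpy as np

# log1p compresses data spanning many orders of magnitude while staying
# finite at 0; expm1 undoes it exactly, which is why it is applied to
# predictions on the way back.
x = np.array([0.0, 1e-8, 1e-4, 1.0, 100.0])
z = np.log1p(x)  # forward transform on X and y
assert np.allclose(np.expm1(z), x)
```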
@@ -257,7 +257,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 17,
+ "execution_count": 6,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -275,7 +275,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 7,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -303,16 +303,16 @@
 },
 {
  "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 8,
  "metadata": {},
  "outputs": [],
  "source": [
 "# select model architecture\n",
- "model = model_definition(\"large\")\n",
+ "model = model_definition(\"large_batch_normalization\")\n",
 "\n",
 "# define learning rate adaptation\n",
 "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n",
- "    initial_learning_rate=0.001, decay_steps=2000, decay_rate=0.9, staircase=True\n",
+ "    initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True\n",
 ")\n",
 "\n",
 "# hyperparameters that are determined by hyperparameter optimization\n",
@@ -321,7 +321,7 @@
 "h3 = 0.5099528144902471\n",
 "\n",
 "\n",
- "scaler_type = \"minmax\"\n",
+ "scaler_type = \"none\"\n",
 "loss_variant = \"huber_mass_balance\"\n",
 "delta = 1.7642791340966357\n",
 "\n",
@@ -347,83 +347,9 @@
  "output_type": "stream",
  "text": [
  "Epoch 1/50\n",
- "886/886 19s 21ms/step - huber: 2.0302e-06 - loss: 9.1101e-05 - mass_balance: 1.7680e-04 - val_huber: 1.6816e-06 - val_loss: 8.5608e-05 - val_mass_balance: 1.6647e-04\n",
+ "886/886 11s 13ms/step - huber: 2.7347e-04 - loss: 0.0051 - mass_balance: 0.0098 - val_huber: 2.8895e-05 - val_loss: 0.0032 - val_mass_balance: 0.0062\n",
  "Epoch 2/50\n",
- "886/886 15s 17ms/step - huber: 1.9164e-06 - loss: 9.1236e-05 - mass_balance: 1.7711e-04 - val_huber: 1.6744e-06 - val_loss: 8.8582e-05 - val_mass_balance: 1.7216e-04\n",
- "Epoch 3/50\n",
- "886/886 12s 13ms/step - huber: 1.9028e-06 - loss: 8.8366e-05 - mass_balance: 1.7163e-04 - val_huber: 1.6736e-06 - val_loss: 8.9763e-05 - val_mass_balance: 1.7434e-04\n",
- "Epoch 4/50\n",
- "886/886 13s 14ms/step - huber: 1.9853e-06 - loss: 9.0045e-05 - mass_balance: 1.7478e-04 - val_huber: 1.6635e-06 - val_loss: 9.0544e-05 - val_mass_balance: 1.7591e-04\n",
- "Epoch 5/50\n",
- "886/886 18s 21ms/step - huber: 1.9494e-06 - loss: 8.7276e-05 - mass_balance: 1.6948e-04 - val_huber: 1.6741e-06 - val_loss: 8.6483e-05 - val_mass_balance: 1.6809e-04\n",
- "Epoch 6/50\n",
- "886/886 20s 22ms/step - huber: 1.7979e-06 - loss: 8.7661e-05 - mass_balance: 1.7025e-04 - val_huber: 1.6537e-06 - val_loss: 8.6392e-05 - val_mass_balance: 1.6783e-04\n",
- "Epoch 7/50\n",
- "886/886 20s 23ms/step - huber: 2.0609e-06 - loss: 8.4980e-05 - mass_balance: 1.6501e-04 - val_huber: 1.6581e-06 - val_loss: 8.4510e-05 - val_mass_balance: 1.6422e-04\n",
- "Epoch 8/50\n",
- "886/886 17s 19ms/step - huber: 2.1306e-06 - loss: 8.5293e-05 - mass_balance: 1.6556e-04 - val_huber: 1.6407e-06 - val_loss: 8.3087e-05 - val_mass_balance: 1.6164e-04\n",
- "Epoch 9/50\n",
- "886/886 12s 14ms/step - huber: 1.7904e-06 - loss: 8.4495e-05 - mass_balance: 1.6414e-04 - val_huber: 1.6346e-06 - val_loss: 8.2466e-05 - val_mass_balance: 1.6029e-04\n",
- "Epoch 10/50\n",
- "886/886 12s 13ms/step - huber: 1.7857e-06 - loss: 8.3844e-05 - mass_balance: 1.6288e-04 - val_huber: 1.6314e-06 - val_loss: 8.4307e-05 - val_mass_balance: 1.6374e-04\n",
- "Epoch 11/50\n",
- "886/886 18s 20ms/step - huber: 2.0917e-06 - loss: 8.3925e-05 - mass_balance: 1.6292e-04 - val_huber: 1.6253e-06 - val_loss: 8.5509e-05 - val_mass_balance: 1.6608e-04\n",
- "Epoch 12/50\n",
- "886/886 20s 23ms/step - huber: 2.1661e-06 - loss: 8.2850e-05 - mass_balance: 1.6084e-04 - val_huber: 1.6364e-06 - val_loss: 8.2984e-05 - val_mass_balance: 1.6121e-04\n",
- "Epoch 13/50\n",
- "886/886 21s 24ms/step - huber: 1.6631e-06 - loss: 8.3116e-05 - mass_balance: 1.6148e-04 - val_huber: 1.6294e-06 - val_loss: 8.0669e-05 - val_mass_balance: 1.5678e-04\n",
- "Epoch 14/50\n",
- "886/886 19s 21ms/step - huber: 1.7664e-06 - loss: 8.1699e-05 - mass_balance: 1.5876e-04 - val_huber: 1.6272e-06 - val_loss: 8.0021e-05 - val_mass_balance: 1.5553e-04\n",
- "Epoch 15/50\n",
- "886/886 12s 14ms/step - huber: 1.8568e-06 - loss: 8.0615e-05 - mass_balance: 1.5659e-04 - val_huber: 1.6175e-06 - val_loss: 8.0275e-05 - val_mass_balance: 1.5602e-04\n",
- "Epoch 16/50\n",
- "886/886 12s 14ms/step - huber: 1.7830e-06 - loss: 8.0901e-05 - mass_balance: 1.5717e-04 - val_huber: 1.6209e-06 - val_loss: 7.8130e-05 - val_mass_balance: 1.5187e-04\n",
- "Epoch 17/50\n",
- "886/886 12s 14ms/step - huber: 1.7357e-06 - loss: 7.9596e-05 - mass_balance: 1.5464e-04 - val_huber: 1.6223e-06 - val_loss: 8.7514e-05 - val_mass_balance: 1.7020e-04\n",
- "Epoch 18/50\n",
- "886/886 18s 20ms/step - huber: 1.8609e-06 - loss: 7.9730e-05 - mass_balance: 1.5490e-04 - val_huber: 1.6178e-06 - val_loss: 7.8086e-05 - val_mass_balance: 1.5175e-04\n",
- "Epoch 19/50\n",
- "886/886 24s 27ms/step - huber: 1.8063e-06 - loss: 7.8635e-05 - mass_balance: 1.5278e-04 - val_huber: 1.6157e-06 - val_loss: 7.7298e-05 - val_mass_balance: 1.5027e-04\n",
- "Epoch 20/50\n",
- "886/886 21s 23ms/step - huber: 1.9740e-06 - loss: 7.8909e-05 - mass_balance: 1.5325e-04 - val_huber: 1.6102e-06 - val_loss: 7.8826e-05 - val_mass_balance: 1.5312e-04\n",
- "Epoch 21/50\n",
- "886/886 20s 23ms/step - huber: 1.8796e-06 - loss: 7.7536e-05 - mass_balance: 1.5063e-04 - val_huber: 1.6175e-06 - val_loss: 7.9814e-05 - val_mass_balance: 1.5504e-04\n",
- "Epoch 22/50\n",
- "886/886 15s 17ms/step - huber: 1.7153e-06 - loss: 7.7000e-05 - mass_balance: 1.4963e-04 - val_huber: 1.6125e-06 - val_loss: 7.5456e-05 - val_mass_balance: 1.4669e-04\n",
- "Epoch 23/50\n",
- "886/886 13s 15ms/step - huber: 2.0066e-06 - loss: 7.7276e-05 - mass_balance: 1.5009e-04 - val_huber: 1.6136e-06 - val_loss: 7.6164e-05 - val_mass_balance: 1.4803e-04\n",
- "Epoch 24/50\n",
- "886/886 14s 15ms/step - huber: 1.9103e-06 - loss: 7.6767e-05 - mass_balance: 1.4911e-04 - val_huber: 1.6051e-06 - val_loss: 7.6967e-05 - val_mass_balance: 1.4963e-04\n",
- "Epoch 25/50\n",
- "886/886 17s 19ms/step - huber: 1.7270e-06 - loss: 7.5980e-05 - mass_balance: 1.4763e-04 - val_huber: 1.6134e-06 - val_loss: 7.4603e-05 - val_mass_balance: 1.4503e-04\n",
- "Epoch 26/50\n",
- "886/886 21s 24ms/step - huber: 1.9137e-06 - loss: 7.5943e-05 - mass_balance: 1.4750e-04 - val_huber: 1.6108e-06 - val_loss: 7.6177e-05 - val_mass_balance: 1.4806e-04\n",
- "Epoch 27/50\n",
- "886/886 20s 22ms/step - huber: 2.1447e-06 - loss: 7.5024e-05 - mass_balance: 1.4564e-04 - val_huber: 1.6123e-06 - val_loss: 7.5187e-05 - val_mass_balance: 1.4617e-04\n",
- "Epoch 28/50\n",
- "886/886 19s 22ms/step - huber: 2.0587e-06 - loss: 7.4814e-05 - mass_balance: 1.4526e-04 - val_huber: 1.6096e-06 - val_loss: 7.4630e-05 - val_mass_balance: 1.4500e-04\n",
- "Epoch 29/50\n",
- "886/886 12s 14ms/step - huber: 1.8129e-06 - loss: 7.3951e-05 - mass_balance: 1.4368e-04 - val_huber: 1.6081e-06 - val_loss: 7.4177e-05 - val_mass_balance: 1.4413e-04\n",
- "Epoch 30/50\n",
- "886/886 12s 14ms/step - huber: 2.0191e-06 - loss: 7.4011e-05 - mass_balance: 1.4372e-04 - val_huber: 1.6102e-06 - val_loss: 7.4224e-05 - val_mass_balance: 1.4424e-04\n",
- "Epoch 31/50\n",
- "886/886 12s 14ms/step - huber: 1.7857e-06 - loss: 7.3585e-05 - mass_balance: 1.4296e-04 - val_huber: 1.6118e-06 - val_loss: 7.3740e-05 - val_mass_balance: 1.4335e-04\n",
- "Epoch 32/50\n",
- "886/886 15s 17ms/step - huber: 1.9859e-06 - loss: 7.2959e-05 - mass_balance: 1.4168e-04 - val_huber: 1.6117e-06 - val_loss: 7.2533e-05 - val_mass_balance: 1.4098e-04\n",
- "Epoch 33/50\n",
- "886/886 22s 25ms/step - huber: 1.9710e-06 - loss: 7.2820e-05 - mass_balance: 1.4141e-04 - val_huber: 1.6038e-06 - val_loss: 7.4178e-05 - val_mass_balance: 1.4417e-04\n",
- "Epoch 34/50\n",
- "886/886 24s 27ms/step - huber: 1.8785e-06 - loss: 7.2509e-05 - mass_balance: 1.4084e-04 - val_huber: 1.6048e-06 - val_loss: 7.2866e-05 - val_mass_balance: 1.4163e-04\n",
- "Epoch 35/50\n",
- "886/886 22s 24ms/step - huber: 2.0480e-06 - loss: 7.2435e-05 - mass_balance: 1.4063e-04 - val_huber: 1.6072e-06 - val_loss: 7.1991e-05 - val_mass_balance: 1.3992e-04\n",
- "Epoch 36/50\n",
- "886/886 19s 21ms/step - huber: 2.0031e-06 - loss: 7.1794e-05 - mass_balance: 1.3939e-04 - val_huber: 1.6078e-06 - val_loss: 7.1452e-05 - val_mass_balance: 1.3888e-04\n",
- "Epoch 37/50\n",
- "886/886 15s 16ms/step - huber: 1.9493e-06 - loss: 7.1358e-05 - mass_balance: 1.3857e-04 - val_huber: 1.6105e-06 - val_loss: 7.1551e-05 - val_mass_balance: 1.3904e-04\n",
- "Epoch 38/50\n",
- "886/886 13s 14ms/step - huber: 1.9237e-06 - loss: 7.1155e-05 - mass_balance: 1.3818e-04 - val_huber: 1.6065e-06 - val_loss: 7.1723e-05 - val_mass_balance: 1.3939e-04\n",
- "Epoch 39/50\n",
- "556/886 5s 16ms/step - huber: 1.9354e-06 - loss: 7.1366e-05 - mass_balance: 1.3857e-04"
+ "647/886 3s 13ms/step - huber: 3.2162e-04 - loss: 0.0052 - mass_balance: 0.0098"
  ]
 }
],

File 2 of 2: Python module (model and loss definitions)

@@ -1,5 +1,5 @@
 import keras
-from keras.layers import Dense, Dropout, Input, BatchNormalization, LeakyReLU
+from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
 import tensorflow as tf
 import h5py
 import numpy as np
@@ -15,7 +15,6 @@ from imblearn.under_sampling import RandomUnderSampler
 from imblearn.over_sampling import RandomOverSampler
 from collections import Counter
 import os
-from preprocessing import *
 from sklearn import set_config
 from importlib import reload
@@ -62,23 +61,35 @@ def model_definition(architecture):
     elif architecture == "large_batch_normalization":
         model = keras.Sequential([
             keras.layers.Input(shape=(8,), dtype=dtype),
-            BatchNormalization(),  # normalization of the input data
+            BatchNormalization(),
             Dense(512, dtype=dtype),
+            BatchNormalization(),  # after the Dense layer, for more stable distributions
             LeakyReLU(negative_slope=0.01),
-            # BatchNormalization(),
             Dense(1024, dtype=dtype),
+            BatchNormalization(),
             LeakyReLU(negative_slope=0.01),
-            # BatchNormalization(),
             Dense(512, dtype=dtype),
+            BatchNormalization(),
             LeakyReLU(negative_slope=0.01),
             Dense(8, dtype=dtype),
         ])
+    elif architecture == "large_self_normalization":
+        model = keras.Sequential([
+            keras.layers.Input(shape=(8,), dtype=dtype),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(1024, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(8, dtype=dtype),
+        ])
     elif architecture == "paper":
         model = keras.Sequential(
             [
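The two added branches take opposite routes to stable activation statistics. The batch-normalized variant inserts BatchNormalization after each Dense layer, normalizing pre-activations before the LeakyReLU; the self-normalizing variant instead relies on SELU with lecun_normal initialization, which keeps activations near zero mean and unit variance on their own (Klambauer et al., 2017), with AlphaDropout as the dropout variant designed to preserve those moments. A quick empirical check of that property, assuming the TensorFlow backend and standardized inputs:

```python
import numpy as np
import keras

# Stack a few SELU layers as in "large_self_normalization" (no BatchNorm)
# and verify activations stay roughly zero-mean / unit-variance with depth.
x = np.random.normal(size=(1024, 8)).astype("float32")
snn = keras.Sequential([
    keras.layers.Input(shape=(8,)),
    keras.layers.Dense(512, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(1024, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(512, activation="selu", kernel_initializer="lecun_normal"),
])
a = snn(x).numpy()
print(a.mean(), a.std())  # both should remain close to 0 and 1
```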
@@ -163,11 +174,6 @@ def custom_loss(
             mean_y = tf.convert_to_tensor(
                 preprocess.scaler_y.mean_, dtype=tf.float32)
-        else:
-            raise Exception(
-                "No valid scaler type found. Choose between 'standard' and 'minmax'."
-            )
     except AttributeError:
         raise Exception(
             "Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
@@ -184,6 +190,10 @@ def custom_loss(
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
     # apply expm1 on the columns of predicted_inverse and results_inverse if log transformation was used
     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
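Both custom_loss and mass_balance_metric now dispatch on scaler_type before comparing quantities in original units. A hypothetical standalone version of that dispatch (the tensor names mirror the diff; the helper itself is not in the repository): both fitted scalers reduce to an affine inverse, and the new "none" branch is the identity because nothing was fitted:

```python
import tensorflow as tf

def inverse_transform(z, scaler_type, scale=None, offset=None):
    # 'standard' and 'minmax' both undo an affine map, x = z * scale + offset,
    # with scale/offset taken from the fitted scaler as tf constants.
    if scaler_type in ("standard", "minmax"):
        return z * scale + offset
    # Added in this commit: no scaler fitted, values are already in original units.
    if scaler_type == "none":
        return z
    raise ValueError(f"unknown scaler_type: {scaler_type!r}")
```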
@@ -274,6 +284,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
         results_inverse = tf.math.expm1(results_inverse)
@@ -338,6 +352,9 @@ def mass_balance_evaluation(model, X, preprocess):
     classes.reset_index(drop=True, inplace=True)
     prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
     # backtransform min/max or standard scaler
+    if preprocess.scaler_X is not None:
     X = pd.DataFrame(
         preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
         columns=columns,
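This last hunk is cut off in the view above; a sketch of the guard it plausibly introduces (hypothetical helper, with preprocess.scaler_X assumed to be either a fitted sklearn scaler or None when scaler_type is "none"):

```python
import pandas as pd

def backtransform(X, preprocess, columns):
    # Only undo the scaling when a scaler was actually fitted; with the new
    # "none" option the model outputs are already in original units.
    if preprocess.scaler_X is not None:
        return pd.DataFrame(
            preprocess.scaler_X.inverse_transform(X[columns]),
            columns=columns,
        )
    return X[columns].copy()
```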