From 79e50f47a27212767587b06cc0acaf349101ec50 Mon Sep 17 00:00:00 2001 From: Hannes Signer Date: Fri, 28 Feb 2025 10:44:47 +0100 Subject: [PATCH] add batch normalization and self normalizing models --- src/POET_Training.ipynb | 108 +++++++--------------------------------- src/preprocessing.py | 49 ++++++++++++------ 2 files changed, 50 insertions(+), 107 deletions(-) diff --git a/src/POET_Training.ipynb b/src/POET_Training.ipynb index 5b9acd9..47d6b4b 100644 --- a/src/POET_Training.ipynb +++ b/src/POET_Training.ipynb @@ -59,8 +59,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2025-02-27 16:53:02.063625: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", - "2025-02-27 16:53:02.082519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", + "2025-02-28 10:22:00.281793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", + "2025-02-28 10:22:00.302002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" ] } @@ -158,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -180,20 +180,20 @@ } ], "source": [ - "preprocess = preprocessing(np.log1p, np.expm1) #np.log1p, np.expm1\n", + "preprocess = preprocessing() #np.log1p, np.expm1\n", "X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])\n", "\n", "# optional: perform log transformation\n", - "X, y = preprocess.funcTranform(X, y)\n", + "# X, y = preprocess.funcTranform(X, y)\n", "\n", "X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)\n", "X_train_origin = X_train.copy()\n", "X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n", "X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=0)\n", - "preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"minmax\")\n", - "X_train, X_test, y_train, y_test = preprocess.scale_transform(\n", - " X_train, X_test, y_train, y_test\n", - ")\n", + "# preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"standard\")\n", + "# X_train, X_test, y_train, y_test = preprocess.scale_transform(\n", + "# X_train, X_test, y_train, y_test\n", + "# )\n", "X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)" ] }, @@ -257,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -275,7 +275,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -303,16 +303,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# select model architecture\n", - "model = model_definition(\"large\")\n", + "model = 
model_definition(\"large_batch_normalization\")\n", "\n", "# define learning rate adaptation\n", "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n", - " initial_learning_rate=0.001, decay_steps=2000, decay_rate=0.9, staircase=True\n", + " initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True\n", ")\n", "\n", "# hyperparameters that are determined by hyperparameter optimization\n", @@ -321,7 +321,7 @@ "h3 = 0.5099528144902471\n", "\n", "\n", - "scaler_type = \"minmax\"\n", + "scaler_type = \"none\"\n", "loss_variant = \"huber_mass_balance\"\n", "delta = 1.7642791340966357\n", "\n", @@ -347,83 +347,9 @@ "output_type": "stream", "text": [ "Epoch 1/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 2.0302e-06 - loss: 9.1101e-05 - mass_balance: 1.7680e-04 - val_huber: 1.6816e-06 - val_loss: 8.5608e-05 - val_mass_balance: 1.6647e-04\n", + "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 13ms/step - huber: 2.7347e-04 - loss: 0.0051 - mass_balance: 0.0098 - val_huber: 2.8895e-05 - val_loss: 0.0032 - val_mass_balance: 0.0062\n", "Epoch 2/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.9164e-06 - loss: 9.1236e-05 - mass_balance: 1.7711e-04 - val_huber: 1.6744e-06 - val_loss: 8.8582e-05 - val_mass_balance: 1.7216e-04\n", - "Epoch 3/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 13ms/step - huber: 1.9028e-06 - loss: 8.8366e-05 - mass_balance: 1.7163e-04 - val_huber: 1.6736e-06 - val_loss: 8.9763e-05 - val_mass_balance: 1.7434e-04\n", - "Epoch 4/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 14ms/step - huber: 1.9853e-06 - loss: 9.0045e-05 - mass_balance: 1.7478e-04 - val_huber: 1.6635e-06 - val_loss: 9.0544e-05 - val_mass_balance: 1.7591e-04\n", - "Epoch 5/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 21ms/step - huber: 1.9494e-06 - loss: 8.7276e-05 - mass_balance: 1.6948e-04 - val_huber: 1.6741e-06 - val_loss: 8.6483e-05 - val_mass_balance: 1.6809e-04\n", - "Epoch 6/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 22ms/step - huber: 1.7979e-06 - loss: 8.7661e-05 - mass_balance: 1.7025e-04 - val_huber: 1.6537e-06 - val_loss: 8.6392e-05 - val_mass_balance: 1.6783e-04\n", - "Epoch 7/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 2.0609e-06 - loss: 8.4980e-05 - mass_balance: 1.6501e-04 - val_huber: 1.6581e-06 - val_loss: 8.4510e-05 - val_mass_balance: 1.6422e-04\n", - "Epoch 8/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 19ms/step - huber: 2.1306e-06 - loss: 8.5293e-05 - mass_balance: 1.6556e-04 - val_huber: 1.6407e-06 - val_loss: 8.3087e-05 - val_mass_balance: 1.6164e-04\n", - "Epoch 9/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7904e-06 - loss: 8.4495e-05 - mass_balance: 1.6414e-04 - val_huber: 1.6346e-06 - val_loss: 8.2466e-05 - val_mass_balance: 1.6029e-04\n", - "Epoch 10/50\n", - "\u001b[1m886/886\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 13ms/step - huber: 1.7857e-06 - loss: 8.3844e-05 - mass_balance: 1.6288e-04 - val_huber: 1.6314e-06 - val_loss: 8.4307e-05 - val_mass_balance: 1.6374e-04\n", - "Epoch 11/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 20ms/step - huber: 2.0917e-06 - loss: 8.3925e-05 - mass_balance: 1.6292e-04 - val_huber: 1.6253e-06 - val_loss: 8.5509e-05 - val_mass_balance: 1.6608e-04\n", - "Epoch 12/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 2.1661e-06 - loss: 8.2850e-05 - mass_balance: 1.6084e-04 - val_huber: 1.6364e-06 - val_loss: 8.2984e-05 - val_mass_balance: 1.6121e-04\n", - "Epoch 13/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 24ms/step - huber: 1.6631e-06 - loss: 8.3116e-05 - mass_balance: 1.6148e-04 - val_huber: 1.6294e-06 - val_loss: 8.0669e-05 - val_mass_balance: 1.5678e-04\n", - "Epoch 14/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 1.7664e-06 - loss: 8.1699e-05 - mass_balance: 1.5876e-04 - val_huber: 1.6272e-06 - val_loss: 8.0021e-05 - val_mass_balance: 1.5553e-04\n", - "Epoch 15/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.8568e-06 - loss: 8.0615e-05 - mass_balance: 1.5659e-04 - val_huber: 1.6175e-06 - val_loss: 8.0275e-05 - val_mass_balance: 1.5602e-04\n", - "Epoch 16/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7830e-06 - loss: 8.0901e-05 - mass_balance: 1.5717e-04 - val_huber: 1.6209e-06 - val_loss: 7.8130e-05 - val_mass_balance: 1.5187e-04\n", - "Epoch 17/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7357e-06 - loss: 7.9596e-05 - mass_balance: 1.5464e-04 - val_huber: 1.6223e-06 - val_loss: 8.7514e-05 - val_mass_balance: 1.7020e-04\n", - "Epoch 18/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m18s\u001b[0m 20ms/step - huber: 1.8609e-06 - loss: 7.9730e-05 - mass_balance: 1.5490e-04 - val_huber: 1.6178e-06 - val_loss: 7.8086e-05 - val_mass_balance: 1.5175e-04\n", - "Epoch 19/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m24s\u001b[0m 27ms/step - huber: 1.8063e-06 - loss: 7.8635e-05 - mass_balance: 1.5278e-04 - val_huber: 1.6157e-06 - val_loss: 7.7298e-05 - val_mass_balance: 1.5027e-04\n", - "Epoch 20/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 23ms/step - huber: 1.9740e-06 - loss: 7.8909e-05 - mass_balance: 1.5325e-04 - val_huber: 1.6102e-06 - val_loss: 7.8826e-05 - val_mass_balance: 1.5312e-04\n", - "Epoch 21/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 23ms/step - huber: 1.8796e-06 - loss: 7.7536e-05 - mass_balance: 1.5063e-04 - val_huber: 1.6175e-06 - val_loss: 7.9814e-05 - val_mass_balance: 1.5504e-04\n", - "Epoch 22/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.7153e-06 
- loss: 7.7000e-05 - mass_balance: 1.4963e-04 - val_huber: 1.6125e-06 - val_loss: 7.5456e-05 - val_mass_balance: 1.4669e-04\n", - "Epoch 23/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 15ms/step - huber: 2.0066e-06 - loss: 7.7276e-05 - mass_balance: 1.5009e-04 - val_huber: 1.6136e-06 - val_loss: 7.6164e-05 - val_mass_balance: 1.4803e-04\n", - "Epoch 24/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m14s\u001b[0m 15ms/step - huber: 1.9103e-06 - loss: 7.6767e-05 - mass_balance: 1.4911e-04 - val_huber: 1.6051e-06 - val_loss: 7.6967e-05 - val_mass_balance: 1.4963e-04\n", - "Epoch 25/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 19ms/step - huber: 1.7270e-06 - loss: 7.5980e-05 - mass_balance: 1.4763e-04 - val_huber: 1.6134e-06 - val_loss: 7.4603e-05 - val_mass_balance: 1.4503e-04\n", - "Epoch 26/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m21s\u001b[0m 24ms/step - huber: 1.9137e-06 - loss: 7.5943e-05 - mass_balance: 1.4750e-04 - val_huber: 1.6108e-06 - val_loss: 7.6177e-05 - val_mass_balance: 1.4806e-04\n", - "Epoch 27/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m20s\u001b[0m 22ms/step - huber: 2.1447e-06 - loss: 7.5024e-05 - mass_balance: 1.4564e-04 - val_huber: 1.6123e-06 - val_loss: 7.5187e-05 - val_mass_balance: 1.4617e-04\n", - "Epoch 28/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 22ms/step - huber: 2.0587e-06 - loss: 7.4814e-05 - mass_balance: 1.4526e-04 - val_huber: 1.6096e-06 - val_loss: 7.4630e-05 - val_mass_balance: 1.4500e-04\n", - "Epoch 29/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.8129e-06 - loss: 7.3951e-05 - mass_balance: 1.4368e-04 - val_huber: 1.6081e-06 - val_loss: 7.4177e-05 - val_mass_balance: 1.4413e-04\n", - "Epoch 30/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 2.0191e-06 - loss: 7.4011e-05 - mass_balance: 1.4372e-04 - val_huber: 1.6102e-06 - val_loss: 7.4224e-05 - val_mass_balance: 1.4424e-04\n", - "Epoch 31/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m12s\u001b[0m 14ms/step - huber: 1.7857e-06 - loss: 7.3585e-05 - mass_balance: 1.4296e-04 - val_huber: 1.6118e-06 - val_loss: 7.3740e-05 - val_mass_balance: 1.4335e-04\n", - "Epoch 32/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 17ms/step - huber: 1.9859e-06 - loss: 7.2959e-05 - mass_balance: 1.4168e-04 - val_huber: 1.6117e-06 - val_loss: 7.2533e-05 - val_mass_balance: 1.4098e-04\n", - "Epoch 33/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 25ms/step - huber: 1.9710e-06 - loss: 7.2820e-05 - mass_balance: 1.4141e-04 - val_huber: 1.6038e-06 - val_loss: 7.4178e-05 - val_mass_balance: 1.4417e-04\n", - "Epoch 34/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m24s\u001b[0m 27ms/step - huber: 1.8785e-06 - loss: 7.2509e-05 - mass_balance: 1.4084e-04 - val_huber: 1.6048e-06 - val_loss: 7.2866e-05 - 
val_mass_balance: 1.4163e-04\n", - "Epoch 35/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m22s\u001b[0m 24ms/step - huber: 2.0480e-06 - loss: 7.2435e-05 - mass_balance: 1.4063e-04 - val_huber: 1.6072e-06 - val_loss: 7.1991e-05 - val_mass_balance: 1.3992e-04\n", - "Epoch 36/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m19s\u001b[0m 21ms/step - huber: 2.0031e-06 - loss: 7.1794e-05 - mass_balance: 1.3939e-04 - val_huber: 1.6078e-06 - val_loss: 7.1452e-05 - val_mass_balance: 1.3888e-04\n", - "Epoch 37/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m15s\u001b[0m 16ms/step - huber: 1.9493e-06 - loss: 7.1358e-05 - mass_balance: 1.3857e-04 - val_huber: 1.6105e-06 - val_loss: 7.1551e-05 - val_mass_balance: 1.3904e-04\n", - "Epoch 38/50\n", - "\u001b[1m886/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 14ms/step - huber: 1.9237e-06 - loss: 7.1155e-05 - mass_balance: 1.3818e-04 - val_huber: 1.6065e-06 - val_loss: 7.1723e-05 - val_mass_balance: 1.3939e-04\n", - "Epoch 39/50\n", - "\u001b[1m556/886\u001b[0m \u001b[32m━━━━━━━━━━━━\u001b[0m\u001b[37m━━━━━━━━\u001b[0m \u001b[1m5s\u001b[0m 16ms/step - huber: 1.9354e-06 - loss: 7.1366e-05 - mass_balance: 1.3857e-04" + "\u001b[1m647/886\u001b[0m \u001b[32m━━━━━━━━━━━━━━\u001b[0m\u001b[37m━━━━━━\u001b[0m \u001b[1m3s\u001b[0m 13ms/step - huber: 3.2162e-04 - loss: 0.0052 - mass_balance: 0.0098" ] } ], diff --git a/src/preprocessing.py b/src/preprocessing.py index bfd5cf6..be783d6 100644 --- a/src/preprocessing.py +++ b/src/preprocessing.py @@ -1,5 +1,5 @@ import keras -from keras.layers import Dense, Dropout, Input, BatchNormalization, LeakyReLU +from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU import tensorflow as tf import h5py import numpy as np @@ -15,7 +15,6 @@ from imblearn.under_sampling import RandomUnderSampler from imblearn.over_sampling import RandomOverSampler from collections import Counter import os -from preprocessing import * from sklearn import set_config from importlib import reload @@ -62,22 +61,34 @@ def model_definition(architecture): elif architecture == "large_batch_normalization": model = keras.Sequential([ keras.layers.Input(shape=(8,), dtype=dtype), - BatchNormalization(), # Normalisierung der Eingabedaten + BatchNormalization(), Dense(512, dtype=dtype), - BatchNormalization(), # Nach der Dense-Schicht für stabilere Verteilungen LeakyReLU(negative_slope=0.01), + # BatchNormalization(), Dense(1024, dtype=dtype), - BatchNormalization(), LeakyReLU(negative_slope=0.01), + # BatchNormalization(), Dense(512, dtype=dtype), - BatchNormalization(), LeakyReLU(negative_slope=0.01), Dense(8, dtype=dtype), ]) + + + elif architecture == "large_self_normalization": + model = keras.Sequential([ + keras.layers.Input(shape=(8,), dtype=dtype), + Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype), + AlphaDropout(0.05), + Dense(1024, activation='selu', kernel_initializer='lecun_normal',dtype=dtype), + AlphaDropout(0.05), + Dense(512, activation='selu', kernel_initializer='lecun_normal',dtype=dtype), + AlphaDropout(0.05), + Dense(8, dtype=dtype), + ]) elif architecture == "paper": model = keras.Sequential( @@ -163,11 +174,6 @@ def custom_loss( mean_y = tf.convert_to_tensor( preprocess.scaler_y.mean_, dtype=tf.float32) - else: - raise Exception( - "No valid scaler type 
-            )
-
     except AttributeError:
         raise Exception(
             "Data was normalized with a different scaler than the one specified for training. Compare the scaling approach between preprocessing and training."
         )
@@ -183,6 +189,10 @@ def custom_loss(
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results

     # apply expm1 on the columns of predicted_inverse and results_inverse if a log transformation was used
     if preprocess.func_dict_out is not None:
@@ -273,6 +283,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results

     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
@@ -338,13 +352,14 @@ def mass_balance_evaluation(model, X, preprocess):
     classes.reset_index(drop=True, inplace=True)
     prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
-    # backtransform min/max or standard scaler
-    X = pd.DataFrame(
-        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
-        columns=columns,
-    )
-    prediction = pd.DataFrame(
-        preprocess.scaler_y.inverse_transform(prediction), columns=columns
-    )
+    # backtransform only if a min/max or standard scaler was fitted
+    if preprocess.scaler_X is not None:
+        X = pd.DataFrame(
+            preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
+            columns=columns,
+        )
+        prediction = pd.DataFrame(
+            preprocess.scaler_y.inverse_transform(prediction), columns=columns
+        )

     # apply backtransformation if log transformation was applied
     if preprocess.func_dict_out is not None:
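
Note on the batch-normalization variant: placing BatchNormalization directly after the
Input layer standardizes the raw features with learned batch statistics, which is
presumably why the explicit scale_fit/scale_transform calls in the notebook stay
commented out and the loss runs with scaler_type = "none". A minimal standalone sketch
of that effect (the uniform random data is an illustrative stand-in, not the POET
dataset):

    import numpy as np
    import keras

    # Features on a raw scale, far from zero mean / unit variance.
    x = np.random.uniform(0.0, 100.0, size=(256, 8)).astype("float32")
    bn = keras.layers.BatchNormalization()
    h = bn(x, training=True)  # training=True -> normalize with batch statistics
    print(float(np.mean(h)), float(np.var(h)))  # close to 0.0 and 1.0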
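The self-normalizing variant leans on the SELU fixed point instead: with lecun_normal
initialization, and with AlphaDropout in place of plain Dropout so that the dropout
step preserves the activation mean and variance, activations stay near zero mean and
unit variance through the stack without any normalization layers. A quick check of
that property (layer widths follow the patch; the standard-normal input is an
illustrative assumption):

    import numpy as np
    import keras

    h = np.random.randn(1024, 8).astype("float32")
    for units in (512, 1024, 512):
        # selu + lecun_normal is the self-normalizing configuration
        h = keras.layers.Dense(
            units, activation="selu", kernel_initializer="lecun_normal"
        )(h)
    print(float(np.mean(h)), float(np.var(h)))  # should stay close to (0, 1)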
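For reference, the staircase schedule in the training cell keeps the learning rate
constant for each block of 2000 steps and then cuts it by 10%, i.e.
lr(step) = 0.01 * 0.9 ** (step // 2000):

    import keras

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True
    )
    print(float(lr_schedule(0)))     # 0.01
    print(float(lr_schedule(1999)))  # 0.01 (still in the first block)
    print(float(lr_schedule(2000)))  # 0.009
    print(float(lr_schedule(4000)))  # 0.0081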