add batch normalization and self normalizing models

Hannes Signer 2025-02-28 10:44:47 +01:00
parent e9baf132b4
commit 79e50f47a2
2 changed files with 50 additions and 107 deletions

File 1 of 2: Jupyter notebook (.ipynb)

@@ -59,8 +59,8 @@
  "name": "stderr",
  "output_type": "stream",
  "text": [
- "2025-02-27 16:53:02.063625: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
- "2025-02-27 16:53:02.082519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "2025-02-28 10:22:00.281793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "2025-02-28 10:22:00.302002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
  "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
  ]
 }
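The notice in this stderr cell documents its own fix: oneDNN custom ops can be disabled for bit-reproducible results by setting the environment variable the message names, before TensorFlow is first imported. A minimal sketch, not part of the commit:

```python
import os

# Must run before the first TensorFlow import, otherwise the flag is ignored.
os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"

import tensorflow as tf  # noqa: E402 (deliberately imported after setting the flag)
```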
@@ -158,7 +158,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 14,
+ "execution_count": 4,
  "metadata": {},
  "outputs": [
  {
@@ -180,20 +180,20 @@
 }
 ],
 "source": [
- "preprocess = preprocessing(np.log1p, np.expm1) #np.log1p, np.expm1\n",
+ "preprocess = preprocessing() #np.log1p, np.expm1\n",
 "X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])\n",
 "\n",
 "# optional: perform log transformation\n",
- "X, y = preprocess.funcTranform(X, y)\n",
+ "# X, y = preprocess.funcTranform(X, y)\n",
 "\n",
 "X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)\n",
 "X_train_origin = X_train.copy()\n",
 "X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n",
 "X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=0)\n",
- "preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"minmax\")\n",
- "X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
- "    X_train, X_test, y_train, y_test\n",
- ")\n",
+ "# preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"standard\")\n",
+ "# X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
+ "#     X_train, X_test, y_train, y_test\n",
+ "# )\n",
 "X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)"
]
},
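The log transformation stays available but is switched off here, so the network now trains on untransformed concentrations. For reference, a minimal NumPy sketch (independent of the notebook's preprocessing class) showing that the np.log1p/np.expm1 pair named in the comment is an exact forward/inverse round trip:

```python
import numpy as np

# log1p compresses data spanning many orders of magnitude while staying
# finite at 0; expm1 undoes it exactly, which is why it is applied to
# predictions on the way back.
x = np.array([0.0, 1e-8, 1e-4, 1.0, 100.0])
z = np.log1p(x)  # forward transform on X and y
assert np.allclose(np.expm1(z), x)
```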
@@ -257,7 +257,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 17,
+ "execution_count": 6,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -275,7 +275,7 @@
 },
 {
  "cell_type": "code",
- "execution_count": 18,
+ "execution_count": 7,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -303,16 +303,16 @@
 },
 {
  "cell_type": "code",
- "execution_count": 19,
+ "execution_count": 8,
  "metadata": {},
  "outputs": [],
  "source": [
 "# select model architecture\n",
- "model = model_definition(\"large\")\n",
+ "model = model_definition(\"large_batch_normalization\")\n",
 "\n",
 "# define learning rate adaptation\n",
 "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n",
- "    initial_learning_rate=0.001, decay_steps=2000, decay_rate=0.9, staircase=True\n",
+ "    initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True\n",
 ")\n",
 "\n",
 "# hyperparameters that are determined by hyperparameter optimization\n",
@@ -321,7 +321,7 @@
 "h3 = 0.5099528144902471\n",
 "\n",
 "\n",
- "scaler_type = \"minmax\"\n",
+ "scaler_type = \"none\"\n",
 "loss_variant = \"huber_mass_balance\"\n",
 "delta = 1.7642791340966357\n",
 "\n",
@@ -347,83 +347,9 @@
  "output_type": "stream",
  "text": [
  "Epoch 1/50\n",
- "886/886 19s 21ms/step - huber: 2.0302e-06 - loss: 9.1101e-05 - mass_balance: 1.7680e-04 - val_huber: 1.6816e-06 - val_loss: 8.5608e-05 - val_mass_balance: 1.6647e-04\n",
+ "886/886 11s 13ms/step - huber: 2.7347e-04 - loss: 0.0051 - mass_balance: 0.0098 - val_huber: 2.8895e-05 - val_loss: 0.0032 - val_mass_balance: 0.0062\n",
  "Epoch 2/50\n",
- "886/886 15s 17ms/step - huber: 1.9164e-06 - loss: 9.1236e-05 - mass_balance: 1.7711e-04 - val_huber: 1.6744e-06 - val_loss: 8.8582e-05 - val_mass_balance: 1.7216e-04\n",
- "Epoch 3/50\n",
- "886/886 12s 13ms/step - huber: 1.9028e-06 - loss: 8.8366e-05 - mass_balance: 1.7163e-04 - val_huber: 1.6736e-06 - val_loss: 8.9763e-05 - val_mass_balance: 1.7434e-04\n",
- "Epoch 4/50\n",
- "886/886 13s 14ms/step - huber: 1.9853e-06 - loss: 9.0045e-05 - mass_balance: 1.7478e-04 - val_huber: 1.6635e-06 - val_loss: 9.0544e-05 - val_mass_balance: 1.7591e-04\n",
- "Epoch 5/50\n",
- "886/886 18s 21ms/step - huber: 1.9494e-06 - loss: 8.7276e-05 - mass_balance: 1.6948e-04 - val_huber: 1.6741e-06 - val_loss: 8.6483e-05 - val_mass_balance: 1.6809e-04\n",
- "Epoch 6/50\n",
- "886/886 20s 22ms/step - huber: 1.7979e-06 - loss: 8.7661e-05 - mass_balance: 1.7025e-04 - val_huber: 1.6537e-06 - val_loss: 8.6392e-05 - val_mass_balance: 1.6783e-04\n",
- "Epoch 7/50\n",
- "886/886 20s 23ms/step - huber: 2.0609e-06 - loss: 8.4980e-05 - mass_balance: 1.6501e-04 - val_huber: 1.6581e-06 - val_loss: 8.4510e-05 - val_mass_balance: 1.6422e-04\n",
- "Epoch 8/50\n",
- "886/886 17s 19ms/step - huber: 2.1306e-06 - loss: 8.5293e-05 - mass_balance: 1.6556e-04 - val_huber: 1.6407e-06 - val_loss: 8.3087e-05 - val_mass_balance: 1.6164e-04\n",
- "Epoch 9/50\n",
- "886/886 12s 14ms/step - huber: 1.7904e-06 - loss: 8.4495e-05 - mass_balance: 1.6414e-04 - val_huber: 1.6346e-06 - val_loss: 8.2466e-05 - val_mass_balance: 1.6029e-04\n",
- "Epoch 10/50\n",
- "886/886 12s 13ms/step - huber: 1.7857e-06 - loss: 8.3844e-05 - mass_balance: 1.6288e-04 - val_huber: 1.6314e-06 - val_loss: 8.4307e-05 - val_mass_balance: 1.6374e-04\n",
- "Epoch 11/50\n",
- "886/886 18s 20ms/step - huber: 2.0917e-06 - loss: 8.3925e-05 - mass_balance: 1.6292e-04 - val_huber: 1.6253e-06 - val_loss: 8.5509e-05 - val_mass_balance: 1.6608e-04\n",
- "Epoch 12/50\n",
- "886/886 20s 23ms/step - huber: 2.1661e-06 - loss: 8.2850e-05 - mass_balance: 1.6084e-04 - val_huber: 1.6364e-06 - val_loss: 8.2984e-05 - val_mass_balance: 1.6121e-04\n",
- "Epoch 13/50\n",
- "886/886 21s 24ms/step - huber: 1.6631e-06 - loss: 8.3116e-05 - mass_balance: 1.6148e-04 - val_huber: 1.6294e-06 - val_loss: 8.0669e-05 - val_mass_balance: 1.5678e-04\n",
- "Epoch 14/50\n",
- "886/886 19s 21ms/step - huber: 1.7664e-06 - loss: 8.1699e-05 - mass_balance: 1.5876e-04 - val_huber: 1.6272e-06 - val_loss: 8.0021e-05 - val_mass_balance: 1.5553e-04\n",
- "Epoch 15/50\n",
- "886/886 12s 14ms/step - huber: 1.8568e-06 - loss: 8.0615e-05 - mass_balance: 1.5659e-04 - val_huber: 1.6175e-06 - val_loss: 8.0275e-05 - val_mass_balance: 1.5602e-04\n",
- "Epoch 16/50\n",
- "886/886 12s 14ms/step - huber: 1.7830e-06 - loss: 8.0901e-05 - mass_balance: 1.5717e-04 - val_huber: 1.6209e-06 - val_loss: 7.8130e-05 - val_mass_balance: 1.5187e-04\n",
- "Epoch 17/50\n",
- "886/886 12s 14ms/step - huber: 1.7357e-06 - loss: 7.9596e-05 - mass_balance: 1.5464e-04 - val_huber: 1.6223e-06 - val_loss: 8.7514e-05 - val_mass_balance: 1.7020e-04\n",
- "Epoch 18/50\n",
- "886/886 18s 20ms/step - huber: 1.8609e-06 - loss: 7.9730e-05 - mass_balance: 1.5490e-04 - val_huber: 1.6178e-06 - val_loss: 7.8086e-05 - val_mass_balance: 1.5175e-04\n",
- "Epoch 19/50\n",
- "886/886 24s 27ms/step - huber: 1.8063e-06 - loss: 7.8635e-05 - mass_balance: 1.5278e-04 - val_huber: 1.6157e-06 - val_loss: 7.7298e-05 - val_mass_balance: 1.5027e-04\n",
- "Epoch 20/50\n",
- "886/886 21s 23ms/step - huber: 1.9740e-06 - loss: 7.8909e-05 - mass_balance: 1.5325e-04 - val_huber: 1.6102e-06 - val_loss: 7.8826e-05 - val_mass_balance: 1.5312e-04\n",
- "Epoch 21/50\n",
- "886/886 20s 23ms/step - huber: 1.8796e-06 - loss: 7.7536e-05 - mass_balance: 1.5063e-04 - val_huber: 1.6175e-06 - val_loss: 7.9814e-05 - val_mass_balance: 1.5504e-04\n",
- "Epoch 22/50\n",
- "886/886 15s 17ms/step - huber: 1.7153e-06 - loss: 7.7000e-05 - mass_balance: 1.4963e-04 - val_huber: 1.6125e-06 - val_loss: 7.5456e-05 - val_mass_balance: 1.4669e-04\n",
- "Epoch 23/50\n",
- "886/886 13s 15ms/step - huber: 2.0066e-06 - loss: 7.7276e-05 - mass_balance: 1.5009e-04 - val_huber: 1.6136e-06 - val_loss: 7.6164e-05 - val_mass_balance: 1.4803e-04\n",
- "Epoch 24/50\n",
- "886/886 14s 15ms/step - huber: 1.9103e-06 - loss: 7.6767e-05 - mass_balance: 1.4911e-04 - val_huber: 1.6051e-06 - val_loss: 7.6967e-05 - val_mass_balance: 1.4963e-04\n",
- "Epoch 25/50\n",
- "886/886 17s 19ms/step - huber: 1.7270e-06 - loss: 7.5980e-05 - mass_balance: 1.4763e-04 - val_huber: 1.6134e-06 - val_loss: 7.4603e-05 - val_mass_balance: 1.4503e-04\n",
- "Epoch 26/50\n",
- "886/886 21s 24ms/step - huber: 1.9137e-06 - loss: 7.5943e-05 - mass_balance: 1.4750e-04 - val_huber: 1.6108e-06 - val_loss: 7.6177e-05 - val_mass_balance: 1.4806e-04\n",
- "Epoch 27/50\n",
- "886/886 20s 22ms/step - huber: 2.1447e-06 - loss: 7.5024e-05 - mass_balance: 1.4564e-04 - val_huber: 1.6123e-06 - val_loss: 7.5187e-05 - val_mass_balance: 1.4617e-04\n",
- "Epoch 28/50\n",
- "886/886 19s 22ms/step - huber: 2.0587e-06 - loss: 7.4814e-05 - mass_balance: 1.4526e-04 - val_huber: 1.6096e-06 - val_loss: 7.4630e-05 - val_mass_balance: 1.4500e-04\n",
- "Epoch 29/50\n",
- "886/886 12s 14ms/step - huber: 1.8129e-06 - loss: 7.3951e-05 - mass_balance: 1.4368e-04 - val_huber: 1.6081e-06 - val_loss: 7.4177e-05 - val_mass_balance: 1.4413e-04\n",
- "Epoch 30/50\n",
- "886/886 12s 14ms/step - huber: 2.0191e-06 - loss: 7.4011e-05 - mass_balance: 1.4372e-04 - val_huber: 1.6102e-06 - val_loss: 7.4224e-05 - val_mass_balance: 1.4424e-04\n",
- "Epoch 31/50\n",
- "886/886 12s 14ms/step - huber: 1.7857e-06 - loss: 7.3585e-05 - mass_balance: 1.4296e-04 - val_huber: 1.6118e-06 - val_loss: 7.3740e-05 - val_mass_balance: 1.4335e-04\n",
- "Epoch 32/50\n",
- "886/886 15s 17ms/step - huber: 1.9859e-06 - loss: 7.2959e-05 - mass_balance: 1.4168e-04 - val_huber: 1.6117e-06 - val_loss: 7.2533e-05 - val_mass_balance: 1.4098e-04\n",
- "Epoch 33/50\n",
- "886/886 22s 25ms/step - huber: 1.9710e-06 - loss: 7.2820e-05 - mass_balance: 1.4141e-04 - val_huber: 1.6038e-06 - val_loss: 7.4178e-05 - val_mass_balance: 1.4417e-04\n",
- "Epoch 34/50\n",
- "886/886 24s 27ms/step - huber: 1.8785e-06 - loss: 7.2509e-05 - mass_balance: 1.4084e-04 - val_huber: 1.6048e-06 - val_loss: 7.2866e-05 - val_mass_balance: 1.4163e-04\n",
- "Epoch 35/50\n",
- "886/886 22s 24ms/step - huber: 2.0480e-06 - loss: 7.2435e-05 - mass_balance: 1.4063e-04 - val_huber: 1.6072e-06 - val_loss: 7.1991e-05 - val_mass_balance: 1.3992e-04\n",
- "Epoch 36/50\n",
- "886/886 19s 21ms/step - huber: 2.0031e-06 - loss: 7.1794e-05 - mass_balance: 1.3939e-04 - val_huber: 1.6078e-06 - val_loss: 7.1452e-05 - val_mass_balance: 1.3888e-04\n",
- "Epoch 37/50\n",
- "886/886 15s 16ms/step - huber: 1.9493e-06 - loss: 7.1358e-05 - mass_balance: 1.3857e-04 - val_huber: 1.6105e-06 - val_loss: 7.1551e-05 - val_mass_balance: 1.3904e-04\n",
- "Epoch 38/50\n",
- "886/886 13s 14ms/step - huber: 1.9237e-06 - loss: 7.1155e-05 - mass_balance: 1.3818e-04 - val_huber: 1.6065e-06 - val_loss: 7.1723e-05 - val_mass_balance: 1.3939e-04\n",
- "Epoch 39/50\n",
- "556/886 5s 16ms/step - huber: 1.9354e-06 - loss: 7.1366e-05 - mass_balance: 1.3857e-04"
+ "647/886 3s 13ms/step - huber: 3.2162e-04 - loss: 0.0052 - mass_balance: 0.0098"
  ]
 }
],

File 2 of 2: Python module (model and loss definitions)

@@ -1,5 +1,5 @@
 import keras
-from keras.layers import Dense, Dropout, Input, BatchNormalization, LeakyReLU
+from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
 import tensorflow as tf
 import h5py
 import numpy as np
@@ -15,7 +15,6 @@ from imblearn.under_sampling import RandomUnderSampler
 from imblearn.over_sampling import RandomOverSampler
 from collections import Counter
 import os
-from preprocessing import *
 from sklearn import set_config
 from importlib import reload
@@ -62,23 +61,35 @@ def model_definition(architecture):
     elif architecture == "large_batch_normalization":
         model = keras.Sequential([
             keras.layers.Input(shape=(8,), dtype=dtype),
-            BatchNormalization(),  # normalization of the input data
+            BatchNormalization(),
             Dense(512, dtype=dtype),
+            BatchNormalization(),  # after the Dense layer, for more stable distributions
             LeakyReLU(negative_slope=0.01),
-            # BatchNormalization(),
             Dense(1024, dtype=dtype),
+            BatchNormalization(),
             LeakyReLU(negative_slope=0.01),
-            # BatchNormalization(),
             Dense(512, dtype=dtype),
+            BatchNormalization(),
             LeakyReLU(negative_slope=0.01),
             Dense(8, dtype=dtype),
         ])
+    elif architecture == "large_self_normalization":
+        model = keras.Sequential([
+            keras.layers.Input(shape=(8,), dtype=dtype),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(1024, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(8, dtype=dtype),
+        ])
     elif architecture == "paper":
         model = keras.Sequential(
             [
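The two added branches take opposite routes to stable activation statistics. The batch-normalized variant inserts BatchNormalization after each Dense layer, normalizing pre-activations before the LeakyReLU; the self-normalizing variant instead relies on SELU with lecun_normal initialization, which keeps activations near zero mean and unit variance on their own (Klambauer et al., 2017), with AlphaDropout as the dropout variant designed to preserve those moments. A quick empirical check of that property, assuming the TensorFlow backend and standardized inputs:

```python
import numpy as np
import keras

# Stack a few SELU layers as in "large_self_normalization" (no BatchNorm)
# and verify activations stay roughly zero-mean / unit-variance with depth.
x = np.random.normal(size=(1024, 8)).astype("float32")
snn = keras.Sequential([
    keras.layers.Input(shape=(8,)),
    keras.layers.Dense(512, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(1024, activation="selu", kernel_initializer="lecun_normal"),
    keras.layers.Dense(512, activation="selu", kernel_initializer="lecun_normal"),
])
a = snn(x).numpy()
print(a.mean(), a.std())  # both should remain close to 0 and 1
```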
@@ -163,11 +174,6 @@ def custom_loss(
             mean_y = tf.convert_to_tensor(
                 preprocess.scaler_y.mean_, dtype=tf.float32)
-        else:
-            raise Exception(
-                "No valid scaler type found. Choose between 'standard' and 'minmax'."
-            )
     except AttributeError:
         raise Exception(
             "Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
@@ -184,6 +190,10 @@ def custom_loss(
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
     # apply expm1 on the columns of predicted_inverse and results_inverse if log transformation was used
     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
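Both custom_loss and mass_balance_metric now dispatch on scaler_type before comparing quantities in original units. A hypothetical standalone version of that dispatch (the tensor names mirror the diff; the helper itself is not in the repository): both fitted scalers reduce to an affine inverse, and the new "none" branch is the identity because nothing was fitted:

```python
import tensorflow as tf

def inverse_transform(z, scaler_type, scale=None, offset=None):
    # 'standard' and 'minmax' both undo an affine map, x = z * scale + offset,
    # with scale/offset taken from the fitted scaler as tf constants.
    if scaler_type in ("standard", "minmax"):
        return z * scale + offset
    # Added in this commit: no scaler fitted, values are already in original units.
    if scaler_type == "none":
        return z
    raise ValueError(f"unknown scaler_type: {scaler_type!r}")
```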
@@ -274,6 +284,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X
+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
         results_inverse = tf.math.expm1(results_inverse)
@@ -338,6 +352,9 @@ def mass_balance_evaluation(model, X, preprocess):
     classes.reset_index(drop=True, inplace=True)
     prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
     # backtransform min/max or standard scaler
+    if preprocess.scaler_X is not None:
     X = pd.DataFrame(
         preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
         columns=columns,
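This last hunk is cut off in the view above; a sketch of the guard it plausibly introduces (hypothetical helper, with preprocess.scaler_X assumed to be either a fitted sklearn scaler or None when scaler_type is "none"):

```python
import pandas as pd

def backtransform(X, preprocess, columns):
    # Only undo the scaling when a scaler was actually fitted; with the new
    # "none" option the model outputs are already in original units.
    if preprocess.scaler_X is not None:
        return pd.DataFrame(
            preprocess.scaler_X.inverse_transform(X[columns]),
            columns=columns,
        )
    return X[columns].copy()
```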