Mirror of https://git.gfz-potsdam.de/naaice/model-training.git, synced 2025-12-13 08:48:22 +01:00

Commit 79e50f47a2 (parent e9baf132b4): add batch normalization and self normalizing models
@@ -59,8 +59,8 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-   "2025-02-27 16:53:02.063625: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
-   "2025-02-27 16:53:02.082519: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+   "2025-02-28 10:22:00.281793: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+   "2025-02-28 10:22:00.302002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
    "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
    ]
   }
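The oneDNN notice above is informational; per the message itself, the custom operations can be disabled for bit-exact reproducibility. A minimal sketch (the variable must be set before TensorFlow is imported):

    import os
    os.environ["TF_ENABLE_ONEDNN_OPTS"] = "0"  # disable oneDNN custom operations

    import tensorflow as tf  # import only after the variable is set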
@@ -158,7 +158,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
    {
@@ -180,20 +180,20 @@
    }
   ],
   "source": [
-   "preprocess = preprocessing(np.log1p, np.expm1) #np.log1p, np.expm1\n",
+   "preprocess = preprocessing() #np.log1p, np.expm1\n",
    "X, y = preprocess.cluster(df_design[species_columns], df_results[species_columns])\n",
    "\n",
    "# optional: perform log transformation\n",
-   "X, y = preprocess.funcTranform(X, y)\n",
+   "# X, y = preprocess.funcTranform(X, y)\n",
    "\n",
    "X_train, X_test, y_train, y_test = preprocess.split(X, y, ratio=0.2)\n",
    "X_train_origin = X_train.copy()\n",
    "X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n",
    "X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=0)\n",
    "preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"minmax\")\n",
    "X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
    "    X_train, X_test, y_train, y_test\n",
    ")\n",
    "# preprocess.scale_fit(X_train, y_train, scaling=\"global\", type=\"standard\")\n",
    "# X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
    "#    X_train, X_test, y_train, y_test\n",
    "# )\n",
    "X_train, X_val, y_train, y_val = preprocess.split(X_train, y_train, ratio=0.1)"
   ]
  },
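The cell above drives the repository's own preprocessing helper, so its internals are not visible in this diff. As a rough sketch of what scale_fit/scale_transform with scaling="global" and type="minmax" plausibly amount to (an assumption about the helper, not its actual code), using sklearn:

    from sklearn.preprocessing import MinMaxScaler

    # Hypothetical stand-in: fit min-max statistics on the training split only,
    # then reuse them for every other split so no test information leaks in.
    scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()
    X_train_s = scaler_X.fit_transform(X_train)
    y_train_s = scaler_y.fit_transform(y_train)
    X_test_s = scaler_X.transform(X_test)  # training statistics, not refitted
    y_test_s = scaler_y.transform(y_test)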
@@ -257,7 +257,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 17,
+  "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -275,7 +275,7 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 18,
+  "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -303,16 +303,16 @@
  },
  {
   "cell_type": "code",
-  "execution_count": 19,
+  "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
   "# select model architecture\n",
-  "model = model_definition(\"large\")\n",
+  "model = model_definition(\"large_batch_normalization\")\n",
   "\n",
   "# define learning rate adaptation\n",
   "lr_schedule = keras.optimizers.schedules.ExponentialDecay(\n",
-  "    initial_learning_rate=0.001, decay_steps=2000, decay_rate=0.9, staircase=True\n",
+  "    initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True\n",
   ")\n",
   "\n",
   "# hyperparameters that are determined by hyperparameter optimization\n",
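With staircase=True the schedule decays in discrete jumps rather than continuously: the learning rate at step s is 0.01 * 0.9 ** floor(s / 2000). A minimal sketch of wiring such a schedule into an optimizer (the Adam choice is an assumption; the notebook's compile call sits outside this hunk):

    import keras

    lr_schedule = keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=0.01, decay_steps=2000, decay_rate=0.9, staircase=True
    )
    optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)
    # model.compile(optimizer=optimizer, loss=..., metrics=[...])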
@@ -321,7 +321,7 @@
   "h3 = 0.5099528144902471\n",
   "\n",
   "\n",
-  "scaler_type = \"minmax\"\n",
+  "scaler_type = \"none\"\n",
   "loss_variant = \"huber_mass_balance\"\n",
   "delta = 1.7642791340966357\n",
   "\n",
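The "huber_mass_balance" variant combines a Huber point-wise error with the mass-balance penalty implemented in custom_loss further below. A purely illustrative sketch of that shape (the actual weighting and mass-balance definition live in the repository's custom_loss and are not reproduced here):

    import tensorflow as tf

    def huber_mass_balance_sketch(delta=1.7642791340966357, weight=1.0):
        huber = tf.keras.losses.Huber(delta=delta)

        def loss(y_true, y_pred):
            point_term = huber(y_true, y_pred)  # robust element-wise error
            # Hypothetical mass-balance term: mismatch of the summed species
            balance_term = tf.reduce_mean(
                tf.abs(tf.reduce_sum(y_pred, axis=-1) - tf.reduce_sum(y_true, axis=-1))
            )
            return point_term + weight * balance_term

        return loss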
@@ -347,83 +347,9 @@
   "output_type": "stream",
   "text": [
   "Epoch 1/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 19s 21ms/step - huber: 2.0302e-06 - loss: 9.1101e-05 - mass_balance: 1.7680e-04 - val_huber: 1.6816e-06 - val_loss: 8.5608e-05 - val_mass_balance: 1.6647e-04\n",
+  "886/886 ━━━━━━━━━━━━━━━━━━━━ 11s 13ms/step - huber: 2.7347e-04 - loss: 0.0051 - mass_balance: 0.0098 - val_huber: 2.8895e-05 - val_loss: 0.0032 - val_mass_balance: 0.0062\n",
   "Epoch 2/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 15s 17ms/step - huber: 1.9164e-06 - loss: 9.1236e-05 - mass_balance: 1.7711e-04 - val_huber: 1.6744e-06 - val_loss: 8.8582e-05 - val_mass_balance: 1.7216e-04\n",
-  "Epoch 3/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - huber: 1.9028e-06 - loss: 8.8366e-05 - mass_balance: 1.7163e-04 - val_huber: 1.6736e-06 - val_loss: 8.9763e-05 - val_mass_balance: 1.7434e-04\n",
-  "Epoch 4/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - huber: 1.9853e-06 - loss: 9.0045e-05 - mass_balance: 1.7478e-04 - val_huber: 1.6635e-06 - val_loss: 9.0544e-05 - val_mass_balance: 1.7591e-04\n",
-  "Epoch 5/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 18s 21ms/step - huber: 1.9494e-06 - loss: 8.7276e-05 - mass_balance: 1.6948e-04 - val_huber: 1.6741e-06 - val_loss: 8.6483e-05 - val_mass_balance: 1.6809e-04\n",
-  "Epoch 6/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 20s 22ms/step - huber: 1.7979e-06 - loss: 8.7661e-05 - mass_balance: 1.7025e-04 - val_huber: 1.6537e-06 - val_loss: 8.6392e-05 - val_mass_balance: 1.6783e-04\n",
-  "Epoch 7/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 20s 23ms/step - huber: 2.0609e-06 - loss: 8.4980e-05 - mass_balance: 1.6501e-04 - val_huber: 1.6581e-06 - val_loss: 8.4510e-05 - val_mass_balance: 1.6422e-04\n",
-  "Epoch 8/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 17s 19ms/step - huber: 2.1306e-06 - loss: 8.5293e-05 - mass_balance: 1.6556e-04 - val_huber: 1.6407e-06 - val_loss: 8.3087e-05 - val_mass_balance: 1.6164e-04\n",
-  "Epoch 9/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.7904e-06 - loss: 8.4495e-05 - mass_balance: 1.6414e-04 - val_huber: 1.6346e-06 - val_loss: 8.2466e-05 - val_mass_balance: 1.6029e-04\n",
-  "Epoch 10/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 13ms/step - huber: 1.7857e-06 - loss: 8.3844e-05 - mass_balance: 1.6288e-04 - val_huber: 1.6314e-06 - val_loss: 8.4307e-05 - val_mass_balance: 1.6374e-04\n",
-  "Epoch 11/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 18s 20ms/step - huber: 2.0917e-06 - loss: 8.3925e-05 - mass_balance: 1.6292e-04 - val_huber: 1.6253e-06 - val_loss: 8.5509e-05 - val_mass_balance: 1.6608e-04\n",
-  "Epoch 12/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 20s 23ms/step - huber: 2.1661e-06 - loss: 8.2850e-05 - mass_balance: 1.6084e-04 - val_huber: 1.6364e-06 - val_loss: 8.2984e-05 - val_mass_balance: 1.6121e-04\n",
-  "Epoch 13/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 21s 24ms/step - huber: 1.6631e-06 - loss: 8.3116e-05 - mass_balance: 1.6148e-04 - val_huber: 1.6294e-06 - val_loss: 8.0669e-05 - val_mass_balance: 1.5678e-04\n",
-  "Epoch 14/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 19s 21ms/step - huber: 1.7664e-06 - loss: 8.1699e-05 - mass_balance: 1.5876e-04 - val_huber: 1.6272e-06 - val_loss: 8.0021e-05 - val_mass_balance: 1.5553e-04\n",
-  "Epoch 15/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.8568e-06 - loss: 8.0615e-05 - mass_balance: 1.5659e-04 - val_huber: 1.6175e-06 - val_loss: 8.0275e-05 - val_mass_balance: 1.5602e-04\n",
-  "Epoch 16/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.7830e-06 - loss: 8.0901e-05 - mass_balance: 1.5717e-04 - val_huber: 1.6209e-06 - val_loss: 7.8130e-05 - val_mass_balance: 1.5187e-04\n",
-  "Epoch 17/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.7357e-06 - loss: 7.9596e-05 - mass_balance: 1.5464e-04 - val_huber: 1.6223e-06 - val_loss: 8.7514e-05 - val_mass_balance: 1.7020e-04\n",
-  "Epoch 18/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 18s 20ms/step - huber: 1.8609e-06 - loss: 7.9730e-05 - mass_balance: 1.5490e-04 - val_huber: 1.6178e-06 - val_loss: 7.8086e-05 - val_mass_balance: 1.5175e-04\n",
-  "Epoch 19/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 24s 27ms/step - huber: 1.8063e-06 - loss: 7.8635e-05 - mass_balance: 1.5278e-04 - val_huber: 1.6157e-06 - val_loss: 7.7298e-05 - val_mass_balance: 1.5027e-04\n",
-  "Epoch 20/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 21s 23ms/step - huber: 1.9740e-06 - loss: 7.8909e-05 - mass_balance: 1.5325e-04 - val_huber: 1.6102e-06 - val_loss: 7.8826e-05 - val_mass_balance: 1.5312e-04\n",
-  "Epoch 21/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 20s 23ms/step - huber: 1.8796e-06 - loss: 7.7536e-05 - mass_balance: 1.5063e-04 - val_huber: 1.6175e-06 - val_loss: 7.9814e-05 - val_mass_balance: 1.5504e-04\n",
-  "Epoch 22/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 15s 17ms/step - huber: 1.7153e-06 - loss: 7.7000e-05 - mass_balance: 1.4963e-04 - val_huber: 1.6125e-06 - val_loss: 7.5456e-05 - val_mass_balance: 1.4669e-04\n",
-  "Epoch 23/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 13s 15ms/step - huber: 2.0066e-06 - loss: 7.7276e-05 - mass_balance: 1.5009e-04 - val_huber: 1.6136e-06 - val_loss: 7.6164e-05 - val_mass_balance: 1.4803e-04\n",
-  "Epoch 24/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 14s 15ms/step - huber: 1.9103e-06 - loss: 7.6767e-05 - mass_balance: 1.4911e-04 - val_huber: 1.6051e-06 - val_loss: 7.6967e-05 - val_mass_balance: 1.4963e-04\n",
-  "Epoch 25/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 17s 19ms/step - huber: 1.7270e-06 - loss: 7.5980e-05 - mass_balance: 1.4763e-04 - val_huber: 1.6134e-06 - val_loss: 7.4603e-05 - val_mass_balance: 1.4503e-04\n",
-  "Epoch 26/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 21s 24ms/step - huber: 1.9137e-06 - loss: 7.5943e-05 - mass_balance: 1.4750e-04 - val_huber: 1.6108e-06 - val_loss: 7.6177e-05 - val_mass_balance: 1.4806e-04\n",
-  "Epoch 27/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 20s 22ms/step - huber: 2.1447e-06 - loss: 7.5024e-05 - mass_balance: 1.4564e-04 - val_huber: 1.6123e-06 - val_loss: 7.5187e-05 - val_mass_balance: 1.4617e-04\n",
-  "Epoch 28/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 19s 22ms/step - huber: 2.0587e-06 - loss: 7.4814e-05 - mass_balance: 1.4526e-04 - val_huber: 1.6096e-06 - val_loss: 7.4630e-05 - val_mass_balance: 1.4500e-04\n",
-  "Epoch 29/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.8129e-06 - loss: 7.3951e-05 - mass_balance: 1.4368e-04 - val_huber: 1.6081e-06 - val_loss: 7.4177e-05 - val_mass_balance: 1.4413e-04\n",
-  "Epoch 30/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 2.0191e-06 - loss: 7.4011e-05 - mass_balance: 1.4372e-04 - val_huber: 1.6102e-06 - val_loss: 7.4224e-05 - val_mass_balance: 1.4424e-04\n",
-  "Epoch 31/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 12s 14ms/step - huber: 1.7857e-06 - loss: 7.3585e-05 - mass_balance: 1.4296e-04 - val_huber: 1.6118e-06 - val_loss: 7.3740e-05 - val_mass_balance: 1.4335e-04\n",
-  "Epoch 32/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 15s 17ms/step - huber: 1.9859e-06 - loss: 7.2959e-05 - mass_balance: 1.4168e-04 - val_huber: 1.6117e-06 - val_loss: 7.2533e-05 - val_mass_balance: 1.4098e-04\n",
-  "Epoch 33/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 22s 25ms/step - huber: 1.9710e-06 - loss: 7.2820e-05 - mass_balance: 1.4141e-04 - val_huber: 1.6038e-06 - val_loss: 7.4178e-05 - val_mass_balance: 1.4417e-04\n",
-  "Epoch 34/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 24s 27ms/step - huber: 1.8785e-06 - loss: 7.2509e-05 - mass_balance: 1.4084e-04 - val_huber: 1.6048e-06 - val_loss: 7.2866e-05 - val_mass_balance: 1.4163e-04\n",
-  "Epoch 35/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 22s 24ms/step - huber: 2.0480e-06 - loss: 7.2435e-05 - mass_balance: 1.4063e-04 - val_huber: 1.6072e-06 - val_loss: 7.1991e-05 - val_mass_balance: 1.3992e-04\n",
-  "Epoch 36/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 19s 21ms/step - huber: 2.0031e-06 - loss: 7.1794e-05 - mass_balance: 1.3939e-04 - val_huber: 1.6078e-06 - val_loss: 7.1452e-05 - val_mass_balance: 1.3888e-04\n",
-  "Epoch 37/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 15s 16ms/step - huber: 1.9493e-06 - loss: 7.1358e-05 - mass_balance: 1.3857e-04 - val_huber: 1.6105e-06 - val_loss: 7.1551e-05 - val_mass_balance: 1.3904e-04\n",
-  "Epoch 38/50\n",
-  "886/886 ━━━━━━━━━━━━━━━━━━━━ 13s 14ms/step - huber: 1.9237e-06 - loss: 7.1155e-05 - mass_balance: 1.3818e-04 - val_huber: 1.6065e-06 - val_loss: 7.1723e-05 - val_mass_balance: 1.3939e-04\n",
-  "Epoch 39/50\n",
-  "556/886 ━━━━━━━━━━━━━━━━━━━━ 5s 16ms/step - huber: 1.9354e-06 - loss: 7.1366e-05 - mass_balance: 1.3857e-04"
+  "647/886 ━━━━━━━━━━━━━━ 3s 13ms/step - huber: 3.2162e-04 - loss: 0.0052 - mass_balance: 0.0098"
   ]
  }
 ],
@@ -1,5 +1,5 @@
 import keras
-from keras.layers import Dense, Dropout, Input, BatchNormalization, LeakyReLU
+from keras.layers import Dense, AlphaDropout, Dropout, Input, BatchNormalization, LeakyReLU
 import tensorflow as tf
 import h5py
 import numpy as np
@@ -15,7 +15,6 @@ from imblearn.under_sampling import RandomUnderSampler
 from imblearn.over_sampling import RandomOverSampler
 from collections import Counter
 import os
 from preprocessing import *
 from sklearn import set_config
 from importlib import reload
@@ -62,22 +61,34 @@ def model_definition(architecture):
     elif architecture == "large_batch_normalization":
         model = keras.Sequential([
             keras.layers.Input(shape=(8,), dtype=dtype),
-            BatchNormalization(),  # normalize the input data
+            BatchNormalization(),

             Dense(512, dtype=dtype),
             BatchNormalization(),  # after the dense layer, for more stable distributions
             LeakyReLU(negative_slope=0.01),
             # BatchNormalization(),

             Dense(1024, dtype=dtype),
             BatchNormalization(),
             LeakyReLU(negative_slope=0.01),
             # BatchNormalization(),

             Dense(512, dtype=dtype),
             BatchNormalization(),
             LeakyReLU(negative_slope=0.01),

             Dense(8, dtype=dtype),
         ])


+    elif architecture == "large_self_normalization":
+        model = keras.Sequential([
+            keras.layers.Input(shape=(8,), dtype=dtype),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(1024, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(512, activation='selu', kernel_initializer='lecun_normal', dtype=dtype),
+            AlphaDropout(0.05),
+            Dense(8, dtype=dtype),
+        ])
+
     elif architecture == "paper":
         model = keras.Sequential(
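The new self-normalizing branch relies on the SELU activation with lecun_normal initialization plus AlphaDropout, which together keep activations near zero mean and unit variance without explicit BatchNormalization layers. A standalone sketch of that property (an illustrative check, not part of the repository):

    import numpy as np
    import keras
    from keras.layers import AlphaDropout, Dense, Input

    model = keras.Sequential([
        Input(shape=(8,)),
        Dense(512, activation="selu", kernel_initializer="lecun_normal"),
        AlphaDropout(0.05),  # dropout variant that preserves SELU's statistics
        Dense(512, activation="selu", kernel_initializer="lecun_normal"),
    ])
    x = np.random.normal(size=(4096, 8)).astype("float32")
    h = model(x, training=False)
    print(float(np.mean(h)), float(np.std(h)))  # roughly 0 and 1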
@@ -163,11 +174,6 @@ def custom_loss(
             mean_y = tf.convert_to_tensor(
                 preprocess.scaler_y.mean_, dtype=tf.float32)

-        else:
-            raise Exception(
-                "No valid scaler type found. Choose between 'standard' and 'minmax'."
-            )
-
     except AttributeError:
         raise Exception(
             "Data normalized with scaler different than specified for the training. Compare the scaling approach on preprocessing and training."
@@ -183,6 +189,10 @@ def custom_loss(
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X

+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
+
     # apply expm1 on the columns of predicted_inverse and results_inverse if log transformation was used
     if preprocess.func_dict_out is not None:
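The new "none" branch lets the loss operate on unscaled values directly. For reference, the three inverse mappings side by side, written in terms of sklearn scaler attributes; the minmax branch sits outside this hunk, so its exact form here is an assumption:

    def inverse_scale(values, scaler_type, scale, offset):
        # Works on NumPy arrays and TF tensors alike via operator overloading.
        # minmax: transform was x * scale_ + min_, so invert with (x - min_) / scale_
        if scaler_type == "minmax":
            return (values - offset) / scale
        # standard: transform was (x - mean_) / scale_, so invert with x * scale_ + mean_
        elif scaler_type == "standard":
            return values * scale + offset
        # none: the data was never scaled, identity
        elif scaler_type == "none":
            return values
        raise ValueError("scaler_type must be 'minmax', 'standard' or 'none'")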
@@ -273,6 +283,10 @@ def mass_balance_metric(preprocess, column_dict, scaler_type="minmax"):
     elif scaler_type == "standard":
         predicted_inverse = predicted * scale_y + mean_y
         results_inverse = results * scale_X + mean_X

+    elif scaler_type == "none":
+        predicted_inverse = predicted
+        results_inverse = results
+
     if preprocess.func_dict_out is not None:
         predicted_inverse = tf.math.expm1(predicted_inverse)
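expm1 is the exact inverse of the log1p transform optionally applied during preprocessing, and the pair stays numerically stable for the near-zero concentrations involved. A one-line check:

    import numpy as np

    x = np.array([0.0, 1e-12, 2.5])
    assert np.allclose(np.expm1(np.log1p(x)), x)  # exact round trip, even near zero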
@@ -338,13 +352,16 @@ def mass_balance_evaluation(model, X, preprocess):
     classes.reset_index(drop=True, inplace=True)
     prediction = pd.DataFrame(model.predict(X[columns]), columns=columns)
     # backtransform min/max or standard scaler
-    X = pd.DataFrame(
-        preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
-        columns=columns,
-    )
-    prediction = pd.DataFrame(
-        preprocess.scaler_y.inverse_transform(prediction), columns=columns
-    )
+    if preprocess.scaler_X is not None:
+        X = pd.DataFrame(
+            preprocess.scaler_X.inverse_transform(X.iloc[:, X.columns != "Class"]),
+            columns=columns,
+        )
+        prediction = pd.DataFrame(
+            preprocess.scaler_y.inverse_transform(prediction), columns=columns
+        )

     # apply backtransformation if log transformation was applied
     if preprocess.func_dict_out is not None:
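With the None guard, evaluation also works for runs where no scaler was fitted (scaler_type "none" above). The pattern in isolation (a sketch; the preprocessing object is assumed to hold sklearn-style scalers or None):

    import pandas as pd

    def backtransform(X, prediction, scaler_X, scaler_y, columns):
        # Invert the feature/target scaling only when scalers were actually fitted.
        if scaler_X is not None:
            X = pd.DataFrame(scaler_X.inverse_transform(X[columns]), columns=columns)
            prediction = pd.DataFrame(
                scaler_y.inverse_transform(prediction), columns=columns
            )
        return X, prediction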