mirror of
https://git.gfz-potsdam.de/naaice/model-training.git
synced 2025-12-13 10:38:22 +01:00
adapt training
This commit is contained in:
parent
09a5687580
commit
04f5c40b29
@ -52,16 +52,29 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"os.chdir(\"./projects/model-training/src/\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2025-03-27 14:56:10.374756: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
|
||||
"2025-03-27 14:56:10.395067: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
||||
"To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
|
||||
"2025-03-28 14:24:13.743271: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
|
||||
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
||||
"E0000 00:00:1743168253.897216 16215 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
|
||||
"E0000 00:00:1743168253.941693 16215 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
|
||||
"2025-03-28 14:24:14.439356: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
|
||||
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -89,22 +102,25 @@
|
||||
"- `Barite_50_Data_training.h5`: Simulates a total of 1000 iterations, with barium being abandoned at one corner. The data set also contains all 1000 iterations\n",
|
||||
"- `barite_50_4_corner.h5`: Simulates a total of 5000 iterations, with barium being abandoned at all 4 corners. Contains every 20th iteration.\n",
|
||||
"- `barite_50_4_corner_20k.h5`: Simulates a total of 20000 iterations, whereby barium is abandoned at all 4 corners and also contains all 20000 iterations (not in the LFS, as too large for git)\n",
|
||||
"- `Barite_4c_corner.h5`: Simulates a total of 6000 iterations, whereby barium is abandoned at all 4 corners\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"The datasets are located under `./datasets` (must first be pulled with git lfs)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"| Dataset | # cells | non-reactive portion | reactive portion |\n",
|
||||
"| Dataset | # cells | non-reactive portion | reactive portion (kmeans) |\n",
|
||||
"|---------|--------------|----------------------|---------------|\n",
|
||||
"| 1-corner 1k Iterations | 2.502.500 | 0,988 | 0,012 |\n",
|
||||
"| 4-corner 5k Iterations | 630.000 | 0,952 | 0,048 |\n",
|
||||
"| 4-corner 20k Iterations | 50.000.000 | 0,867 | 0,133 |\n"
|
||||
"| 4-corner 20k Iterations | 50.000.000 | 0,867 | 0,133 |\n",
|
||||
"| 4-corner 6k Iterations | 1.510.000 | | |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -127,6 +143,25 @@
|
||||
"df_results.drop(\"Charge\", axis=1, inplace=True, errors=\"ignore\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Experimental parameters"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scaler_type = \"minmax\" \n",
|
||||
"feature_engineering = \"false\"\n",
|
||||
"optimizer_type = \"adam\"\n",
|
||||
"loss_variant = \"huber_mass_balance\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
@ -152,7 +187,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -182,12 +217,10 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"feature_engineering = False\n",
|
||||
"\n",
|
||||
"if feature_engineering == True:\n",
|
||||
" df_design[\"Ba\\Sr\"] = df_design[\"Ba\"] / df_design[\"Sr\"]\n",
|
||||
" df_design[\"BaxS\"] = df_design[\"Ba\"] * df_design[\"S\"]"
|
||||
@ -195,7 +228,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
@ -222,7 +255,7 @@
|
||||
"X_train, y_train = preprocess.balancer(X_train, y_train, strategy=\"off\")\n",
|
||||
"X_train, y_train = preprocess.class_selection(X_train, y_train, class_label=1.0)\n",
|
||||
"X_train_untr = X_train.copy()\n",
|
||||
"preprocess.scale_fit(X_train, y_train, type=\"minmax\")\n",
|
||||
"preprocess.scale_fit(X_train, y_train, type=scaler_type)\n",
|
||||
"X_train, X_test, y_train, y_test = preprocess.scale_transform(\n",
|
||||
" X_train, X_test, y_train, y_test\n",
|
||||
")\n",
|
||||
@ -338,7 +371,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -356,7 +389,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -384,9 +417,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"I0000 00:00:1743168379.083212 16215 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13393 MB memory: -> device: 0, name: NVIDIA A2, pci bus id: 0000:82:00.0, compute capability: 8.6\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# select model architecture\n",
|
||||
"model = model_definition(\"large\", len(df_design.columns), len(df_results.columns)) \n",
|
||||
@ -401,18 +442,18 @@
|
||||
"h2 = 0.5283208497548787\n",
|
||||
"h3 = 0.5099528144902471\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"scaler_type = \"minmax\"\n",
|
||||
"loss_variant = \"huber_mass_balance\"\n",
|
||||
"delta = 1.7642791340966357\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"optimizer_adam = keras.optimizers.Adam(learning_rate=lr_schedule)\n",
|
||||
"optimizer_sgd = keras.optimizers.SGD(learning_rate=lr_schedule)\n",
|
||||
"optimizer_rmsprop = keras.optimizers.RMSprop(learning_rate=lr_schedule)\n",
|
||||
"match optimizer_type:\n",
|
||||
" case \"adam\":\n",
|
||||
" optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)\n",
|
||||
" case \"sgd\":\n",
|
||||
" optimizer = keras.optimizers.SGD(learning_rate=lr_schedule)\n",
|
||||
" case \"rmsprop\":\n",
|
||||
" optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)\n",
|
||||
"\n",
|
||||
"model.compile(\n",
|
||||
" optimizer=optimizer_adam,\n",
|
||||
" optimizer=optimizer,\n",
|
||||
" loss=custom_loss(preprocess, column_dict, h1, h2, h3, scaler_type, loss_variant, 1),\n",
|
||||
" metrics=[\n",
|
||||
" huber_metric(delta),\n",
|
||||
@ -423,42 +464,50 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1m 380/2092\u001b[0m \u001b[32m━━━\u001b[0m\u001b[37m━━━━━━━━━━━━━━━━━\u001b[0m \u001b[1m21s\u001b[0m 13ms/step - huber: 0.0685 - loss: 0.1884 - mass_balance: 0.3475"
|
||||
"Epoch 1/500\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"ename": "KeyboardInterrupt",
|
||||
"evalue": "",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[9], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m history \u001b[38;5;241m=\u001b[39m model_training(model, epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n",
|
||||
"Cell \u001b[0;32mIn[7], line 4\u001b[0m, in \u001b[0;36mmodel_training\u001b[0;34m(model, batch_size, epochs)\u001b[0m\n\u001b[1;32m 2\u001b[0m start \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 3\u001b[0m callback \u001b[38;5;241m=\u001b[39m keras\u001b[38;5;241m.\u001b[39mcallbacks\u001b[38;5;241m.\u001b[39mEarlyStopping(monitor\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mloss\u001b[39m\u001b[38;5;124m\"\u001b[39m, patience\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m30\u001b[39m)\n\u001b[0;32m----> 4\u001b[0m history \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mfit(\n\u001b[1;32m 5\u001b[0m X_train\u001b[38;5;241m.\u001b[39mloc[:, X_train\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClass\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 6\u001b[0m y_train\u001b[38;5;241m.\u001b[39mloc[:, y_train\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClass\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 7\u001b[0m batch_size\u001b[38;5;241m=\u001b[39mbatch_size,\n\u001b[1;32m 8\u001b[0m epochs\u001b[38;5;241m=\u001b[39mepochs,\n\u001b[1;32m 9\u001b[0m validation_data\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 10\u001b[0m X_val\u001b[38;5;241m.\u001b[39mloc[:, X_val\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClass\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 11\u001b[0m y_val\u001b[38;5;241m.\u001b[39mloc[:, y_val\u001b[38;5;241m.\u001b[39mcolumns \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mClass\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 12\u001b[0m ),\n\u001b[1;32m 13\u001b[0m callbacks\u001b[38;5;241m=\u001b[39m[callback],\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 16\u001b[0m end \u001b[38;5;241m=\u001b[39m 
time\u001b[38;5;241m.\u001b[39mtime()\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTraining took \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m seconds\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(end \u001b[38;5;241m-\u001b[39m start))\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py:117\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 117\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 119\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/keras/src/backend/tensorflow/trainer.py:320\u001b[0m, in \u001b[0;36mTensorFlowTrainer.fit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq)\u001b[0m\n\u001b[1;32m 318\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m step, iterator \u001b[38;5;129;01min\u001b[39;00m epoch_iterator\u001b[38;5;241m.\u001b[39menumerate_epoch():\n\u001b[1;32m 319\u001b[0m callbacks\u001b[38;5;241m.\u001b[39mon_train_batch_begin(step)\n\u001b[0;32m--> 320\u001b[0m logs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_function(iterator)\n\u001b[1;32m 321\u001b[0m callbacks\u001b[38;5;241m.\u001b[39mon_train_batch_end(step, logs)\n\u001b[1;32m 322\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstop_training:\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/util/traceback_utils.py:150\u001b[0m, in \u001b[0;36mfilter_traceback.<locals>.error_handler\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 148\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 150\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m fn(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 152\u001b[0m filtered_tb \u001b[38;5;241m=\u001b[39m _process_traceback_frames(e\u001b[38;5;241m.\u001b[39m__traceback__)\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:833\u001b[0m, in \u001b[0;36mFunction.__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 830\u001b[0m compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mxla\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnonXla\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 832\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m OptionalXlaContext(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jit_compile):\n\u001b[0;32m--> 833\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_call(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwds)\n\u001b[1;32m 835\u001b[0m new_tracing_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mexperimental_get_tracing_count()\n\u001b[1;32m 836\u001b[0m without_tracing \u001b[38;5;241m=\u001b[39m (tracing_count \u001b[38;5;241m==\u001b[39m new_tracing_count)\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py:878\u001b[0m, in \u001b[0;36mFunction._call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock\u001b[38;5;241m.\u001b[39mrelease()\n\u001b[1;32m 876\u001b[0m \u001b[38;5;66;03m# In this case we have not created variables on the first call. So we can\u001b[39;00m\n\u001b[1;32m 877\u001b[0m \u001b[38;5;66;03m# run the first trace but we should fail if variables are created.\u001b[39;00m\n\u001b[0;32m--> 878\u001b[0m results \u001b[38;5;241m=\u001b[39m tracing_compilation\u001b[38;5;241m.\u001b[39mcall_function(\n\u001b[1;32m 879\u001b[0m args, kwds, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_variable_creation_config\n\u001b[1;32m 880\u001b[0m )\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_created_variables:\n\u001b[1;32m 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCreating variables on a non-first call to a function\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 883\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m decorated with tf.function.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:132\u001b[0m, in \u001b[0;36mcall_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m 130\u001b[0m args \u001b[38;5;241m=\u001b[39m args \u001b[38;5;28;01mif\u001b[39;00m args \u001b[38;5;28;01melse\u001b[39;00m ()\n\u001b[1;32m 131\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m kwargs \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;28;01melse\u001b[39;00m {}\n\u001b[0;32m--> 132\u001b[0m function \u001b[38;5;241m=\u001b[39m trace_function(\n\u001b[1;32m 133\u001b[0m args\u001b[38;5;241m=\u001b[39margs, kwargs\u001b[38;5;241m=\u001b[39mkwargs, tracing_options\u001b[38;5;241m=\u001b[39mtracing_options\n\u001b[1;32m 134\u001b[0m )\n\u001b[1;32m 136\u001b[0m \u001b[38;5;66;03m# Bind it ourselves to skip unnecessary canonicalization of default call.\u001b[39;00m\n\u001b[1;32m 137\u001b[0m bound_args \u001b[38;5;241m=\u001b[39m function\u001b[38;5;241m.\u001b[39mfunction_type\u001b[38;5;241m.\u001b[39mbind(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:178\u001b[0m, in \u001b[0;36mtrace_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m 175\u001b[0m args \u001b[38;5;241m=\u001b[39m tracing_options\u001b[38;5;241m.\u001b[39minput_signature\n\u001b[1;32m 176\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m--> 178\u001b[0m concrete_function \u001b[38;5;241m=\u001b[39m _maybe_define_function(\n\u001b[1;32m 179\u001b[0m args, kwargs, tracing_options\n\u001b[1;32m 180\u001b[0m )\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m tracing_options\u001b[38;5;241m.\u001b[39mbind_graph_to_function:\n\u001b[1;32m 183\u001b[0m concrete_function\u001b[38;5;241m.\u001b[39m_garbage_collector\u001b[38;5;241m.\u001b[39mrelease() \u001b[38;5;66;03m# pylint: disable=protected-access\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/python/eager/polymorphic_function/tracing_compilation.py:239\u001b[0m, in \u001b[0;36m_maybe_define_function\u001b[0;34m(args, kwargs, tracing_options)\u001b[0m\n\u001b[1;32m 229\u001b[0m lookup_func_type, lookup_func_context \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 230\u001b[0m function_type_utils\u001b[38;5;241m.\u001b[39mmake_canonicalized_monomorphic_type(\n\u001b[1;32m 231\u001b[0m args,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 235\u001b[0m )\n\u001b[1;32m 236\u001b[0m )\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tracing_options\u001b[38;5;241m.\u001b[39mfunction_cache \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 239\u001b[0m concrete_function \u001b[38;5;241m=\u001b[39m tracing_options\u001b[38;5;241m.\u001b[39mfunction_cache\u001b[38;5;241m.\u001b[39mlookup(\n\u001b[1;32m 240\u001b[0m lookup_func_type, current_func_context\n\u001b[1;32m 241\u001b[0m )\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 243\u001b[0m concrete_function \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/core/function/polymorphism/function_cache.py:48\u001b[0m, in \u001b[0;36mFunctionCache.lookup\u001b[0;34m(self, function_type, context)\u001b[0m\n\u001b[1;32m 46\u001b[0m context \u001b[38;5;241m=\u001b[39m context \u001b[38;5;129;01mor\u001b[39;00m FunctionContext()\n\u001b[1;32m 47\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m context \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dispatch_dict:\n\u001b[0;32m---> 48\u001b[0m dispatch_type \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dispatch_dict[context]\u001b[38;5;241m.\u001b[39mdispatch(function_type)\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dispatch_type:\n\u001b[1;32m 50\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_primary[(context, dispatch_type)]\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/core/function/polymorphism/type_dispatch.py:85\u001b[0m, in \u001b[0;36mTypeDispatchTable.dispatch\u001b[0;34m(self, request)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m request\n\u001b[1;32m 83\u001b[0m \u001b[38;5;66;03m# For known non-exact matches.\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;66;03m# (self._dispatch cache does not contain exact matches)\u001b[39;00m\n\u001b[0;32m---> 85\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m request \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dispatch_cache:\n\u001b[1;32m 86\u001b[0m \u001b[38;5;66;03m# Move to the front of LRU cache.\u001b[39;00m\n\u001b[1;32m 87\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dispatch_cache\u001b[38;5;241m.\u001b[39mpop(request)\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_dispatch_cache[request] \u001b[38;5;241m=\u001b[39m result\n",
|
||||
"File \u001b[0;32m~/bin/miniconda3/envs/training/lib/python3.11/site-packages/tensorflow/core/function/polymorphism/function_type.py:448\u001b[0m, in \u001b[0;36mFunctionType.__eq__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 446\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moutput\u001b[38;5;241m.\u001b[39mfrom_tensors(\u001b[38;5;28miter\u001b[39m(flat_values))\n\u001b[0;32m--> 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__eq__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28mbool\u001b[39m:\n\u001b[1;32m 449\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(other, FunctionType):\n\u001b[1;32m 450\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n",
|
||||
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
|
||||
"I0000 00:00:1743168394.599325 16266 service.cc:148] XLA service 0x7fbba800f5a0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:\n",
|
||||
"I0000 00:00:1743168394.599698 16266 service.cc:156] StreamExecutor device (0): NVIDIA A2, Compute Capability 8.6\n",
|
||||
"2025-03-28 14:26:34.666890: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.\n",
|
||||
"I0000 00:00:1743168395.131797 16266 cuda_dnn.cc:529] Loaded cuDNN version 90300\n",
|
||||
"I0000 00:00:1743168395.921117 16266 device_compiler.h:188] Compiled cluster using XLA! This line is logged at most once for the lifetime of the process.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m13s\u001b[0m 4ms/step - huber: 0.0218 - loss: 0.0593 - mass_balance: 0.1089 - val_huber: 4.2851e-04 - val_loss: 0.0092 - val_mass_balance: 0.0177\n",
|
||||
"Epoch 2/500\n",
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m17s\u001b[0m 3ms/step - huber: 3.9075e-04 - loss: 0.0055 - mass_balance: 0.0106 - val_huber: 1.8523e-04 - val_loss: 0.0055 - val_mass_balance: 0.0105\n",
|
||||
"Epoch 3/500\n",
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 3ms/step - huber: 5.1098e-04 - loss: 0.0050 - mass_balance: 0.0095 - val_huber: 8.2820e-05 - val_loss: 0.0050 - val_mass_balance: 0.0096\n",
|
||||
"Epoch 4/500\n",
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 3ms/step - huber: 1.4331e-04 - loss: 0.0034 - mass_balance: 0.0064 - val_huber: 1.1835e-04 - val_loss: 0.0047 - val_mass_balance: 0.0090\n",
|
||||
"Epoch 5/500\n",
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 3ms/step - huber: 1.5873e-04 - loss: 0.0028 - mass_balance: 0.0053 - val_huber: 8.8557e-05 - val_loss: 0.0032 - val_mass_balance: 0.0060\n",
|
||||
"Epoch 6/500\n",
|
||||
"\u001b[1m2092/2092\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m10s\u001b[0m 3ms/step - huber: 1.2674e-04 - loss: 0.0026 - mass_balance: 0.0051 - val_huber: 3.4972e-05 - val_loss: 0.0019 - val_mass_balance: 0.0037\n",
|
||||
"Epoch 7/500\n",
|
||||
"\u001b[1m1205/2092\u001b[0m \u001b[32m━━━━━━━━━━━\u001b[0m\u001b[37m━━━━━━━━━\u001b[0m \u001b[1m2s\u001b[0m 3ms/step - huber: 7.2290e-05 - loss: 0.0018 - mass_balance: 0.0034"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"history = model_training(model, epochs=1)"
|
||||
"history = model_training(model, epochs=500)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -908,7 +957,7 @@
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "training",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
||||
9
src/gpu_experiments.py
Normal file
9
src/gpu_experiments.py
Normal file
@ -0,0 +1,9 @@
|
||||
from preprocessing import *
|
||||
import numpy as np
|
||||
|
||||
|
||||
scaler_experiments = ["none", "minmax", "standard"]
|
||||
|
||||
|
||||
for i in scaler_experiments:
|
||||
|
||||
@ -668,8 +668,9 @@ class preprocessing:
|
||||
self.scaler_input = StandardScaler()
|
||||
self.scaler_output = StandardScaler()
|
||||
|
||||
else:
|
||||
raise Exception("No valid scaler type found")
|
||||
elif type == "none":
|
||||
print("No scaler is fitted.")
|
||||
return 0
|
||||
|
||||
|
||||
all_data = pd.concat([X, y],axis=0)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user