diff --git a/POET_Training.ipynb b/POET_Training.ipynb
index 9bbcbb7..7f292b0 100644
--- a/POET_Training.ipynb
+++ b/POET_Training.ipynb
@@ -27,18 +27,9 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 47,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "2025-01-14 17:26:34.798886: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
- "2025-01-14 17:26:34.825591: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
- "To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
- ]
- },
{
"name": "stdout",
"output_type": "stream",
@@ -57,7 +48,9 @@
"import time\n",
"import sklearn.model_selection as sk\n",
"import matplotlib.pyplot as plt\n",
- "from sklearn.cluster import KMeans"
+ "from sklearn.cluster import KMeans\n",
+ "from imblearn.over_sampling import SMOTE\n",
+ "from collections import Counter"
]
},
{
@@ -69,7 +62,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 116,
"metadata": {},
"outputs": [],
"source": [
@@ -102,7 +95,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -188,7 +181,7 @@
}
],
"source": [
- "model = keras.Sequential(\n",
+ "model_simple = keras.Sequential(\n",
" [\n",
" keras.Input(shape = (12,), dtype = \"float32\"),\n",
" keras.layers.Dense(units = 128, activation = \"relu\", dtype = \"float32\"),\n",
@@ -197,8 +190,112 @@
" ]\n",
")\n",
"\n",
- "model.compile(optimizer=optimizer, loss = loss)\n",
- "model.summary()"
+ "model_simple.compile(optimizer=optimizer, loss = loss)\n",
+ "model_simple.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
Model: \"sequential_4\"\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1mModel: \"sequential_4\"\u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+ "┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+ "│ dense_15 (Dense) │ (None, 512) │ 6,656 │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_16 (Dense) │ (None, 1024) │ 525,312 │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_17 (Dense) │ (None, 512) │ 524,800 │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_18 (Dense) │ (None, 12) │ 6,156 │\n",
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+ "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
+ "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+ "│ dense_15 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m6,656\u001b[0m │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_16 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1024\u001b[0m) │ \u001b[38;5;34m525,312\u001b[0m │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_17 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m512\u001b[0m) │ \u001b[38;5;34m524,800\u001b[0m │\n",
+ "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+ "│ dense_18 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m12\u001b[0m) │ \u001b[38;5;34m6,156\u001b[0m │\n",
+ "└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " Total params: 1,062,924 (4.05 MB)\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m1,062,924\u001b[0m (4.05 MB)\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " Trainable params: 1,062,924 (4.05 MB)\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m1,062,924\u001b[0m (4.05 MB)\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ " Non-trainable params: 0 (0.00 B)\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model_large = keras.Sequential(\n",
+ " [keras.layers.Input(shape=(12,), dtype=dtype),\n",
+ " keras.layers.Dense(512, activation='relu', dtype=dtype),\n",
+ " keras.layers.Dense(1024, activation='relu', dtype=dtype),\n",
+ " keras.layers.Dense(512, activation='relu', dtype=dtype),\n",
+ " keras.layers.Dense(12, dtype=dtype)\n",
+ " ])\n",
+ "\n",
+ "model_large.compile(optimizer=optimizer, loss = loss)\n",
+ "model_large.summary()\n"
]
},
{
@@ -210,7 +307,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -234,7 +331,7 @@
},
{
"cell_type": "code",
- "execution_count": 55,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -280,7 +377,7 @@
},
{
"cell_type": "code",
- "execution_count": 49,
+ "execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
@@ -304,7 +401,7 @@
},
{
"cell_type": "code",
- "execution_count": 50,
+ "execution_count": 87,
"metadata": {},
"outputs": [
{
@@ -319,36 +416,58 @@
"source": [
"# widget with slider for the index\n",
"\n",
- "class_label = np.array([])\n",
+ "class_label_design = np.array([])\n",
+ "class_label_result = np.array([])\n",
+ "\n",
+ "\n",
"i = 1000\n",
"for i in range(0,1001):\n",
- " field = np.array(df_design['Barite'][(i*2500):(i*2500+2500)]).reshape(50,50)\n",
- " kmeans = KMeans(n_clusters=2, random_state=0).fit(field.reshape(-1,1))\n",
- " class_label = np.append(class_label.astype(int), kmeans.labels_)\n",
+ " field_design = np.array(df_design['Barite'][(i*2500):(i*2500+2500)]).reshape(50,50)\n",
+ " field_result = np.array(df_results['Barite'][(i*2500):(i*2500+2500)]).reshape(50,50)\n",
+ " \n",
+ " kmeans_design = KMeans(n_clusters=2, random_state=0).fit(field_design.reshape(-1,1))\n",
+ " kmeans_result = KMeans(n_clusters=2, random_state=0).fit(field_result.reshape(-1,1))\n",
+ " \n",
+ " class_label_design = np.append(class_label_design.astype(int), kmeans_design.labels_)\n",
+ " class_label_result = np.append(class_label_result.astype(int), kmeans_result.labels_)\n",
+ " \n",
"\n",
"\n",
- "class_label = pd.DataFrame(class_label, columns = [\"Class\"])\n",
- "\n"
+ "class_label_design = pd.DataFrame(class_label_design, columns = [\"Class\"])\n",
+ "class_label_result = pd.DataFrame(class_label_result, columns = [\"Class\"])\n"
]
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": 88,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if(\"Class\" in df_design.columns and \"Class\" in df_results.columns):\n",
+ " print(\"Class column already exists\")\n",
+ "else:\n",
+ " df_design = pd.concat([df_design, class_label_design], axis=1)\n",
+ " df_results = pd.concat([df_results, class_label_design], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 30,
+ "execution_count": 89,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAGfCAYAAAD22G0fAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAFkFJREFUeJzt3V9s1uXd+PHPjcAtaNvJnC0NzAdjHzfHg78IjkBUmEp/Icbo48kyjGHxRAUMDQcoeiDbQYuYEF2YLLrFmSw+7GD+O5iGJmrZQkwKQiSY+MsShk2k61ywrYhF8Pod+HjPDoYUip8Cr1fyPbiv7/e+e3mJ99ur9x8qpZQSAJBgXPYEADh/iRAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGnGn6kHfuqpp+Lxxx+P/fv3xw9+8IN44okn4oYbbvja+33++efxwQcfRF1dXVQqlTM1PQDOkFJKDA4ORnNzc4wb9zV7nXIGbN68uUyYMKE888wz5d133y0rV64sF110Udm3b9/X3renp6dEhMPhcDjO8qOnp+drn/MrpYz+F5jOnTs3rr322ti0aVNt7Pvf/37ccccd0dHRccL79vf3x7e+9a3Y9/Z/RP3F5/5vC//7P/8rewoAo+pIfBZ/jj/GRx99FA0NDSe8dtR/HXf48OHYsWNHPPTQQ8PGW1tbY9u2bcdcPzQ0FENDQ7Xbg4ODERFRf/G4qK879yM0vjIhewoAo+t/tzYn85LKqD/Lf/jhh3H06NFobGwcNt7Y2Bi9vb3HXN/R0RENDQ21Y/r06aM9JQDGqDO21fjXApZSjlvFNWvWRH9/f+3o6ek5U1MCYIwZ9V/HXXrppXHBBRccs+vp6+s7ZncUEVGtVqNarY72NAA4C4z6TmjixIkxe/bs6OzsHDbe2dkZ8+fPH+0fB8BZ7Ix8TmjVqlVx9913x5w5c2LevHnx9NNPx/vvvx/33XffmfhxAJylzkiEfvzjH8c//vGP+PnPfx779++PmTNnxh//+Me4/PLLz8SPA+AsdUY+J3Q6BgYGoqGhIQ78vyvOi7do/9/m/5M9BYBRdaR8Fm/Gy9Hf3x/19fUnvPbcf5YHYMwSIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGnOyBeYMpzvhwM4PjshANKIEABpRAiANCIEQBoRAiCNCAGQRoQASONzQqPA54AATo2dEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkMa3aH+Fb8MG+GbZCQGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBECa8+pzQj4HBDC22AkBkEaEAEgjQgCkESEA0ogQAGlECIA0Y/Yt2v/9n/8V4ysTsqcBwBlkJwRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSjDhCW7dujdtuuy2am5ujUqnESy+9NOx8KSXWrl0bzc3NMWnSpFi4cGHs2bNntOYLwDlkxBE6ePBgXHPNNbFx48bjnl+/fn1s2LAhNm7cGN3d3dHU1BSLFi2KwcHB054sAOeWEf+ldosXL47Fixcf91wpJZ544ol45JFH4s4774yIiOeeey4aGxvj+eefj3vvvff0ZgvAOWVUXxPau3dv9Pb2Rmtra22sWq3GggULYtu2bce9z9DQUAwMDAw7ADg/jGqEent7IyKisbFx2HhjY2Pt3L/q6OiIhoaG2jF9+vTRnBIAY9gZeXdcpVIZdruUcszYl9asWRP9/f21o6en50xMCYAxaMSvCZ1IU1NTRHyxI5o6dWptvK+v75jd0Zeq1WpUq9XRnAYAZ4lR3QnNmDEjmpqaorOzszZ2+PDh6Orqivnz54/mjwLgHDDindDHH38cf/nLX2q39+7dG7t27YopU6bEd7/73Whra4v29vZoaWmJlpaW
aG9vj8mTJ8eSJUtGdeIAnP1GHKHt27fHj370o9rtVatWRUTE0qVL47e//W2sXr06Dh06FMuWLYsDBw7E3LlzY8uWLVFXVzd6swbgnFAppZTsSXzVwMBANDQ0xMK4PcZXJmRPB4AROlI+izfj5ejv74/6+voTXuu74wBII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgzYgi1NHREdddd13U1dXFZZddFnfccUe89957w64ppcTatWujubk5Jk2aFAsXLow9e/aM6qQBODeMKEJdXV2xfPnyeOutt6KzszOOHDkSra2tcfDgwdo169evjw0bNsTGjRuju7s7mpqaYtGiRTE4ODjqkwfg7FYppZRTvfPf//73uOyyy6KrqytuvPHGKKVEc3NztLW1xYMPPhgREUNDQ9HY2BiPPfZY3HvvvV/7mAMDA9HQ0BAL4/YYX5lwqlMDIMmR8lm8GS9Hf39/1NfXn/Da03pNqL+/PyIipkyZEhERe/fujd7e3mhtba1dU61WY8GCBbFt27bjPsbQ0FAMDAwMOwA4P5xyhEopsWrVqrj++utj5syZERHR29sbERGNjY3Drm1sbKyd+1cdHR3R0NBQO6ZPn36qUwLgLHPKEVqxYkW888478T//8z/HnKtUKsNul1KOGfvSmjVror+/v3b09PSc6pQAOMuMP5U7PfDAA/HKK6/E1q1bY9q0abXxpqamiPhiRzR16tTaeF9f3zG7oy9Vq9WoVqunMg0AznIj2gmVUmLFihXxwgsvxOuvvx4zZswYdn7GjBnR1NQUnZ2dtbHDhw9HV1dXzJ8/f3RmDMA5Y0Q7oeXLl8fzzz8fL7/8ctTV1dVe52loaIhJkyZFpVKJtra2aG9vj5aWlmhpaYn29vaYPHlyLFmy5Iz8AwBw9hpRhDZt2hQREQsXLhw2/uyzz8ZPf/rTiIhYvXp1HDp0KJYtWxYHDhyIuXPnxpYtW6Kurm5UJgzAueO0Pid0JvicEMDZ7Rv7nBAAnA4RAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlGFKFNmzbFrFmzor6+Purr62PevHnx6quv1s6XUmLt2rXR3NwckyZNioULF8aePXtGfdIAnBtGFKFp06bFunXrYvv27bF9+/a46aab4vbbb6+FZv369bFhw4bYuHFjdHd3R1NTUyxatCgGBwfPyOQBOLtVSinldB5gypQp8fjjj8c999wTzc3N0dbWFg8++GBERAwNDUVjY2M89thjce+9957U4w0MDERDQ0MsjNtjfGXC6UwNgARHymfxZrwc/f39UV9ff8JrT/k1oaNHj8bmzZvj4MGDMW/evNi7d2/09vZGa2tr7ZpqtRoLFiyIbdu2/dvHGRoaioGBgWEHAOeHEUdo9+7dcfHFF0e1Wo377rsvXnzxxbj66qujt7c3IiIa
GxuHXd/Y2Fg7dzwdHR3R0NBQO6ZPnz7SKQFwlhpxhK666qrYtWtXvPXWW3H//ffH0qVL4913362dr1Qqw64vpRwz9lVr1qyJ/v7+2tHT0zPSKQFwlho/0jtMnDgxrrzyyoiImDNnTnR3d8eTTz5Zex2ot7c3pk6dWru+r6/vmN3RV1Wr1ahWqyOdBgDngNP+nFApJYaGhmLGjBnR1NQUnZ2dtXOHDx+Orq6umD9//un+GADOQSPaCT388MOxePHimD59egwODsbmzZvjzTffjNdeey0qlUq0tbVFe3t7tLS0REtLS7S3t8fkyZNjyZIlZ2r+AJzFRhShv/3tb3H33XfH/v37o6GhIWbNmhWvvfZaLFq0KCIiVq9eHYcOHYply5bFgQMHYu7cubFly5aoq6s7I5MH4Ox22p8TGm0+JwRwdvtGPicEAKdLhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBECa04pQR0dHVCqVaGtrq42VUmLt2rXR3NwckyZNioULF8aePXtOd54AnINOOULd3d3x9NNPx6xZs4aNr1+/PjZs2BAbN26M7u7uaGpqikWLFsXg4OBpTxaAc8spRejjjz+Ou+66K5555pm45JJLauOllHjiiSfikUceiTvvvDNmzpwZzz33XHzyySfx/PPPj9qkATg3nFKEli9fHrfeemvccsstw8b37t0bvb290draWhurVquxYMGC2LZt23Efa2hoKAYGBoYdAJwfxo/0Dps3b4633347uru7jznX29sbERGNjY3DxhsbG2Pfvn3HfbyOjo742c9+NtJpAHAOGNFOqKenJ1auXBm/+93v4sILL/y311UqlWG3SynHjH1pzZo10d/fXzt6enpGMiUAzmIj2gnt2LEj+vr6Yvbs2bWxo0ePxtatW2Pjxo3x3nvvRcQXO6KpU6fWrunr6ztmd/SlarUa1Wr1VOYOwFluRDuhm2++OXbv3h27du2qHXPmzIm77rordu3aFVdccUU0NTVFZ2dn7T6HDx+Orq6umD9//qhPHoCz24h2QnV1dTFz5sxhYxdddFF8+9vfro23tbVFe3t7tLS0REtLS7S3t8fkyZNjyZIlozdrAM4JI35jwtdZvXp1HDp0KJYtWxYHDhyIuXPnxpYtW6Kurm60fxQAZ7lKKaVkT+KrBgYGoqGhIRbG7TG+MiF7OgCM0JHyWbwZL0d/f3/U19ef8FrfHQdAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSiBAAaUQIgDQiBEAaEQIgjQgBkEaEAEgjQgCkESEA0ogQAGlECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiCNCAGQRoQASCNCAKQRIQDSjM+ewL8qpURExJH4LKIkTwaAETsSn0XEP5/PT2TMRWhwcDAiIv4cf0yeCQCnY3BwMBoaGk54TaWcTKq+QZ9//nl88MEHUVdXF5VKJQYGBmL69OnR09MT9fX12dMbs6zTybFOJ8c6nRzrdHyllBgcHIzm5uYYN+7Er/qMuZ3QuHHjYtq0aceM
19fX+5d8EqzTybFOJ8c6nRzrdKyv2wF9yRsTAEgjQgCkGfMRqlar8eijj0a1Ws2eyphmnU6OdTo51unkWKfTN+bemADA+WPM74QAOHeJEABpRAiANCIEQJoxH6GnnnoqZsyYERdeeGHMnj07/vSnP2VPKdXWrVvjtttui+bm5qhUKvHSSy8NO19KibVr10Zzc3NMmjQpFi5cGHv27MmZbJKOjo647rrroq6uLi677LK444474r333ht2jXWK2LRpU8yaNav2Qct58+bFq6++WjtvjY6vo6MjKpVKtLW11cas1akb0xH6/e9/H21tbfHII4/Ezp0744YbbojFixfH+++/nz21NAcPHoxrrrkmNm7ceNzz69evjw0bNsTGjRuju7s7mpqaYtGiRbXv5DsfdHV1xfLly+Ott96Kzs7OOHLkSLS2tsbBgwdr11iniGnTpsW6deti+/btsX379rjpppvi9ttvrz15WqNjdXd3x9NPPx2zZs0aNm6tTkMZw374wx+W++67b9jY9773vfLQQw8lzWhsiYjy4osv1m5//vnnpampqaxbt6429umnn5aGhobyq1/9KmGGY0NfX1+JiNLV1VVKsU4ncskll5Rf//rX1ug4BgcHS0tLS+ns7CwLFiwoK1euLKX483S6xuxO6PDhw7Fjx45obW0dNt7a2hrbtm1LmtXYtnfv3ujt7R22ZtVqNRYsWHBer1l/f39EREyZMiUirNPxHD16NDZv3hwHDx6MefPmWaPjWL58edx6661xyy23DBu3VqdnzH2B6Zc+/PDDOHr0aDQ2Ng4bb2xsjN7e3qRZjW1frsvx1mzfvn0ZU0pXSolVq1bF9ddfHzNnzowI6/RVu3fvjnnz5sWnn34aF198cbz44otx9dVX1548rdEXNm/eHG+//XZ0d3cfc86fp9MzZiP0pUqlMux2KeWYMYazZv+0YsWKeOedd+LPf/7zMeesU8RVV10Vu3btio8++ij+8Ic/xNKlS6Orq6t23hpF9PT0xMqVK2PLli1x4YUX/tvrrNWpGbO/jrv00kvjggsuOGbX09fXd8z/cfCFpqamiAhr9r8eeOCBeOWVV+KNN94Y9teDWKd/mjhxYlx55ZUxZ86c6OjoiGuuuSaefPJJa/QVO3bsiL6+vpg9e3aMHz8+xo8fH11dXfGLX/wixo8fX1sPa3VqxmyEJk6cGLNnz47Ozs5h452dnTF//vykWY1tM2bMiKampmFrdvjw4ejq6jqv1qyUEitWrIgXXnghXn/99ZgxY8aw89bp3yulxNDQkDX6iptvvjl2794du3btqh1z5syJu+66K3bt2hVXXHGFtTodee+J+HqbN28uEyZMKL/5zW/Ku+++W9ra2spFF11U/vrXv2ZPLc3g4GDZuXNn2blzZ4mIsmHDhrJz586yb9++Ukop69atKw0NDeWFF14ou3fvLj/5yU/K1KlTy8DAQPLMvzn3339/aWhoKG+++WbZv39/7fjkk09q11inUtasWVO2bt1a9u7dW955553y8MMPl3HjxpUtW7aUUqzRiXz13XGlWKvTMaYjVEopv/zlL8vll19eJk6cWK699tra22zPV2+88UaJiGOOpUuXllK+eLvoo48+Wpqamkq1Wi033nhj2b17d+6kv2HHW5+IKM8++2ztGutUyj333FP7b+s73/lOufnmm2sBKsUanci/RshanTp/lQMAacbsa0IAnPtECIA0IgRAGhECII0IAZBGhABII0IApBEhANKIEABpRAiANCIEQBoRAiDN/wf760AGi+dbEwAAAABJRU5ErkJggg==",
+ "image/png": "",
"text/plain": [
""
]
@@ -358,33 +477,336 @@
}
],
"source": [
- "i = 1000\n",
- "plt.imshow(class_label[(i*2500):(i*2500+2500)].reshape(50,50)) "
+ "i=1000\n",
+ "\n",
+ "plt.imshow(np.array(df_results['Barite'][(i*2500):(i*2500+2500)]).reshape(50,50), interpolation='bicubic', origin='lower')\n",
+ "plt.contour(np.array(df_results['Class'][(i*2500):(i*2500+2500)]).reshape(50,50), levels=[0.1], colors='red', origin='lower')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Split into Training and Testing datasets"
]
},
{
"cell_type": "code",
- "execution_count": 52,
+ "execution_count": 90,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train, X_test, y_train, y_test = sk.train_test_split(df_design, df_results, test_size = 0.2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Perform SMOTE oversampling on the training data to balance classes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Counter(df_design['Class'])\n",
+ "oversample = SMOTE()\n",
+ "\n",
+ "# Oversample the design and result features TOGETHER, in a single fit_resample call.\n",
+ "# Two independent calls each generate their own synthetic rows, so synthetic design\n",
+ "# row i would not correspond to synthetic result row i and the network would be\n",
+ "# trained on mismatched input/target pairs.\n",
+ "n_design_features = X_train.shape[1] - 1  # last column is the 'Class' label\n",
+ "joint_features = pd.concat(\n",
+ "    [X_train.iloc[:, :-1].add_suffix('_design'), y_train.iloc[:, :-1].add_suffix('_result')],\n",
+ "    axis=1,\n",
+ ")\n",
+ "joint_resampled, design_classes_resampled = oversample.fit_resample(joint_features, X_train.iloc[:, -1])\n",
+ "\n",
+ "# Split the jointly resampled frame back into its two halves and restore the original column names.\n",
+ "design_resampled = joint_resampled.iloc[:, :n_design_features].copy()\n",
+ "design_resampled.columns = X_train.columns[:-1]\n",
+ "target_resampled = joint_resampled.iloc[:, n_design_features:].copy()\n",
+ "target_resampled.columns = y_train.columns[:-1]\n",
+ "\n",
+ "# One label vector serves both frames: the rows were resampled as pairs.\n",
+ "# NOTE(review): assumes y_train's 'Class' column holds the same labels as X_train's - confirm.\n",
+ "target_classes_resampled = design_classes_resampled.copy()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train = pd.concat([design_resampled, design_classes_resampled], axis=1)\n",
+ "y_train = pd.concat([target_resampled, target_classes_resampled], axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Define Scaling and Normalization Functions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def log_scale(df_design, df_result, func_dict):\n",
+ " \n",
+ " df_design = df_design.copy()\n",
+ " df_result = df_result.copy()\n",
+ " \n",
+ " for key in df_design.keys():\n",
+ " if key != \"Class\":\n",
+ " df_design[key] = np.vectorize(func_dict[key])(df_design[key])\n",
+ " df_result[key] = np.vectorize(func_dict[key])(df_result[key])\n",
+ " \n",
+ " return df_design, df_result\n",
+ "\n",
+ "# Get minimum and maximum values for each column\n",
+ "def get_min_max(df_design, df_result):\n",
+ " \n",
+ " min_vals_des = df_design.min()\n",
+ " max_vals_des = df_design.max()\n",
+ " \n",
+ " min_vals_res = df_result.min()\n",
+ " max_vals_res = df_result.max()\n",
+ "\n",
+ " # minimum of input and output data to get global minimum/maximum\n",
+ " data_min = np.minimum(min_vals_des, min_vals_res).to_dict()\n",
+ " data_max = np.maximum(max_vals_des, max_vals_res).to_dict()\n",
+ "\n",
+ " return data_min, data_max\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_design_log, df_results_log = log_scale(df_design, df_results, func_dict_in)\n",
+ "data_min_log, data_max_log = get_min_max(df_design_log, df_results_log)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train_log, y_train_log = log_scale(X_train, y_train, func_dict_in)\n",
+ "X_test_log, y_test_log = log_scale(X_test, y_test, func_dict_in)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_min_log, train_max_log = get_min_max(X_train_log, y_train_log)\n",
+ "test_min_log, test_max_log = get_min_max(X_test_log, y_test_log)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def preprocess(data, func_dict, data_min, data_max):\n",
+ " data = data.copy()\n",
+ " for key in data.keys():\n",
+ " if key != \"Class\":\n",
+ " data[key] = (data[key] - data_min[key]) / (data_max[key] - data_min[key])\n",
+ "\n",
+ " return data\n",
+ "\n",
+ "def postprocess(data, func_dict, data_min, data_max):\n",
+ " data = data.copy()\n",
+ " for key in data.keys():\n",
+ " if key != \"Class\":\n",
+ " data[key] = data[key] * (data_max[key] - data_min[key]) + data_min[key]\n",
+ " data[key] = np.vectorize(func_dict[key])(data[key])\n",
+ " return data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Preprocess the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pp_design = preprocess(df_design_log, func_dict_in, data_min_log, data_max_log)\n",
+ "pp_results = preprocess(df_results_log, func_dict_in, data_min_log, data_max_log)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Min-max scale every split with the TRAINING statistics only. Scaling the test split\n",
+ "# with its own min/max would put it on a different scale than the data the model was\n",
+ "# fitted on and would leak test-set information into the evaluation.\n",
+ "X_train_preprocess = preprocess(X_train_log, func_dict_in, train_min_log, train_max_log)\n",
+ "y_train_preprocess = preprocess(y_train_log, func_dict_in, train_min_log, train_max_log)\n",
+ "\n",
+ "# NOTE(review): any later postprocess() of test predictions must use the same\n",
+ "# train_min_log / train_max_log pair - confirm against the evaluation cells.\n",
+ "X_test_preprocess = preprocess(X_test_log, func_dict_in, train_min_log, train_max_log)\n",
+ "y_test_preprocess = preprocess(y_test_log, func_dict_in, train_min_log, train_max_log)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Sample the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "X_train, X_val, y_train, y_val = sk.train_test_split(X_train_preprocess, y_train_preprocess, test_size = 0.1)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Custom Loss function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def custom_loss_H20(df_design_log, df_result_log, data_min_log, data_max_log, func_dict_out, postprocess):\n",
+ " df_result = postprocess(df_result_log, func_dict_out, data_min_log, data_max_log) \n",
+ " return keras.losses.Huber + np.sum(((df_result['H'] / df_result['O']) - 2)**2)\n",
+ "\n",
+ "def loss_wrapper(data_min_log, data_max_log, func_dict_out, postprocess):\n",
+ " def loss(df_design_log, df_result_log):\n",
+ " return custom_loss_H20(df_design_log, df_result_log, data_min_log, data_max_log, func_dict_out, postprocess)\n",
+ " return loss"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(3559968, 12)"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "X_train.iloc[:, :-1].shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Train the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Class column already exists\n"
+ "Epoch 1/50\n",
+ "\u001b[1m6954/6954\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m91s\u001b[0m 13ms/step - loss: 0.0070 - val_loss: 0.0066\n",
+ "Epoch 2/50\n",
+ "\u001b[1m6954/6954\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m104s\u001b[0m 15ms/step - loss: 0.0066 - val_loss: 0.0066\n",
+ "Epoch 3/50\n",
+ "\u001b[1m1147/6954\u001b[0m \u001b[32m━━━\u001b[0m\u001b[37m━━━━━━━━━━━━━━━━━\u001b[0m \u001b[1m2:01\u001b[0m 21ms/step - loss: 0.0066"
]
}
],
"source": [
- "if(\"Class\" in df_design.columns):\n",
- " print(\"Class column already exists\")\n",
- "else:\n",
- " df_design = pd.concat([df_design, class_label], axis=1)"
+ "# measure time\n",
+ "start = time.time()\n",
+ "\n",
+ "history = model_large.fit(X_train.iloc[:, :-1], \n",
+ " y_train.iloc[:, :-1], \n",
+ " batch_size = batch_size, \n",
+ " epochs = epochs, \n",
+ " validation_data = (X_val.iloc[:,:-1], y_val.iloc[:, :-1])\n",
+ ")\n",
+ "\n",
+ "end = time.time()\n",
+ "\n",
+ "print(\"Training took {} seconds\".format(end - start))"
]
},
{
"cell_type": "code",
- "execution_count": 53,
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.plot(history.history[\"loss\"], \"o-\", label = \"Training Loss\")\n",
+ "plt.xlabel(\"Epoch\")\n",
+ "# plt.yscale('log')\n",
+ "plt.ylabel(\"Loss (Huber)\")\n",
+ "plt.grid('on')\n",
+ "\n",
+ "plt.savefig(\"loss_all.png\", dpi=300)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "plt.plot(history.history[\"loss\"][1:], \"o-\", label = \"Training Loss\")\n",
+ "plt.xlabel(\"Epoch\")\n",
+ "# plt.yscale('log')\n",
+ "plt.ylabel(\"Loss (Huber)\")\n",
+ "plt.grid('on')\n",
+ "plt.savefig(\"loss_1_to_end.png\", dpi=300)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
"metadata": {},
"outputs": [
{
@@ -648,7 +1070,7 @@
"[2502500 rows x 13 columns]"
]
},
- "execution_count": 53,
+ "execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
@@ -657,318 +1079,6 @@
"df_design"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Define Scaling and Normalization Functions"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [],
- "source": [
- "def log_scale(df_design, df_result, func_dict):\n",
- " \n",
- " df_design = df_design.copy()\n",
- " df_result = df_result.copy()\n",
- " \n",
- " for key in df_design.keys():\n",
- " if key != \"Class\":\n",
- " df_design[key] = np.vectorize(func_dict[key])(df_design[key])\n",
- " df_result[key] = np.vectorize(func_dict[key])(df_result[key])\n",
- " \n",
- " return df_result, df_design\n",
- "\n",
- "# Get minimum and maximum values for each column\n",
- "def get_min_max(df_design, df_result):\n",
- " \n",
- " min_vals_des = df_design.min()\n",
- " max_vals_des = df_design.max()\n",
- " \n",
- " min_vals_res = df_result.min()\n",
- " max_vals_res = df_result.max()\n",
- "\n",
- " # minimum of input and output data to get global minimum/maximum\n",
- " data_min = np.minimum(min_vals_des, min_vals_res).to_dict()\n",
- " data_max = np.maximum(max_vals_des, max_vals_res).to_dict()\n",
- "\n",
- " return data_min, data_max\n",
- "\n",
- "\n",
- "df_design_log, df_results_log = log_scale(df_design, df_results, func_dict_in)\n",
- "data_min_log, data_max_log = get_min_max(df_design_log, df_results_log)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [],
- "source": [
- "def preprocess(data, func_dict, data_min, data_max):\n",
- " data = data.copy()\n",
- " for key in data.keys():\n",
- " if key != \"Class\":\n",
- " data[key] = (data[key] - data_min[key]) / (data_max[key] - data_min[key])\n",
- "\n",
- " return data\n",
- "\n",
- "def postprocess(data, func_dict, data_min, data_max):\n",
- " data = data.copy()\n",
- " for key in data.keys():\n",
- " if key != \"Class\":\n",
- " data[key] = data[key] * (data_max[key] - data_min[key]) + data_min[key]\n",
- " data[key] = np.vectorize(func_dict[key])(data[key])\n",
- " return data"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Preprocess the data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [],
- "source": [
- "pp_design = preprocess(df_design_log, func_dict_in, data_min_log, data_max_log)\n",
- "pp_results = preprocess(df_results_log, func_dict_in, data_min_log, data_max_log)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Sample the data"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [],
- "source": [
- "X_train, X_test, y_train, y_test = sk.train_test_split(pp_design, pp_results, test_size = 0.2)\n",
- "X_train, X_val, y_train, y_val = sk.train_test_split(X_train, y_train, test_size = 0.1)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Custom Loss function"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def custom_loss_H20(df_design_log, df_result_log, data_min_log, data_max_log, func_dict_out, postprocess):\n",
- " df_result = postprocess(df_result_log, func_dict_out, data_min_log, data_max_log) \n",
- " return keras.losses.Huber + np.sum(((df_result['H'] / df_result['O']) - 2)**2)\n",
- "\n",
- "def loss_wrapper(data_min_log, data_max_log, func_dict_out, postprocess):\n",
- " def loss(df_design_log, df_result_log):\n",
- " return custom_loss_H20(df_design_log, df_result_log, data_min_log, data_max_log, func_dict_out, postprocess)\n",
- " return loss"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Train the model"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Epoch 1/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 2ms/step - loss: 0.0015 - val_loss: 1.2993e-06\n",
- "Epoch 2/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 1.3182e-06 - val_loss: 1.1714e-06\n",
- "Epoch 3/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 1.4322e-06 - val_loss: 1.4424e-06\n",
- "Epoch 4/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 1.1811e-06 - val_loss: 1.1027e-06\n",
- "Epoch 5/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 1.0509e-06 - val_loss: 1.1202e-06\n",
- "Epoch 6/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.9101e-07 - val_loss: 1.0344e-06\n",
- "Epoch 7/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 8.5978e-07 - val_loss: 1.0202e-06\n",
- "Epoch 8/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.6363e-07 - val_loss: 1.5508e-06\n",
- "Epoch 9/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 8.2612e-07 - val_loss: 1.0281e-06\n",
- "Epoch 10/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.8237e-07 - val_loss: 9.6918e-07\n",
- "Epoch 11/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.8727e-07 - val_loss: 9.8902e-07\n",
- "Epoch 12/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.2731e-07 - val_loss: 9.4628e-07\n",
- "Epoch 13/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 2ms/step - loss: 6.2018e-07 - val_loss: 1.0144e-06\n",
- "Epoch 14/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.0086e-07 - val_loss: 9.9860e-07\n",
- "Epoch 15/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.6483e-07 - val_loss: 9.5001e-07\n",
- "Epoch 16/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.8847e-07 - val_loss: 9.4421e-07\n",
- "Epoch 17/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.6030e-07 - val_loss: 9.3255e-07\n",
- "Epoch 18/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.4765e-07 - val_loss: 9.2782e-07\n",
- "Epoch 19/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 2ms/step - loss: 7.0107e-07 - val_loss: 9.2918e-07\n",
- "Epoch 20/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 5.7916e-07 - val_loss: 9.3070e-07\n",
- "Epoch 21/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.1965e-07 - val_loss: 9.3583e-07\n",
- "Epoch 22/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.1729e-07 - val_loss: 9.2800e-07\n",
- "Epoch 23/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 5.8376e-07 - val_loss: 9.2606e-07\n",
- "Epoch 24/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.1949e-07 - val_loss: 9.2550e-07\n",
- "Epoch 25/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.0228e-07 - val_loss: 9.2386e-07\n",
- "Epoch 26/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.4762e-07 - val_loss: 9.2222e-07\n",
- "Epoch 27/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.3545e-07 - val_loss: 9.2336e-07\n",
- "Epoch 28/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.1678e-07 - val_loss: 9.2510e-07\n",
- "Epoch 29/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.2552e-07 - val_loss: 9.2267e-07\n",
- "Epoch 30/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.7044e-07 - val_loss: 9.2244e-07\n",
- "Epoch 31/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.4412e-07 - val_loss: 9.2193e-07\n",
- "Epoch 32/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.9198e-07 - val_loss: 9.2181e-07\n",
- "Epoch 33/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 8.8825e-07 - val_loss: 9.2173e-07\n",
- "Epoch 34/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.1502e-07 - val_loss: 9.2309e-07\n",
- "Epoch 35/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.5551e-07 - val_loss: 9.2157e-07\n",
- "Epoch 36/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.3050e-07 - val_loss: 9.2172e-07\n",
- "Epoch 37/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.8292e-07 - val_loss: 9.2127e-07\n",
- "Epoch 38/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 5.7185e-07 - val_loss: 9.2111e-07\n",
- "Epoch 39/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.1807e-07 - val_loss: 9.2119e-07\n",
- "Epoch 40/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 5.7785e-07 - val_loss: 9.2112e-07\n",
- "Epoch 41/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.6563e-07 - val_loss: 9.2108e-07\n",
- "Epoch 42/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.1370e-07 - val_loss: 9.2109e-07\n",
- "Epoch 43/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.2470e-07 - val_loss: 9.2105e-07\n",
- "Epoch 44/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 2ms/step - loss: 7.2408e-07 - val_loss: 9.2102e-07\n",
- "Epoch 45/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 2ms/step - loss: 6.6530e-07 - val_loss: 9.2098e-07\n",
- "Epoch 46/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.7502e-07 - val_loss: 9.2098e-07\n",
- "Epoch 47/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.3829e-07 - val_loss: 9.2094e-07\n",
- "Epoch 48/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 5.8739e-07 - val_loss: 9.2096e-07\n",
- "Epoch 49/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 7.0502e-07 - val_loss: 9.2095e-07\n",
- "Epoch 50/50\n",
- "\u001b[1m3520/3520\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m6s\u001b[0m 2ms/step - loss: 6.5994e-07 - val_loss: 9.2094e-07\n",
- "Training took 317.1207675933838 seconds\n"
- ]
- }
- ],
- "source": [
- "# measure time\n",
- "start = time.time()\n",
- "\n",
- "history = model.fit(X_train, \n",
- " y_train, \n",
- " batch_size = batch_size, \n",
- " epochs = epochs, \n",
- " validation_data = (X_val, y_val)\n",
- ")\n",
- "\n",
- "end = time.time()\n",
- "\n",
- "print(\"Training took {} seconds\".format(end - start))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "plt.plot(history.history[\"loss\"], \"o-\", label = \"Training Loss\")\n",
- "plt.xlabel(\"Epoch\")\n",
- "# plt.yscale('log')\n",
- "plt.ylabel(\"Loss (Huber)\")\n",
- "plt.grid('on')\n",
- "\n",
- "plt.savefig(\"loss_all.png\", dpi=300)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "plt.plot(history.history[\"loss\"][1:], \"o-\", label = \"Training Loss\")\n",
- "plt.xlabel(\"Epoch\")\n",
- "# plt.yscale('log')\n",
- "plt.ylabel(\"Loss (Huber)\")\n",
- "plt.grid('on')\n",
- "plt.savefig(\"loss_1_to_end.png\", dpi=300)\n"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -978,29 +1088,86 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "\u001b[1m15641/15641\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 337us/step - loss: 7.0854e-07\n"
+ "\u001b[1m15641/15641\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 336us/step - loss: 6.6414e-07\n"
]
},
{
"data": {
"text/plain": [
- "6.561523377968115e-07"
+ "8.585521982240607e-07"
]
},
- "execution_count": 20,
+ "execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "model.evaluate(X_test, y_test)"
+ "# test on all test data\n",
+ "model.evaluate(X_test.iloc[:,:-1], y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1m15454/15454\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m5s\u001b[0m 331us/step - loss: 2.7927e-07\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "3.939527175589319e-07"
+ ]
+ },
+ "execution_count": 36,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# test on non-reactive data\n",
+ "model.evaluate(X_test[X_test['Class'] == 0].iloc[:,:-1], y_test[X_test['Class'] == 0])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[1m188/188\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 400us/step - loss: 3.3173e-05\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "3.921399184037e-05"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# test on reactive data\n",
+ "model.evaluate(X_test[X_test['Class'] == 1].iloc[:,:-1], y_test[X_test['Class'] == 1])"
]
},
{