Commit c9f6290b authored by Bannier Delphine's avatar Bannier Delphine
Browse files

set up kfold

parent 20c0e823
......@@ -11,7 +11,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/"
"https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/\n",
"\n",
"https://www.kaggle.com/franklemuchahary/basic-cnn-keras-with-cross-validation"
]
},
{
......@@ -312,13 +314,6 @@
"print(len(train_dataset),len(test_dataset))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -335,6 +330,9 @@
"from preprocessing.scale_data import scale_variable\n",
"\n",
"sc, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX,'Target_FVC')\n",
"sc1, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX,'First_FVC')\n",
"sc2, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX,'Age')\n",
"\n",
"trainY = trainAttrX.loc[:,'Target_FVC_scaled']\n",
"testY = testAttrX.loc[:,'Target_FVC_scaled']"
]
......@@ -350,8 +348,11 @@
"trainAttrX, testAttrX = encode_variable(trainAttrX, testAttrX,'Sex')\n",
"trainAttrX, testAttrX = encode_variable(trainAttrX, testAttrX,'SmokingStatus')\n",
"\n",
"trainAttrX.drop(columns = ['Sex','SmokingStatus','Target_FVC','Target_FVC_scaled','PatientID'], inplace = True)\n",
"testAttrX.drop(columns = ['Sex','SmokingStatus','Target_FVC','Target_FVC_scaled','PatientID'], inplace = True)"
"for dft in [trainAttrX,testAttrX]:\n",
" dft.drop(columns = ['Sex','SmokingStatus','Target_FVC','Target_FVC_scaled',\n",
" 'PatientID','First_FVC','Age'], inplace = True)\n",
" dft.loc[:,'First_Percent'] = dft.loc[:,'First_Percent']/100\n",
" dft.loc[:,'Delta_week'] = dft.loc[:,'Delta_week']/133"
]
},
{
......@@ -380,10 +381,10 @@
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Age</th>\n",
" <th>First_FVC</th>\n",
" <th>First_Percent</th>\n",
" <th>Delta_week</th>\n",
" <th>First_FVC_scaled</th>\n",
" <th>Age_scaled</th>\n",
" <th>Sex_le</th>\n",
" <th>SmokingStatus_le</th>\n",
" </tr>\n",
......@@ -391,46 +392,46 @@
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>79</td>\n",
" <td>2315</td>\n",
" <td>58.253649</td>\n",
" <td>9</td>\n",
" <td>0.582536</td>\n",
" <td>0.067669</td>\n",
" <td>-0.631784</td>\n",
" <td>1.684379</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>79</td>\n",
" <td>2315</td>\n",
" <td>58.253649</td>\n",
" <td>11</td>\n",
" <td>0.582536</td>\n",
" <td>0.082707</td>\n",
" <td>-0.631784</td>\n",
" <td>1.684379</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>79</td>\n",
" <td>2315</td>\n",
" <td>58.253649</td>\n",
" <td>13</td>\n",
" <td>0.582536</td>\n",
" <td>0.097744</td>\n",
" <td>-0.631784</td>\n",
" <td>1.684379</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>79</td>\n",
" <td>2315</td>\n",
" <td>58.253649</td>\n",
" <td>15</td>\n",
" <td>0.582536</td>\n",
" <td>0.112782</td>\n",
" <td>-0.631784</td>\n",
" <td>1.684379</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>79</td>\n",
" <td>2315</td>\n",
" <td>58.253649</td>\n",
" <td>21</td>\n",
" <td>0.582536</td>\n",
" <td>0.157895</td>\n",
" <td>-0.631784</td>\n",
" <td>1.684379</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
......@@ -439,12 +440,19 @@
"</div>"
],
"text/plain": [
" Age First_FVC First_Percent Delta_week Sex_le SmokingStatus_le\n",
"0 79 2315 58.253649 9 1 1\n",
"1 79 2315 58.253649 11 1 1\n",
"2 79 2315 58.253649 13 1 1\n",
"3 79 2315 58.253649 15 1 1\n",
"4 79 2315 58.253649 21 1 1"
" First_Percent Delta_week First_FVC_scaled Age_scaled Sex_le \\\n",
"0 0.582536 0.067669 -0.631784 1.684379 1 \n",
"1 0.582536 0.082707 -0.631784 1.684379 1 \n",
"2 0.582536 0.097744 -0.631784 1.684379 1 \n",
"3 0.582536 0.112782 -0.631784 1.684379 1 \n",
"4 0.582536 0.157895 -0.631784 1.684379 1 \n",
"\n",
" SmokingStatus_le \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
"execution_count": 12,
......@@ -560,67 +568,251 @@
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\n",
"\n",
"#set early stopping criteria\n",
"pat = 5 #this is the number of epochs with no improvement after which the training will stop\n",
"es = EarlyStopping(monitor='val_loss', patience=pat, verbose=1)\n",
"\n",
"#define the model checkpoint callback -> this will keep on saving the model as a physical file\n",
"cp = ModelCheckpoint('superposition_injection.h5', verbose=1, save_best_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"def custom_shuffle_split(trainAttrX, train_dataset, trainY, test_size=0.1, random_state=None):\n",
"    '''Randomly hold out a share of the samples for validation.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    trainAttrX : tabular features; must support positional ``.iloc`` indexing.\n",
"    train_dataset : object indexed along its first axis with an index array\n",
"        (assumed to be a numpy array of images - TODO confirm).\n",
"    trainY : targets aligned row-by-row with the two inputs (pandas object or array).\n",
"    test_size : fraction of the samples assigned to the validation part.\n",
"    random_state : optional seed for a reproducible split; ``None`` keeps drawing\n",
"        from numpy's global random state, as before.\n",
"\n",
"    Returns\n",
"    -------\n",
"    train_x, val_x, train_y, val_y, where each ``*_x`` is the two-element list\n",
"    ``[attributes, images]``.\n",
"    '''\n",
"    n_samples = len(trainY)\n",
"    order = np.arange(n_samples)\n",
"    rng = np.random if random_state is None else np.random.RandomState(random_state)\n",
"    rng.shuffle(order)\n",
"    cut = int(n_samples * test_size)\n",
"    val_idx, train_idx = order[:cut], order[cut:]\n",
"    # Select targets by position, exactly like the features: plain ``trainY[idx]`` on a\n",
"    # pandas Series looks up index labels and misaligns X and y (or raises) when the\n",
"    # index is not 0..n-1.\n",
"    y_at = trainY.iloc if hasattr(trainY, 'iloc') else trainY\n",
"    train_x = [trainAttrX.iloc[train_idx], train_dataset[train_idx]]\n",
"    val_x = [trainAttrX.iloc[val_idx], train_dataset[val_idx]]\n",
"    return train_x, val_x, y_at[train_idx], y_at[val_idx]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/50\n",
" 84/137 [=================>............] - ETA: 27s - loss: 198187.9375"
"Training on Fold: 1\n",
"Epoch 1/30\n",
"111/111 [==============================] - ETA: 0s - loss: 316.5363\n",
"Epoch 00001: val_loss improved from inf to 107.96893, saving model to superposition_injection.h5\n",
"111/111 [==============================] - 56s 503ms/step - loss: 316.5363 - val_loss: 107.9689\n",
"Epoch 2/30\n",
"111/111 [==============================] - ETA: 0s - loss: 197.5600\n",
"Epoch 00002: val_loss improved from 107.96893 to 100.10335, saving model to superposition_injection.h5\n",
"111/111 [==============================] - 55s 495ms/step - loss: 197.5600 - val_loss: 100.1033\n",
"Epoch 3/30\n",
"111/111 [==============================] - ETA: 0s - loss: 220.9793\n",
"Epoch 00003: val_loss improved from 100.10335 to 99.90191, saving model to superposition_injection.h5\n",
"111/111 [==============================] - 54s 485ms/step - loss: 220.9793 - val_loss: 99.9019\n",
"Epoch 4/30\n",
"111/111 [==============================] - ETA: 0s - loss: 126.4834\n",
"Epoch 00004: val_loss did not improve from 99.90191\n",
"111/111 [==============================] - 55s 498ms/step - loss: 126.4834 - val_loss: 100.0706\n",
"Epoch 5/30\n",
"111/111 [==============================] - ETA: 0s - loss: 130.7720\n",
"Epoch 00005: val_loss improved from 99.90191 to 99.70051, saving model to superposition_injection.h5\n",
"111/111 [==============================] - 54s 485ms/step - loss: 130.7720 - val_loss: 99.7005\n",
"Epoch 6/30\n",
"111/111 [==============================] - ETA: 0s - loss: 164.4418\n",
"Epoch 00006: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 54s 483ms/step - loss: 164.4418 - val_loss: 100.6682\n",
"Epoch 7/30\n",
"111/111 [==============================] - ETA: 0s - loss: 119.5080\n",
"Epoch 00007: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 55s 497ms/step - loss: 119.5080 - val_loss: 101.0239\n",
"Epoch 8/30\n",
"111/111 [==============================] - ETA: 0s - loss: 151.3392\n",
"Epoch 00008: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 505ms/step - loss: 151.3392 - val_loss: 100.1579\n",
"Epoch 9/30\n",
"111/111 [==============================] - ETA: 0s - loss: 105.6256\n",
"Epoch 00009: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 55s 492ms/step - loss: 105.6256 - val_loss: 100.2133\n",
"Epoch 10/30\n",
"111/111 [==============================] - ETA: 0s - loss: 114.8794\n",
"Epoch 00010: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 54s 491ms/step - loss: 114.8794 - val_loss: 100.3191\n",
"Epoch 00010: early stopping\n",
"4/4 [==============================] - 1s 200ms/step - loss: 100.1156\n",
"Val Score: 100.11563110351562\n",
"====================================================================================\n",
"\n",
"\n",
"Training on Fold: 2\n",
"Epoch 1/30\n",
"111/111 [==============================] - ETA: 0s - loss: 168.3844\n",
"Epoch 00001: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 55s 494ms/step - loss: 168.3844 - val_loss: 107.7039\n",
"Epoch 2/30\n",
"111/111 [==============================] - ETA: 0s - loss: 111.5110\n",
"Epoch 00002: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 507ms/step - loss: 111.5110 - val_loss: 109.4569\n",
"Epoch 3/30\n",
"111/111 [==============================] - ETA: 0s - loss: 114.4467\n",
"Epoch 00003: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 505ms/step - loss: 114.4467 - val_loss: 101.8173\n",
"Epoch 4/30\n",
"111/111 [==============================] - ETA: 0s - loss: 112.2941\n",
"Epoch 00004: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 55s 492ms/step - loss: 112.2941 - val_loss: 109.3902\n",
"Epoch 5/30\n",
"111/111 [==============================] - ETA: 0s - loss: 112.2740\n",
"Epoch 00005: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 503ms/step - loss: 112.2740 - val_loss: 101.4342\n",
"Epoch 6/30\n",
"111/111 [==============================] - ETA: 0s - loss: 111.4760\n",
"Epoch 00006: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 55s 500ms/step - loss: 111.4760 - val_loss: 99.8436\n",
"Epoch 7/30\n",
"111/111 [==============================] - ETA: 0s - loss: 110.4311\n",
"Epoch 00007: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 503ms/step - loss: 110.4311 - val_loss: 110.9042\n",
"Epoch 8/30\n",
"111/111 [==============================] - ETA: 0s - loss: 107.2995\n",
"Epoch 00008: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 58s 520ms/step - loss: 107.2995 - val_loss: 100.7832\n",
"Epoch 9/30\n",
"111/111 [==============================] - ETA: 0s - loss: 103.8186\n",
"Epoch 00009: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 56s 500ms/step - loss: 103.8186 - val_loss: 103.2870\n",
"Epoch 10/30\n",
"111/111 [==============================] - ETA: 0s - loss: 101.8159\n",
"Epoch 00010: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 54s 489ms/step - loss: 101.8159 - val_loss: 100.8220\n",
"Epoch 11/30\n",
"111/111 [==============================] - ETA: 0s - loss: 104.4015\n",
"Epoch 00011: val_loss did not improve from 99.70051\n",
"111/111 [==============================] - 54s 483ms/step - loss: 104.4015 - val_loss: 110.9032\n",
"Epoch 00011: early stopping\n",
"4/4 [==============================] - 1s 219ms/step - loss: 108.3460\n",
"Val Score: 108.34602355957031\n",
"====================================================================================\n",
"\n",
"\n"
]
},
}
],
"source": [
"from processing.models import fit_and_evaluate\n",
"\n",
"n_folds = 2\n",
"epochs = 30\n",
"batch_size = 8\n",
"\n",
"# One result object per fold (its `.history` dict is read by the plotting cells below).\n",
"model_history = []\n",
"\n",
"for i in range(n_folds):\n",
"    print(\"Training on Fold: \",i+1)\n",
"    # Build a new, untrained network for every fold.\n",
"    model = create_hybrid(trainAttrX.shape[1], shape = (240,240,1))\n",
"    # Compile with a fresh optimizer that has the same hyper-parameters as `opt`:\n",
"    # sharing one optimizer instance across folds carries its step counter (and any\n",
"    # learning-rate decay) and slot variables over from the previous fold's model.\n",
"    fold_opt = type(opt).from_config(opt.get_config()) if hasattr(opt, 'get_config') else opt\n",
"    model.compile(loss=\"mean_absolute_percentage_error\", optimizer=fold_opt)\n",
"    # A new random 90/10 split is drawn for each fold.\n",
"    t_x, val_x, t_y, val_y = custom_shuffle_split(trainAttrX, train_dataset, trainY, test_size = 0.1)\n",
"    # NOTE(review): `es` and `cp` are created once and shared by all folds, so the\n",
"    # checkpoint's best val_loss carries over between folds and the saved file holds\n",
"    # the best model seen over all folds - confirm this is intended.\n",
"    model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size, model, es, cp))\n",
"    print(\"=======\"*12, end=\"\\n\\n\\n\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<timed exec>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36m_method_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_method_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_in_multi_worker_mode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 66\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 67\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0;31m# Running inside `run_distribute_coordinator` already.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 846\u001b[0m batch_size=batch_size):\n\u001b[1;32m 847\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_train_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mtmp_logs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0;31m# Catch OutOfRangeError for Datasets of unknown size.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;31m# This blocks until the batch has finished executing.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 578\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 579\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 580\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 581\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 582\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 609\u001b[0m \u001b[0;31m# In this case we have created variables on the first call, so we run the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 610\u001b[0m \u001b[0;31m# defunned version which is guaranteed to never create variables.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 611\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=not-callable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 612\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 613\u001b[0m \u001b[0;31m# Release the lock early so that multiple threads can perform the call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 2418\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2419\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2420\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# pylint: disable=protected-access\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2422\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[0;34m(self, args, kwargs)\u001b[0m\n\u001b[1;32m 1659\u001b[0m \u001b[0;31m`\u001b[0m\u001b[0margs\u001b[0m\u001b[0;31m`\u001b[0m \u001b[0;32mand\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m`\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;31m`\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1660\u001b[0m \"\"\"\n\u001b[0;32m-> 1661\u001b[0;31m return self._call_flat(\n\u001b[0m\u001b[1;32m 1662\u001b[0m (t for t in nest.flatten((args, kwargs), expand_composites=True)\n\u001b[1;32m 1663\u001b[0m if isinstance(t, (ops.Tensor,\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1743\u001b[0m and executing_eagerly):\n\u001b[1;32m 1744\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1745\u001b[0;31m return self._build_call_outputs(self._inference_function.call(\n\u001b[0m\u001b[1;32m 1746\u001b[0m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[1;32m 1747\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 591\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0m_InterpolateFunctionError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 592\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcancellation_manager\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 593\u001b[0;31m outputs = execute.execute(\n\u001b[0m\u001b[1;32m 594\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msignature\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0mnum_outputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_num_outputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/anaconda3/lib/python3.8/site-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0m\u001b[1;32m 60\u001b[0m inputs, attrs, num_outputs)\n\u001b[1;32m 61\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%%time\n",
"hist = model.fit(\n",
" x=[trainAttrX, train_dataset], y=trainY,\n",
" validation_data=([testAttrX, test_dataset], testY),\n",
" epochs=50, batch_size=8)"
"import matplotlib.pyplot as plt \n",
"\n",
"# Training loss per epoch, one curve per fold that was actually trained\n",
"# (no hard-coded fold indices, so this keeps working when n_folds changes).\n",
"plt.title('Loss vs Epochs')\n",
"for fold, fold_history in enumerate(model_history, start=1):\n",
"    plt.plot(fold_history.history['loss'], label=f'Training Fold {fold}')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('loss')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 21,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'hist' is not defined",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-16-17f56577a9c3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpostprocessing\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_history\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mplot_history\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mplot_history\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mhist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mNameError\u001b[0m: name 'hist' is not defined"
]
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"from postprocessing.plot_history import plot_history\n",
"# Validation loss per epoch, one curve per fold that was actually trained\n",
"# (no hard-coded fold indices, so this keeps working when n_folds changes).\n",
"plt.title('Validation loss vs Epochs')\n",
"for fold, fold_history in enumerate(model_history, start=1):\n",
"    plt.plot(fold_history.history['val_loss'], label=f'Training Fold {fold}')\n",
"plt.xlabel('Epoch')\n",
"plt.ylabel('val_loss')\n",
"plt.legend()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Load through tf.keras, the same package the callbacks cell imports from,\n",
"# rather than mixing standalone `keras` with `tensorflow.keras`.\n",
"from tensorflow.keras.models import load_model\n",
"\n",
"plot_history(hist)"
"model = load_model('superposition_injection.h5')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
......@@ -637,24 +829,59 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 25,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:predicting ...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"avg. FVC: 2690.479018721756, std FVC 832.7709592986739\n",
"mean difference : 27.58%, std: 29.55%\n",
"competition score : -4.610789567859322\n"
]
}
],
"source": [
"from postprocessing.evaluate import evaluate_hybrid, compute_score\n",
"\n",
"preds = evaluate_hybrid(model, df, trainAttrX, trainImagesX, trainY, sc)\n",
"preds = evaluate_hybrid(model, df, trainAttrX, train_dataset, trainY, sc)\n",
"conf, score = compute_score(trainY,preds.flatten())\n",
"print('competition score :', score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 26,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"35/35 [==============================] - 11s 314ms/step - loss: 101.1096\n"
]
},
{
"data": {
"text/plain": [
"101.10960388183594"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.evaluate([trainAttrX, trainImagesX], trainY)"
"model.evaluate([trainAttrX, train_dataset], trainY)"
]
},
{
......@@ -666,22 +893,57 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:predicting ...\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"avg. FVC: 2690.479018721756, std FVC 832.7709592986739\n",
"mean difference : 34.22%, std: 31.98%\n",
"competition score : -4.61193964179478\n"
]
}
],
"source": [
"preds = evaluate_hybrid(model, df, testAttrX, testImagesX, testY, sc)\n",
"preds = evaluate_hybrid(model, df, testAttrX, test_dataset, testY, sc)\n",
"conf, score = compute_score(testY,preds.flatten())\n",
"print('competition score :', score)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 28,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"9/9 [==============================] - 3s 279ms/step - loss: 100.0515\n"
]
},
{
"data": {
"text/plain": [
"100.05147552490234"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.evaluate([testAttrX, testImagesX], testY)"
"model.evaluate([testAttrX, test_dataset], testY)"
]
},
{
......
%% Cell type:markdown id: tags:
# CNN collage + MLP
%% Cell type:markdown id: tags:
https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
https://www.kaggle.com/franklemuchahary/basic-cnn-keras-with-cross-validation
%% Cell type:code id: tags:
``` python
import numpy as np
import pandas as pd
import os
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
```
%% Cell type:markdown id: tags:
## A - Preprocessing : Reading Data
%% Cell type:code id: tags:
``` python
# Move the working directory one level up from wherever the kernel currently is.
# NOTE: the path is relative, so this cell is not idempotent - running it again
# moves up one more level; restart the kernel before re-running the notebook.
os.chdir('../')
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import read_data
# Data folder, given relative to the current working directory.
input_directory='../osic-pulmonary-fibrosis-progression'
# read_data returns three objects, unpacked here as the train, test and sample tables.
train_df, test_df, sample_df = read_data(input_directory)
# Preview the first rows of the training table.
train_df.head()
```
%%%% Output: execute_result
Patient Weeks FVC Percent Age Sex SmokingStatus
0 ID00007637202177411956430 -4 2315 58.253649 79 Male Ex-smoker
1 ID00007637202177411956430 5 2214 55.712129 79 Male Ex-smoker
2 ID00007637202177411956430 7 2061 51.862104 79 Male Ex-smoker
3 ID00007637202177411956430 9 2144 53.950679 79 Male Ex-smoker
4 ID00007637202177411956430 11 2069 52.063412 79 Male Ex-smoker
%% Cell type:markdown id: tags:
## B - Preprocessing : Loading Data
%% Cell type:code id: tags:
``` python
# Distinct patient identifiers: the test ones stay in the array returned by
# unique(), the training ones are materialised as a plain Python list.
patient_test_list = test_df.Patient.unique()
patients_train_ids = list(train_df.Patient.unique())
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import load_images
logging.info("loading attributes...")
df = pd.read_csv(f'{input_directory}/train.csv')
patients_train_ids= df.Patient.unique().tolist()
logging.info("loading images...")
images = load_images(input_directory,
'train',
patients_train_ids,
option='collage',
outputH = 240,
outputW = 240)