Commit 3ec9d6c9 authored by Lafnoune Imane's avatar Lafnoune Imane
Browse files

mise à jour cnn injection transfer with weights

parent 5251b37f
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -24,7 +24,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
......@@ -44,7 +44,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
......@@ -53,7 +53,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
......@@ -150,7 +150,7 @@
"4 ID00007637202177411956430 11 2069 52.063412 79 Male Ex-smoker"
]
},
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
......@@ -172,7 +172,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -183,7 +183,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
......@@ -229,7 +229,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -313,102 +313,20 @@
" <td>Male</td>\n",
" <td>Never smoked</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1504</th>\n",
" <td>ID00419637202311204720264</td>\n",
" <td>6</td>\n",
" <td>3020</td>\n",
" <td>70.186855</td>\n",
" <td>73</td>\n",
" <td>Male</td>\n",
" <td>Ex-smoker</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1513</th>\n",
" <td>ID00421637202311550012437</td>\n",
" <td>15</td>\n",
" <td>2739</td>\n",
" <td>82.045291</td>\n",
" <td>68</td>\n",
" <td>Male</td>\n",
" <td>Ex-smoker</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1523</th>\n",
" <td>ID00422637202311677017371</td>\n",
" <td>6</td>\n",
" <td>1930</td>\n",
" <td>76.672493</td>\n",
" <td>73</td>\n",
" <td>Male</td>\n",
" <td>Ex-smoker</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1531</th>\n",
" <td>ID00423637202312137826377</td>\n",
" <td>17</td>\n",
" <td>3294</td>\n",
" <td>79.258903</td>\n",
" <td>72</td>\n",
" <td>Male</td>\n",
" <td>Ex-smoker</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1540</th>\n",
" <td>ID00426637202313170790466</td>\n",
" <td>0</td>\n",
" <td>2925</td>\n",
" <td>71.824968</td>\n",
" <td>73</td>\n",
" <td>Male</td>\n",
" <td>Never smoked</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>176 rows × 7 columns</p>\n",
"</div>"
],
"text/plain": [
" Patient Weeks FVC Percent Age Sex \\\n",
"0 ID00007637202177411956430 -4 2315 58.253649 79 Male \n",
"9 ID00009637202177434476278 8 3660 85.282878 69 Male \n",
"18 ID00010637202177584971671 0 3523 94.724672 60 Male \n",
"27 ID00011637202177653955184 6 3326 85.987590 72 Male \n",
"36 ID00012637202177665765362 33 3418 93.726006 65 Male \n",
"... ... ... ... ... ... ... \n",
"1504 ID00419637202311204720264 6 3020 70.186855 73 Male \n",
"1513 ID00421637202311550012437 15 2739 82.045291 68 Male \n",
"1523 ID00422637202311677017371 6 1930 76.672493 73 Male \n",
"1531 ID00423637202312137826377 17 3294 79.258903 72 Male \n",
"1540 ID00426637202313170790466 0 2925 71.824968 73 Male \n",
"\n",
" SmokingStatus \n",
"0 Ex-smoker \n",
"9 Ex-smoker \n",
"18 Ex-smoker \n",
"27 Ex-smoker \n",
"36 Never smoked \n",
"... ... \n",
"1504 Ex-smoker \n",
"1513 Ex-smoker \n",
"1523 Ex-smoker \n",
"1531 Ex-smoker \n",
"1540 Never smoked \n",
"\n",
"[176 rows x 7 columns]"
" Patient Weeks FVC Percent Age Sex SmokingStatus\n",
"0 ID00007637202177411956430 -4 2315 58.253649 79 Male Ex-smoker\n",
"9 ID00009637202177434476278 8 3660 85.282878 69 Male Ex-smoker\n",
"18 ID00010637202177584971671 0 3523 94.724672 60 Male Ex-smoker\n",
"27 ID00011637202177653955184 6 3326 85.987590 72 Male Ex-smoker\n",
"36 ID00012637202177665765362 33 3418 93.726006 65 Male Never smoked"
]
},
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
......@@ -426,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
......@@ -445,7 +363,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
......@@ -465,7 +383,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
......@@ -489,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
......@@ -497,47 +415,15 @@
"output_type": "stream",
"text": [
"Epoch 1/20\n",
"18/18 [==============================] - 45s 914ms/step - loss: 99.7389 - val_loss: 100.4144\n",
"18/18 [==============================] - 15s 540ms/step - loss: 100.0941 - val_loss: 100.0653\n",
"Epoch 2/20\n",
"18/18 [==============================] - 13s 757ms/step - loss: 98.7043 - val_loss: 100.6795\n",
"18/18 [==============================] - 8s 436ms/step - loss: 99.7466 - val_loss: 100.3561\n",
"Epoch 3/20\n",
"18/18 [==============================] - 14s 813ms/step - loss: 99.9509 - val_loss: 100.7490\n",
"18/18 [==============================] - 9s 485ms/step - loss: 98.8366 - val_loss: 100.6308\n",
"Epoch 4/20\n",
"18/18 [==============================] - 11s 632ms/step - loss: 98.1777 - val_loss: 100.8061\n",
"18/18 [==============================] - 9s 528ms/step - loss: 98.3525 - val_loss: 100.7715\n",
"Epoch 5/20\n",
"18/18 [==============================] - 9s 498ms/step - loss: 99.5343 - val_loss: 100.7976\n",
"Epoch 6/20\n",
"18/18 [==============================] - 9s 504ms/step - loss: 99.6414 - val_loss: 100.7194\n",
"Epoch 7/20\n",
"18/18 [==============================] - 9s 482ms/step - loss: 98.7556 - val_loss: 100.6924\n",
"Epoch 8/20\n",
"18/18 [==============================] - 8s 469ms/step - loss: 99.1854 - val_loss: 100.7468\n",
"Epoch 9/20\n",
"18/18 [==============================] - 9s 509ms/step - loss: 98.0266 - val_loss: 100.8150\n",
"Epoch 10/20\n",
"18/18 [==============================] - 9s 528ms/step - loss: 98.6620 - val_loss: 100.6718\n",
"Epoch 11/20\n",
"18/18 [==============================] - 8s 474ms/step - loss: 97.1374 - val_loss: 100.6786\n",
"Epoch 12/20\n",
"18/18 [==============================] - 9s 494ms/step - loss: 99.4677 - val_loss: 100.6250\n",
"Epoch 13/20\n",
"18/18 [==============================] - 9s 491ms/step - loss: 99.0034 - val_loss: 100.6607\n",
"Epoch 14/20\n",
"18/18 [==============================] - 9s 477ms/step - loss: 98.6438 - val_loss: 100.6357\n",
"Epoch 15/20\n",
"18/18 [==============================] - 8s 469ms/step - loss: 99.4131 - val_loss: 100.5815\n",
"Epoch 16/20\n",
"18/18 [==============================] - 9s 500ms/step - loss: 98.8292 - val_loss: 100.6375\n",
"Epoch 17/20\n",
"18/18 [==============================] - 8s 448ms/step - loss: 99.5671 - val_loss: 100.6383\n",
"Epoch 18/20\n",
"18/18 [==============================] - 8s 452ms/step - loss: 99.8388 - val_loss: 100.7089\n",
"Epoch 19/20\n",
"18/18 [==============================] - 8s 452ms/step - loss: 97.6967 - val_loss: 100.8448\n",
"Epoch 20/20\n",
"18/18 [==============================] - 8s 448ms/step - loss: 96.8201 - val_loss: 100.8306\n",
"CPU times: user 17min 3s, sys: 59.7 s, total: 18min 3s\n",
"Wall time: 3min 53s\n"
"18/18 [==============================] - ETA: 0s - loss: 99.4422"
]
}
],
......
%% Cell type:markdown id: tags:
# CNN with transfer learning without pretrained weights (EfficientNet)
This notebook contains the configurations required to train an efficientnet model for K-folds.
It is possible to hit -0.6910 LB by tweaking parameters in this notebook!
https://www.kaggle.com/khoongweihao/k-fold-tf-efficientnet-models-training
%% Cell type:code id: tags:
``` python
import numpy as np
import pandas as pd
import os
import logging
# Emit INFO-level records formatted as "LEVEL:message", so the progress
# messages logged by later cells appear in the cell output.
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
```
%% Cell type:markdown id: tags:
## A - Preprocessing : Reading Data
%% Cell type:code id: tags:
``` python
# Move the working directory one level up; presumably this is what makes the
# project packages imported by the following cells (preprocessing, processing,
# postprocessing) importable.
# NOTE(review): the path is relative, so running this cell a second time moves
# up one more level. Run it exactly once per kernel session.
os.chdir('../')
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import read_data
# Dataset folder, given relative to the current working directory.
input_directory='../osic-pulmonary-fibrosis-progression'
# read_data returns three objects, unpacked here as train / test / sample.
train_df, test_df, sample_df = read_data(input_directory)
# Last expression of the cell: displays the first rows of the training table.
train_df.head()
```
%%%% Output: execute_result
Patient Weeks FVC Percent Age Sex SmokingStatus
0 ID00007637202177411956430 -4 2315 58.253649 79 Male Ex-smoker
1 ID00007637202177411956430 5 2214 55.712129 79 Male Ex-smoker
2 ID00007637202177411956430 7 2061 51.862104 79 Male Ex-smoker
3 ID00007637202177411956430 9 2144 53.950679 79 Male Ex-smoker
4 ID00007637202177411956430 11 2069 52.063412 79 Male Ex-smoker
%% Cell type:markdown id: tags:
## B - Preprocessing : Loading Data
%% Cell type:code id: tags:
``` python
# Unique patient identifiers of the train and test tables.
# .tolist() replaces the former element-by-element copy of the array and
# matches how the same list is built in the image-loading cell.
patients_train_ids = train_df.Patient.unique().tolist()
patient_test_list = test_df.Patient.unique()
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import load_images

logging.info("loading attributes...")
# Keep only the first train.csv row of every patient, so df has one row per
# patient (176 rows in the recorded output, same as the first axis of images).
df = pd.read_csv(f'{input_directory}/train.csv')
df = df.drop_duplicates(subset='Patient', keep='first')
# Patient ids in the row order of df. The former
# df[df['Patient'].isin(patients_train_ids)] filter was removed: the ids are
# taken from df itself, so it selected every row.
patients_train_ids = df.Patient.unique().tolist()

logging.info("loading images...")
# NOTE(review): assumes load_images returns one entry per id, in the order the
# ids are given; the later train_test_split pairs df rows with images by
# position. Confirm against load_images.
images = load_images(input_directory,
                     'train',
                     patients_train_ids,
                     option='superposition',
                     outputH=240,
                     outputW=240)

print("Array shape: ", images.shape)
# Sanity check: the values are expected to lie within [-1, 1].
print('min value: ', np.amin(images))
print('max value: ', np.amax(images))
```
%%%% Output: stream
INFO:loading attributes...
INFO:loading images...
%%%% Output: stream
Array shape: (176, 240, 240, 4)
min value: -0.1251496147096971
max value: 0.1692184837618322
%% Cell type:code id: tags:
``` python
df.head()
```
%%%% Output: execute_result
Patient Weeks FVC Percent Age Sex \
0 ID00007637202177411956430 -4 2315 58.253649 79 Male
9 ID00009637202177434476278 8 3660 85.282878 69 Male
18 ID00010637202177584971671 0 3523 94.724672 60 Male
27 ID00011637202177653955184 6 3326 85.987590 72 Male
36 ID00012637202177665765362 33 3418 93.726006 65 Male
... ... ... ... ... ... ...
1504 ID00419637202311204720264 6 3020 70.186855 73 Male
1513 ID00421637202311550012437 15 2739 82.045291 68 Male
1523 ID00422637202311677017371 6 1930 76.672493 73 Male
1531 ID00423637202312137826377 17 3294 79.258903 72 Male
1540 ID00426637202313170790466 0 2925 71.824968 73 Male
SmokingStatus
0 Ex-smoker
9 Ex-smoker
18 Ex-smoker
27 Ex-smoker
36 Never smoked
... ...
1504 Ex-smoker
1513 Ex-smoker
1523 Ex-smoker
1531 Ex-smoker
1540 Never smoked
[176 rows x 7 columns]
Patient Weeks FVC Percent Age Sex SmokingStatus
0 ID00007637202177411956430 -4 2315 58.253649 79 Male Ex-smoker
9 ID00009637202177434476278 8 3660 85.282878 69 Male Ex-smoker
18 ID00010637202177584971671 0 3523 94.724672 60 Male Ex-smoker
27 ID00011637202177653955184 6 3326 85.987590 72 Male Ex-smoker
36 ID00012637202177665765362 33 3418 93.726006 65 Male Never smoked
%% Cell type:markdown id: tags:
## C - Preprocessing : Shuffle and train/test split
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples. The attribute table and the image array are
# split together, and the fixed seed keeps the split reproducible.
split = train_test_split(
    df,
    images,
    random_state=42,
    test_size=0.2,
)
trainAttrX, testAttrX, trainImagesX, testImagesX = split
```
%% Cell type:markdown id: tags:
## D - Preprocessing : Scaling + Encoding
%% Cell type:code id: tags:
``` python
from preprocessing.scale_data import scale_variable

# scale_variable is called on the 'FVC' column of both tables; its first
# return value is kept as `sc` and passed to evaluate_cnn later on.
sc, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX, 'FVC')

# Regression targets: the 'FVC_scaled' column of each returned table.
trainY = trainAttrX['FVC_scaled']
testY = testAttrX['FVC_scaled']
```
%% Cell type:markdown id: tags:
## E - Processing : Create models
%% Cell type:code id: tags:
``` python
from processing.models import create_transfer_learning
# The optimizer now comes from tensorflow.keras, like Model and the
# efficientnet.tfkeras backbone: objects from the standalone `keras` package
# should not be mixed with tf.keras models.
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
import efficientnet.tfkeras as efn

# EfficientNetB1 backbone, created without pretrained weights and without its
# top (classification) layers.
new_model = efn.EfficientNetB1(weights=None, include_top=False)

# Rebuild the same architecture from its config, with the input layer changed
# to 240x240 images with 4 channels (the shape of the loaded image array).
input_channel = 4
config = new_model.get_config()
config["layers"][0]["config"]["batch_input_shape"] = (None, 240, 240, input_channel)
# Name of the layer at index 1 of the config, forwarded to
# create_transfer_learning.
modify_name = config["layers"][1]["config"]["name"]
custom_model = Model.from_config(config)

model = create_transfer_learning(new_model, custom_model, modify_name, input_channel)

# `learning_rate` is the current name of the deprecated `lr` argument.
# NOTE(review): `decay` is presumably only accepted by older / legacy tf.keras
# optimizers; confirm against the installed TensorFlow version.
opt = Adam(learning_rate=1e-3, decay=1e-3 / 200)
# NOTE(review): in the recorded run, loss and val_loss stay close to 100 for
# all 20 epochs. MAPE on a scaled target may be ill-conditioned if the scaled
# values are close to zero; confirm the choice of loss.
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)
```
%% Cell type:code id: tags:
``` python
%%time
# Train for 20 epochs with batches of 8 samples, using the held-out split as
# validation data. The returned history is plotted in the next cell.
hist = model.fit(x=trainImagesX, y=trainY,
validation_data=(testImagesX, testY),
epochs=20, batch_size=8)
```
%%%% Output: stream
Epoch 1/20
18/18 [==============================] - 45s 914ms/step - loss: 99.7389 - val_loss: 100.4144
18/18 [==============================] - 15s 540ms/step - loss: 100.0941 - val_loss: 100.0653
Epoch 2/20
18/18 [==============================] - 13s 757ms/step - loss: 98.7043 - val_loss: 100.6795
18/18 [==============================] - 8s 436ms/step - loss: 99.7466 - val_loss: 100.3561
Epoch 3/20
18/18 [==============================] - 14s 813ms/step - loss: 99.9509 - val_loss: 100.7490
18/18 [==============================] - 9s 485ms/step - loss: 98.8366 - val_loss: 100.6308
Epoch 4/20
18/18 [==============================] - 11s 632ms/step - loss: 98.1777 - val_loss: 100.8061
18/18 [==============================] - 9s 528ms/step - loss: 98.3525 - val_loss: 100.7715
Epoch 5/20
18/18 [==============================] - 9s 498ms/step - loss: 99.5343 - val_loss: 100.7976
Epoch 6/20
18/18 [==============================] - 9s 504ms/step - loss: 99.6414 - val_loss: 100.7194
Epoch 7/20
18/18 [==============================] - 9s 482ms/step - loss: 98.7556 - val_loss: 100.6924
Epoch 8/20
18/18 [==============================] - 8s 469ms/step - loss: 99.1854 - val_loss: 100.7468
Epoch 9/20
18/18 [==============================] - 9s 509ms/step - loss: 98.0266 - val_loss: 100.8150
Epoch 10/20
18/18 [==============================] - 9s 528ms/step - loss: 98.6620 - val_loss: 100.6718
Epoch 11/20
18/18 [==============================] - 8s 474ms/step - loss: 97.1374 - val_loss: 100.6786
Epoch 12/20
18/18 [==============================] - 9s 494ms/step - loss: 99.4677 - val_loss: 100.6250
Epoch 13/20
18/18 [==============================] - 9s 491ms/step - loss: 99.0034 - val_loss: 100.6607
Epoch 14/20
18/18 [==============================] - 9s 477ms/step - loss: 98.6438 - val_loss: 100.6357
Epoch 15/20
18/18 [==============================] - 8s 469ms/step - loss: 99.4131 - val_loss: 100.5815
Epoch 16/20
18/18 [==============================] - 9s 500ms/step - loss: 98.8292 - val_loss: 100.6375
Epoch 17/20
18/18 [==============================] - 8s 448ms/step - loss: 99.5671 - val_loss: 100.6383
Epoch 18/20
18/18 [==============================] - 8s 452ms/step - loss: 99.8388 - val_loss: 100.7089
Epoch 19/20
18/18 [==============================] - 8s 452ms/step - loss: 97.6967 - val_loss: 100.8448
Epoch 20/20
18/18 [==============================] - 8s 448ms/step - loss: 96.8201 - val_loss: 100.8306
CPU times: user 17min 3s, sys: 59.7 s, total: 18min 3s
Wall time: 3min 53s
18/18 [==============================] - ETA: 0s - loss: 99.4422
%% Cell type:code id: tags:
``` python
from postprocessing.plot_history import plot_history
# Plot the history object returned by model.fit.
plot_history(hist)
```
%%%% Output: display_data
[Hidden Image Output]
%% Cell type:markdown id: tags:
## F - Evaluation
%% Cell type:code id: tags:
``` python
from postprocessing.evaluate import evaluate_cnn, compute_score
```
%%%% Output: error
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-1-097459c72699> in <module>
----> 1 from postprocessing.evaluate import evaluate_cnn, compute_score
ModuleNotFoundError: No module named 'postprocessing'
%% Cell type:code id: tags:
``` python
# Evaluate the model on the training images (evaluate_cnn logs
# "predicting ..." in the recorded output), then score the flattened
# predictions against trainY.
# NOTE(review): the full per-patient frame `df` is passed here, not
# trainAttrX; confirm this is intended.
preds = evaluate_cnn(model, df, trainImagesX, trainY, sc)
conf, score = compute_score(trainY,preds.flatten())
print('competition score :', score)
```
%%%% Output: stream
INFO:predicting ...
%%%% Output: stream
avg. FVC: 2771.744318181818, std FVC 835.5745106360505
mean difference : 26.66%, std: 27.72%
competition score : -4.610712300068822
%% Cell type:code id: tags:
``` python
# Compiled loss (mean absolute percentage error) on the training split.
model.evaluate(trainImagesX, trainY)
```
%%%% Output: stream
5/5 [==============================] - 8s 1s/step - loss: 98.6242
%%%% Output: execute_result
98.62422180175781
%% Cell type:markdown id: tags:
### Test set
%% Cell type:code id: tags:
``` python
# Same evaluation as for the training split, on the held-out images: score the
# flattened predictions against testY.
# NOTE(review): the full per-patient frame `df` is passed here, not
# testAttrX; confirm this is intended.
preds = evaluate_cnn(model, df, testImagesX, testY, sc)
conf, score = compute_score(testY,preds.flatten())
print('competition score :', score)
```
%%%% Output: stream
INFO:predicting ...
%%%% Output: stream
avg. FVC: 2771.744318181818, std FVC 835.5745106360505
mean difference : 31.99%, std: 26.69%
competition score : -4.6117394238573475
%% Cell type:code id: tags:
``` python
# Compiled loss (mean absolute percentage error) on the held-out split.
model.evaluate(testImagesX, testY)
```
%%%% Output: stream
2/2 [==============================] - 2s 179ms/step - loss: 100.8306
%%%% Output: execute_result
100.83056640625
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment