Commit 4e913ea3 authored by Billy Amélie
Browse files

competition score + transfer learning modification + model retraining

parent 3f0a09d4
This source diff could not be displayed because it is too large. You can view the blob instead.
%% Cell type:markdown id: tags:
# CNN superposition + MLP
%% Cell type:markdown id: tags:
https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
%% Cell type:code id: tags:
``` python
import numpy as np
import pandas as pd
import os
import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
```
%% Cell type:markdown id: tags:
## A - Preprocessing : Reading Data
%% Cell type:code id: tags:
``` python
# Move the working directory one level up. Presumably this lets the
# `preprocessing`, `processing` and `postprocessing` packages imported in
# later cells resolve — TODO confirm the notebook's location in the repo.
# The change is relative, so re-running this cell moves up one more level.
os.chdir('../')
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import read_data
# Dataset root, resolved relative to the current working directory.
input_directory='../osic-pulmonary-fibrosis-progression'
# read_data is a project helper; it returns three objects, used here as the
# train, test and sample-submission tables.
train_df, test_df, sample_df = read_data(input_directory)
# Last expression of the cell: displays the first rows of the training table.
train_df.head()
```
%%%% Output: execute_result
Patient Weeks FVC Percent Age Sex SmokingStatus
0 ID00007637202177411956430 -4 2315 58.253649 79 Male Ex-smoker
1 ID00007637202177411956430 5 2214 55.712129 79 Male Ex-smoker
2 ID00007637202177411956430 7 2061 51.862104 79 Male Ex-smoker
3 ID00007637202177411956430 9 2144 53.950679 79 Male Ex-smoker
4 ID00007637202177411956430 11 2069 52.063412 79 Male Ex-smoker
%% Cell type:markdown id: tags:
## B - Preprocessing : Loading Data
%% Cell type:code id: tags:
``` python
# Unique patient identifiers of each table: the training ids as a plain
# Python list, the test ids as the array returned by `unique()`.
patients_train_ids = list(train_df.Patient.unique())
patient_test_list = test_df.Patient.unique()
```
%% Cell type:code id: tags:
``` python
from preprocessing.read_load_data import load_images

logging.info("loading attributes...")
# Keep one row per patient: the first row found for each id in train.csv.
df = pd.read_csv(f'{input_directory}/train.csv')
df = df.drop_duplicates(subset='Patient', keep='first')
# Rebuild the id list from the de-duplicated frame so that it follows the
# row order of `df`. The former `df[df['Patient'].isin(patients_train_ids)]`
# filter was removed: the ids are taken from `df` itself, so it kept every row.
patients_train_ids = df.Patient.unique().tolist()

logging.info("loading images...")
# load_images is a project helper; presumably it returns one image stack per
# id, in the order of `patients_train_ids` — TODO confirm, since the split
# cell relies on `df` rows and `images` entries being aligned.
images = load_images(
    input_directory,
    'train',
    patients_train_ids,
    option='superposition',
    outputH=240,
    outputW=240,
)
print("Array shape: ", images.shape)
# Sanity check: pixel values are expected to lie between -1 and 1.
print('min value: ', np.amin(images))
print('max value: ', np.amax(images))
```
%% Cell type:markdown id: tags:
## C - Preprocessing : Shuffle and train/test split
%% Cell type:code id: tags:
``` python
from sklearn.model_selection import train_test_split
# Shuffle and split the attribute table and the image array with the same
# indices, so each attribute row stays paired with its image entry.
# 20% is held out; random_state=42 makes the split reproducible.
split = train_test_split(df, images, test_size=0.2, random_state=42)
# Returned order is train/test for the first array, then train/test for the second.
(trainAttrX, testAttrX, trainImagesX, testImagesX) = split
```
%% Cell type:markdown id: tags:
## D - Preprocessing : Scaling + Encoding
%% Cell type:code id: tags:
``` python
from preprocessing.scale_data import scale_variable
# scale_variable is a project helper: it returns a scaler object `sc` and both
# frames, which afterwards carry an 'FVC_scaled' column (read just below).
# Presumably the scaler is fitted on the training split only — TODO confirm.
sc, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX,'FVC')
# Regression targets: the scaled FVC of each split.
trainY = trainAttrX.loc[:,'FVC_scaled']
testY = testAttrX.loc[:,'FVC_scaled']
```
%% Cell type:code id: tags:
``` python
from preprocessing.scale_data import encode_variable
# encode_variable is a project helper; presumably it adds encoded columns for
# the given categorical variable to both frames — TODO confirm the encoding.
trainAttrX, testAttrX = encode_variable(trainAttrX, testAttrX,'Sex')
trainAttrX, testAttrX = encode_variable(trainAttrX, testAttrX,'SmokingStatus')
# Remove the raw categorical columns, the target (raw and scaled) and the
# patient id, so only model input features remain. Dropped in place.
# NOTE(review): 'Percent' is kept as a feature — confirm that it does not
# leak the FVC target.
trainAttrX.drop(columns = ['Sex','SmokingStatus','FVC','FVC_scaled','Patient'], inplace = True)
testAttrX.drop(columns = ['Sex','SmokingStatus','FVC','FVC_scaled','Patient'], inplace = True)
```
%% Cell type:markdown id: tags:
## E - Processing : Create models
%% Cell type:code id: tags:
``` python
from processing.models import create_hybrid2
```
%% Cell type:code id: tags:
``` python
from processing.models import create_hybrid2
from keras.optimizers import Adam

# Build the hybrid network from the number of tabular features and the image
# input shape. 240x240 matches the outputH/outputW requested from load_images;
# the 4 channels presumably come from the 'superposition' option — TODO confirm.
model = create_hybrid2(trainAttrX.shape[1], shape=(240, 240, 4))
# `learning_rate` replaces the deprecated `lr` alias of the Keras optimizers.
# `decay` is the legacy per-update learning-rate decay; it is kept as written.
opt = Adam(learning_rate=1e-3, decay=1e-3 / 200)
# The loss is the mean absolute percentage error; no extra metrics are tracked.
model.compile(loss="mean_absolute_percentage_error", optimizer=opt)
```
%% Cell type:code id: tags:
``` python
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()
```
%% Cell type:code id: tags:
``` python
%%time
# (`%%time` must remain the first line of the cell; it reports the run time.)
# Train on the [tabular attributes, images] input pair for 10 epochs with
# mini-batches of 8 samples. The held-out split is used as validation data,
# so the loss reported on it later is not from an independent test set.
hist = model.fit(
x=[trainAttrX, trainImagesX], y=trainY,
validation_data=([testAttrX, testImagesX], testY),
epochs=10, batch_size=8)
```
%% Cell type:code id: tags:
``` python
from postprocessing.plot_history import plot_history
# Project helper: plots the training history object returned by model.fit.
plot_history(hist)
```
%%%% Output: display_data
![]()
%% Cell type:markdown id: tags:
## F - Evaluation
%% Cell type:markdown id: tags:
### Training set
%% Cell type:code id: tags:
``` python
from postprocessing.evaluate import evaluate_hybrid, compute_score
```
%% Cell type:code id: tags:
``` python
# Predict on the training split with the project helper evaluate_hybrid.
# NOTE(review): `df` is the full one-row-per-patient frame, not the training
# split — confirm that this is what evaluate_hybrid expects.
preds = evaluate_hybrid(model, df, trainAttrX, trainImagesX, trainY, sc)
# NOTE(review): trainY holds *scaled* FVC values; confirm that evaluate_hybrid
# returns `preds` on the same scale (it receives the scaler `sc`) before
# trusting this score.
conf, score = compute_score(trainY,preds.flatten())
print('competition score :', score)
```
%% Cell type:code id: tags:
``` python
# Returns the compiled loss (mean absolute percentage error) on the training
# split; the model was compiled without additional metrics.
model.evaluate([trainAttrX, trainImagesX], trainY)
```
%%%% Output: execute_result
240.91358947753906
%% Cell type:markdown id: tags:
### Test set
%% Cell type:code id: tags:
``` python
# Predict on the held-out split with the project helper evaluate_hybrid.
# NOTE(review): `df` is the full one-row-per-patient frame, not the held-out
# split — confirm that this is what evaluate_hybrid expects.
preds = evaluate_hybrid(model, df, testAttrX, testImagesX, testY, sc)
# NOTE(review): testY holds *scaled* FVC values; confirm that evaluate_hybrid
# returns `preds` on the same scale (it receives the scaler `sc`) before
# trusting this score.
conf, score = compute_score(testY,preds.flatten())
print('competition score :', score)
```
%% Cell type:code id: tags:
``` python
# Returns the compiled loss (mean absolute percentage error) on the held-out
# split; the model was compiled without additional metrics.
model.evaluate([testAttrX, testImagesX], testY)
```
%%%% Output: execute_result
199.35498046875
%% Cell type:code id: tags:
``` python
# Raw model outputs for the training inputs, kept for ad-hoc inspection.
_a=model.predict([trainAttrX, trainImagesX])
```
%% Cell type:code id: tags:
``` python
# Quantile level; 0.5 is the median.
q=0.5
# No axis is given, so the quantile is taken over all predictions flattened.
a = np.quantile(_a, q)
```
%% Cell type:code id: tags:
``` python
# The prediction array has a single column (axis 1 has size 1), so the values
# are in column 0; indexing column 1 raised an IndexError.
_a[:, 0]
```
%%%% Output: error
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-31-8ed85e029818> in <module>
----> 1 _a[:,1]
IndexError: index 1 is out of bounds for axis 1 with size 1
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
## G - Sample submission file
%% Cell type:code id: tags:
``` python
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import concatenate
from tensorflow.keras.layers import GaussianNoise
```
%% Cell type:code id: tags:
``` python
def create_mlp2(dim, regress=True):
    """Build a small MLP for the tabular attributes.

    The network is ``GaussianNoise(0.2) -> Dense(8, relu) -> Dense(4, relu)``,
    optionally followed by a single-unit linear output.

    Args:
        dim: Number of input features.
        regress: When true (the default), append a ``Dense(1, linear)``
            regression head. When false, the model ends at the 4-unit hidden
            layer, e.g. to be used as a branch of a larger network.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    model = Sequential()
    # Noise on the inputs acts as a regulariser; Keras applies it only while
    # training.
    model.add(GaussianNoise(0.2, input_dim=dim))
    model.add(Dense(8, activation="relu"))
    model.add(Dense(4, activation="relu"))
    # The `regress` flag used to be ignored and the head was always added.
    if regress:
        model.add(Dense(1, activation="linear"))
    return model
```
%% Cell type:code id: tags:
``` python
```
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -143,24 +143,26 @@ def weightify(model_orig, custom_model, layer_modify,input_channel): ...@@ -143,24 +143,26 @@ def weightify(model_orig, custom_model, layer_modify,input_channel):
target_layer.trainable = False target_layer.trainable = False
return custom_model return custom_model
def create_transfer_learning(new_model, custom_model, modify_name,input_channel = 4): def create_transfer_learning(new_model, custom_model, modify_name,input_channel = 4, weights = False, hybrid = False):
# create cnn with transfer learning # create cnn with transfer learning
new = weightify(new_model,custom_model,modify_name,input_channel) new = weightify(new_model,custom_model,modify_name,input_channel)
x = new.output x = new.output
x = GlobalAveragePooling2D()(x) x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x) x = Dropout(0.5)(x)
#x = Dense(1)(x) if hybrid == False :
x = Dense(1, activation='linear')(x)
model = Model(new.input, x) model = Model(new.input, x)
for layer in new.layers: if weights == True:
layer.trainable = False for layer in new.layers:
layer.trainable = False
return model return model
def create_hybrid_transfer(nb_attributes,new_model, custom_model, modify_name,input_channel): def create_hybrid_transfer(nb_attributes,new_model, custom_model, modify_name,input_channel,weight = False):
# create cnn and mlp models # create cnn and mlp models
mlp = create_mlp(nb_attributes) mlp = create_mlp(nb_attributes)
cnn = create_transfer_learning(new_model, custom_model, modify_name,input_channel) cnn = create_transfer_learning(new_model, custom_model, modify_name,input_channel,weights = weight, hybrid = True)
combinedInput = concatenate([mlp.output, cnn.output]) combinedInput = concatenate([mlp.output, cnn.output])
x = Dense(4, activation="tanh")(combinedInput) x = Dense(4, activation="tanh")(combinedInput)
x = Dropout(0.5)(x) x = Dropout(0.5)(x)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment