Commit d60a3d60 authored by Bannier Delphine

set up poetry dependencies

parent 5ba4bec5
# Projet_FR
Amelie
https://www.kaggle.com/c/osic-pulmonary-fibrosis-progression
## Set up the environment
Bash commands to create a virtual environment, install the required libraries, and install the project package:
```bash
# create the virtual environment
python3 -m venv venvfr
source venvfr/bin/activate
pip install poetry
# install dependencies
cd filrouge/
make build
pip uninstall numpy  # answer yes
pip install numpy==1.20.3
# register the kernel
python -m ipykernel install --name venvfr
```
The functions can then be called from a notebook started with the virtual environment's kernel:
```python
from filrouge.preprocessing.read_load_data import *
```
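For example, to load the competition CSVs (a minimal sketch assuming the Kaggle data was downloaded into a local `data/` directory; adjust the path to your setup):
```python
from filrouge.preprocessing.read_load_data import read_data

# data/ is expected to contain train.csv, test.csv and sample_submission.csv
train_df, test_df, sample_sub = read_data('data')
print(train_df.head())
```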
# build the package and install it into the conda interpreter
install:
	pip install poetry
	poetry build
	/opt/conda/bin/python -m pip install dist/filrouge-0.1.0.tar.gz

# build the package and install it into the active environment
build:
	poetry build
	pip install dist/filrouge-0.1.0.tar.gz
__version__ = '0.1.0'
import logging
import numpy as np
import math
#### Functions still to be reworked !!!
def evaluate_hybrid(model, df, trainAttrX, trainImagesX, trainY, sc):
    '''Predict with the hybrid (tabular + image) model and print the mean
    absolute percentage error against the unscaled FVC targets.'''
    logging.info("predicting ...")
    preds = model.predict([trainAttrX, trainImagesX])
    # back to the original FVC scale before computing percentage errors
    diff = sc.inverse_transform(preds.flatten()) - sc.inverse_transform(trainY)
    percentDiff = (diff / sc.inverse_transform(trainY)) * 100
    absPercentDiff = np.abs(percentDiff)
    mean = np.mean(absPercentDiff)
    std = np.std(absPercentDiff)
    print("avg. FVC: {}, std FVC: {}".format(df["FVC"].mean(), df["FVC"].std()))
    print("mean difference: {:.2f}%, std: {:.2f}%".format(mean, std))
    return preds
def evaluate_cnn(model, df, trainImagesX, trainY, sc):
    '''Predict with the image-only CNN and print the mean absolute
    percentage error against the unscaled FVC targets.'''
    logging.info("predicting ...")
    preds = model.predict(trainImagesX)
    diff = sc.inverse_transform(preds.flatten()) - sc.inverse_transform(trainY)
    percentDiff = (diff / sc.inverse_transform(trainY)) * 100
    absPercentDiff = np.abs(percentDiff)
    mean = np.mean(absPercentDiff)
    std = np.std(absPercentDiff)
    print("avg. FVC: {}, std FVC: {}".format(df["FVC"].mean(), df["FVC"].std()))
    print("mean difference: {:.2f}%, std: {:.2f}%".format(mean, std))
    return preds
def evaluate_mlp(model, df, trainAttrX, trainY, sc):
    '''Predict with the tabular-only MLP and print the mean absolute
    percentage error against the unscaled FVC targets.'''
    logging.info("predicting ...")
    preds = model.predict(trainAttrX)
    diff = sc.inverse_transform(preds.flatten()) - sc.inverse_transform(trainY)
    percentDiff = (diff / sc.inverse_transform(trainY)) * 100
    absPercentDiff = np.abs(percentDiff)
    mean = np.mean(absPercentDiff)
    std = np.std(absPercentDiff)
    print("avg. FVC: {}, std FVC: {}".format(df["FVC"].mean(), df["FVC"].std()))
    print("mean difference: {:.2f}%, std: {:.2f}%".format(mean, std))
    return preds
def compute_score(y_true, y_pred):
    '''Modified Laplace log-likelihood (the Kaggle competition metric).
    NOTE: sigma should be a predicted confidence; (y_true - y_pred) is a
    placeholder that still needs to be replaced.'''
    sigma = (y_true - y_pred)  #########
    fvc_pred = y_pred
    # the metric clips sigma at 70 mL and the absolute error at 1000 mL
    sigma_clip = np.maximum(sigma, 70)
    delta = np.minimum(np.abs(y_true - fvc_pred), 1000)
    sq2 = math.sqrt(2)
    metric = -(delta / sigma_clip) * sq2 - np.log(sigma_clip * sq2)
    return (sigma, np.mean(metric))
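# Minimal usage sketch under the current placeholder sigma; the inputs are
# illustrative FVC values in mL, not competition data:
# y_true = np.array([2300.0, 2800.0, 1900.0])
# y_pred = np.array([2250.0, 2900.0, 2000.0])
# sigma, avg_metric = compute_score(y_true, y_pred)
# print(avg_metric)  # higher (less negative) is better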
import matplotlib.pyplot as plt
def plot_history(hist):
    '''
    `hist`: the History object returned by model.fit()
    Plots each training metric together with its `val_` counterpart.
    '''
    metrics = [m for m in hist.history.keys() if 'val' not in m]
    for metric in metrics:
        plt.plot(hist.history[metric])
        plt.plot(hist.history[f'val_{metric}'])
        plt.title(f'model {metric}')
        plt.ylabel(metric)
        plt.xlabel('epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
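# Typical use (sketch):
# hist = model.fit(X, y, validation_split=0.2, epochs=10)
# plot_history(hist)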
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage.transform import resize
import os
import re
import pydicom
import glob
import pylibjpeg  # pydicom plugin: enables decoding of JPEG-compressed DICOM files
import math
import logging
def read_data(input_directory: str):
    '''Read the train, test and sample-submission CSVs from `input_directory`.'''
    test_df = pd.read_csv(f'{input_directory}/test.csv')
    sample_sub = pd.read_csv(f'{input_directory}/sample_submission.csv')
    train_df = pd.read_csv(f'{input_directory}/train.csv')
    return train_df, test_df, sample_sub
def print_images_patient(input_directory, patient):
    '''Display all DICOM slices of a patient on a grid of subplots.'''
    list_files = os.listdir(f'{input_directory}/train/{patient}')
    slice_ids = re.findall(r"\d+", str(list_files))
    nbimages = len(list_files)
    ncol = 5
    nrow = nbimages // ncol + 1
    im = 0
    fig = plt.figure(figsize=(10, 15))
    for i in slice_ids:
        im += 1
        plt.subplot(nrow, ncol, im)
        plt.title(f"Image n°{im}")
        filename = f'{input_directory}/train/{patient}/{i}.dcm'
        ds = pydicom.dcmread(filename)
        plt.imshow(ds.pixel_array)
    fig.tight_layout()
    plt.show()
def load_images(input_directory,
                trainset,
                list_patient_ids,
                option='collage',
                nb_im=4,
                outputW=64,
                outputH=64,
                ):
    '''
    Builds a single image per patient to feed the CNN.
    The final image is either a collage or a stack of the patient's slices.
    `trainset`: 'train' or 'test', as a string
    `option`: 'collage' or 'superposition', i.e. how the slices are combined
    `nb_im`: number of slices used to build the final image
    `outputW/H`: width/height of the returned image
    '''
    # initialize our images array
    images = []
    nb_patient = len(list_patient_ids)
    for num, patient_id in enumerate(list_patient_ids):
        # logging.info(f"Loading image for patient n°{num+1} out of {nb_patient}")
        # find all images of a given patient
        basePath = f'{input_directory}/{trainset}/{patient_id}/'
        list_img = list(os.listdir(basePath))
        list_img.sort(key=lambda s: int(s[:-4]))  # sort by slice number, dropping '.dcm'
        nb_img_patient = len(list_img)
        # initialize our list of input images along with the output image
        inputImages = []
        # update the tile size for the collage
        if option == 'collage':
            nb_col = int(math.sqrt(nb_im))  # number of tiles per row/column
            outputWbis, outputHbis = int(outputW / nb_col), int(outputH / nb_col)
            # avoids errors when the square root is not an integer:
            outputW, outputH = int(outputWbis * nb_col), int(outputHbis * nb_col)
            outputImage = np.zeros((outputH, outputW), dtype="float")
        else:
            outputWbis, outputHbis = int(outputW), int(outputH)
            outputImage = np.zeros((outputH, outputW, nb_im), dtype="float")
        # euclidean division: pick slices evenly spread over the scan
        steps = nb_img_patient // nb_im
        for k in range(nb_im):
            imagePath = basePath + f"{list_img[k * steps]}"
            dcm = pydicom.dcmread(imagePath)
            data = dcm.pixel_array
            # resize takes (rows, cols) = (H, W)
            inputImages.append(resize(data, (outputHbis, outputWbis), anti_aliasing=True))
        if option == 'collage':
            idx = 0
            for row in range(nb_col):
                for col in range(nb_col):
                    start_h, end_h = row * outputHbis, (row + 1) * outputHbis
                    start_w, end_w = col * outputWbis, (col + 1) * outputWbis
                    outputImage[start_h:end_h, start_w:end_w] = inputImages[idx]
                    idx += 1
        if option == 'superposition':
            for k in range(nb_im):
                outputImage[:, :, k] = inputImages[k]
        images.append(outputImage)
    return np.array(images)
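# Usage sketch (paths and patient IDs are illustrative):
# train_df, test_df, sample_sub = read_data('data')
# ids = train_df['Patient'].unique()[:10]
# X_img = load_images('data', 'train', ids, option='collage', nb_im=4)
# X_img.shape  # -> (10, 64, 64)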
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from pandas.core.common import SettingWithCopyWarning
import warnings
import pandas as pd
import numpy as np
# silence chained-assignment warnings triggered by the .loc updates below
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
def scale_variable(trainAttrX, testAttrX, variable):
    '''Standard-scale `variable`, fitting on train only; returns the fitted scaler.'''
    sc = StandardScaler()
    trainAttrX.loc[:, f'{variable}_scaled'] = sc.fit_transform(np.array(trainAttrX.loc[:, f'{variable}']).reshape(-1, 1))
    testAttrX.loc[:, f'{variable}_scaled'] = sc.transform(np.array(testAttrX[f'{variable}']).reshape(-1, 1))
    return sc, trainAttrX, testAttrX
def encode_variable(trainAttrX, testAttrX, variable):
    '''Label-encode `variable`, fitting on train only.'''
    le = LabelEncoder()
    trainAttrX.loc[:, f'{variable}_le'] = le.fit_transform(trainAttrX.loc[:, f'{variable}'])
    testAttrX.loc[:, f'{variable}_le'] = le.transform(testAttrX.loc[:, f'{variable}'])
    return trainAttrX, testAttrX
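# Usage sketch with column names from the OSIC train.csv:
# sc, trainAttrX, testAttrX = scale_variable(trainAttrX, testAttrX, 'FVC')
# trainAttrX, testAttrX = encode_variable(trainAttrX, testAttrX, 'Sex')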
%% Cell type:code id: tags:
``` python
import tensorflow as tf
```
%% Cell type:code id: tags:
``` python
import numpy as np
```
%% Cell type:code id: tags:
``` python
print(np.__version__)
```
%%%% Output: stream
1.19.5
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import concatenate
def create_cnn(width, height, depth, filters=(32, 64, 128), regress=False):
    # initialize the input shape and channel dimension, assuming
    # TensorFlow/channels-last ordering
    inputShape = (height, width, depth)
    chanDim = -1
    # define the model input
    inputs = Input(shape=inputShape)
    # loop over the number of filters
    for (i, f) in enumerate(filters):
        # if this is the first CONV layer then set the input
        # appropriately
        if i == 0:
            x = inputs
        # CONV => RELU => BN => POOL
        x = Conv2D(f, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(16)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.5)(x)
    # apply another FC layer, this one to match the number of nodes
    # coming out of the MLP
    x = Dense(4)(x)
    x = Activation("relu")(x)
    # check to see if the regression node should be added
    if regress:
        x = Dense(1, activation="linear")(x)
    # construct and return the CNN
    model = Model(inputs, x)
    return model
def create_mlp(dim, regress=True):
    # define our MLP network
    model = Sequential()
    model.add(Dense(8, input_dim=dim, activation="relu"))
    model.add(Dense(4, activation="relu"))
    # add the regression head when requested
    if regress:
        model.add(Dense(1, activation="linear"))
    # return our model
    return model
def create_hybrid(nb_attributes, shape=(240, 240, 1)):
    # create the cnn and mlp branches
    mlp = create_mlp(nb_attributes)
    cnn = create_cnn(*shape)
    # concatenate both outputs and add the regression head
    combinedInput = concatenate([mlp.output, cnn.output])
    x = Dense(4, activation="relu")(combinedInput)
    x = Dense(1, activation="linear")(x)
    model = Model(inputs=[mlp.input, cnn.input], outputs=x)
    return model
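# Usage sketch, assuming 64x64 single-channel collages from load_images
# (an extra channel axis is needed before feeding the CNN branch):
# X_img = X_img.reshape(-1, 64, 64, 1)
# model = create_hybrid(nb_attributes=trainAttrX.shape[1], shape=(64, 64, 1))
# model.compile(loss="mse", optimizer="adam")
# model.fit([trainAttrX, X_img], trainY, epochs=10, validation_split=0.2)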
[tool.poetry]
name = "filrouge"
version = "0.1.0"
description = ""
authors = ["Delphine Bannier <delphine.bannier@insa-rouen.fr>"]

[tool.poetry.dependencies]
python = "^3.8"
scikit-image = "^0.18.1"
pandas = "^1.2.4"
pydicom = "^2.1.2"
pylibjpeg = "^1.3.0"
tensorflow = "^2.5.0"
pylibjpeg-libjpeg = "^1.2.0"

[tool.poetry.dev-dependencies]
pytest = "^5.2"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"