Commit 85e62c1d authored by Bannier Delphine
Browse files

preprocessing update

parent d60a3d60
......@@ -115,3 +115,31 @@ def load_images(input_directory,
images.append(outputImage)
return np.array(images)
def create_dataframe(df):
    """Build a training frame pairing each follow-up with the patient's first row.

    For every patient in ``df``, the first row is taken as the baseline and
    one output row is emitted for each subsequent row, combining the baseline
    values with the follow-up week and FVC.

    Args:
        df: DataFrame with exactly seven columns, including ``'Patient'`` and
            ``'Weeks'``. The FVC value is read from the third column
            (position 2). NOTE(review): the baseline is simply the first row
            per patient in ``df`` order, so this presumably expects rows
            sorted by week within each patient -- confirm against the caller.

    Returns:
        DataFrame with columns ``PatientID, Age, Sex, SmokingStatus,
        First_FVC, First_Percent, Delta_week, Target_FVC``, one row per
        follow-up measurement. It is empty (but still has these columns)
        when no patient has more than one measurement.
    """
    baseline_columns = ['PatientID', 'First_week', 'First_FVC', 'First_Percent', 'Age', 'Sex', 'SmokingStatus']
    target_columns = ['target_week', 'Target_FVC']

    # New dataframe rows: one per (patient, follow-up measurement).
    train_data = []
    for patient in df['Patient'].unique().tolist():
        # Select all data related to this patient.
        patient_data = df[df['Patient'] == patient]
        # Save the first measurement; it is repeated on every output row.
        first_measure = list(patient_data.iloc[0, :].values)
        # For each later measurement, append its week and FVC to the baseline.
        follow_ups = patient_data.iloc[1:]
        for week, fvc in zip(follow_ups['Weeks'], follow_ups.iloc[:, 2]):
            train_data.append(first_measure + [week, fvc])

    # Passing the column names to the constructor keeps the schema intact
    # even when train_data is empty (assigning .columns afterwards would
    # raise a length-mismatch ValueError on a zero-column frame).
    training_df = pd.DataFrame(train_data, columns=baseline_columns + target_columns)
    # Duration since the first measurement.
    training_df['Delta_week'] = training_df['target_week'] - training_df['First_week']
    # Rearrange columns and drop the raw week columns.
    training_df = training_df[['PatientID', 'Age', 'Sex', 'SmokingStatus', 'First_FVC', 'First_Percent', 'Delta_week', 'Target_FVC']]
    return training_df
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment