Commit f6a46d22 authored by Lisa Casino's avatar Lisa Casino
Browse files

add fichier loading

parent b3bc45b1
%% Cell type:code id:tough-exposure tags:
``` python
import numpy as np
import os
import pandas as pd
from scipy.io import wavfile
import librosa
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers, activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.utils import np_utils, to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from matplotlib import pyplot as plt
```
%% Cell type:code id:polished-tongue tags:
``` python
# Load the preprocessed UrbanSound8K dataframe pickled by the feature-extraction
# step; downstream cells read its 'fold', 'label' and 'melspectrogram' columns.
us8k_df = pd.read_pickle("us8k_df.pkl")
```
%% Cell type:code id:elementary-congress tags:
``` python
def init_model():
    """Build and compile the CNN classifier for 128x128x1 mel-spectrograms.

    Architecture: three convolutional stages followed by global average
    pooling and a small dense head; outputs a softmax over the 10
    UrbanSound8K classes.

    Returns:
        A compiled ``Sequential`` Keras model.
    """
    net = Sequential()

    # Stage 1: 24 5x5 filters (L2-regularised), 3x3/3 max-pool, then ReLU.
    net.add(Conv2D(filters=24, kernel_size=5, input_shape=(128, 128, 1),
                   kernel_regularizer=regularizers.l2(1e-3)))
    net.add(MaxPooling2D(pool_size=(3, 3), strides=3))
    net.add(Activation(activations.relu))

    # Stage 2: 36 4x4 filters (L2-regularised), 2x2/2 max-pool, then ReLU.
    net.add(Conv2D(filters=36, kernel_size=4, padding='valid',
                   kernel_regularizer=regularizers.l2(1e-3)))
    net.add(MaxPooling2D(pool_size=(2, 2), strides=2))
    net.add(Activation(activations.relu))

    # Stage 3: 48 3x3 filters, ReLU, then collapse the spatial dimensions.
    net.add(Conv2D(filters=48, kernel_size=3, padding='valid'))
    net.add(Activation(activations.relu))
    net.add(GlobalAveragePooling2D())

    # Classifier head: 60-unit hidden layer with dropout, then 10-way softmax.
    net.add(Dense(60, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(10, activation='softmax'))

    net.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                optimizer='adam')
    return net
```
%% Cell type:code id:afraid-louisville tags:
``` python
# Instantiate the network once and print its layer-by-layer summary
# (expected total: 33,634 trainable parameters).
model = init_model()
model.summary()
```
%%%% Output: stream
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 124, 124, 24) 624
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 41, 41, 24) 0
_________________________________________________________________
activation (Activation) (None, 41, 41, 24) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 38, 38, 36) 13860
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 19, 19, 36) 0
_________________________________________________________________
activation_1 (Activation) (None, 19, 19, 36) 0
_________________________________________________________________
conv2d_2 (Conv2D) (None, 17, 17, 48) 15600
_________________________________________________________________
activation_2 (Activation) (None, 17, 17, 48) 0
_________________________________________________________________
global_average_pooling2d (Gl (None, 48) 0
_________________________________________________________________
dense (Dense) (None, 60) 2940
_________________________________________________________________
dropout (Dropout) (None, 60) 0
_________________________________________________________________
dense_1 (Dense) (None, 10) 610
=================================================================
Total params: 33,634
Trainable params: 33,634
Non-trainable params: 0
_________________________________________________________________
%% Cell type:code id:altered-vermont tags:
``` python
def train_test_split(fold_k, data, X_dim=(128, 128, 1), num_classes=10):
    """Split the dataframe into train/test sets for one cross-validation fold.

    Rows whose ``fold`` equals ``fold_k`` form the test set; every other row
    forms the training set.

    Args:
        fold_k: fold id held out for validation/testing.
        data: DataFrame with 'fold', 'label' and 'melspectrogram' columns.
        X_dim: target shape (height, width, channels) for each spectrogram.
        num_classes: width of the one-hot label vectors. Fixing this — instead
            of letting it be inferred from each split as the original
            ``to_categorical(y)`` call did — guarantees train and test label
            matrices have the same number of columns even when a class is
            absent from one fold.

    Returns:
        (XX_train, XX_test, yy_train, yy_test) numpy arrays.
    """
    is_test = data.fold == fold_k
    train_rows = data[~is_test]
    test_rows = data[is_test]

    # Stack the per-row spectrograms and give them an explicit channel axis.
    XX_train = np.stack(train_rows.melspectrogram.to_numpy()).reshape(-1, *X_dim)
    XX_test = np.stack(test_rows.melspectrogram.to_numpy()).reshape(-1, *X_dim)

    # One-hot encode with an explicit class count (float32, matching
    # keras.utils.to_categorical) so both splits always agree in width.
    eye = np.eye(num_classes, dtype='float32')
    yy_train = eye[train_rows.label.to_numpy()]
    yy_test = eye[test_rows.label.to_numpy()]

    return XX_train, XX_test, yy_train, yy_test
```
%% Cell type:code id:phantom-strap tags:
``` python
def process_fold(fold_k, data, epochs=100, num_batch_size=32):
    """Train one cross-validation fold and return its Keras History.

    Holds out fold ``fold_k`` of ``data`` as the validation set, trains a
    freshly initialised model on the remaining folds, and prints the
    pre-training accuracy plus the wall-clock training time.

    Args:
        fold_k: fold id used for validation.
        data: the UrbanSound8K dataframe.
        epochs: number of training epochs.
        num_batch_size: mini-batch size for evaluate/fit.

    Returns:
        The ``History`` object produced by ``model.fit``.
    """
    # Hold out fold_k; everything else is training data.
    X_train, X_test, y_train, y_test = train_test_split(fold_k, data)

    # Fresh, untrained network for this fold.
    cnn = init_model()

    # Baseline accuracy before any training (should sit near 10% chance).
    baseline = cnn.evaluate(X_test, y_test, batch_size=num_batch_size, verbose=0)
    print("Pre-training accuracy: %.4f%%\n" % (100 * baseline[1]))

    # Fit the model and time the run.
    t0 = datetime.now()
    history = cnn.fit(X_train, y_train,
                      epochs=epochs,
                      validation_data=(X_test, y_test),
                      batch_size=num_batch_size)
    print("Training completed in time: ", datetime.now() - t0, '\n')

    return history
```
%% Cell type:code id:raised-puppy tags:
``` python
def show_results(tot_history):
    """Show accuracy and loss graphs for train and test sets.

    For each fold's History, draws a two-panel figure (accuracy on the left,
    loss on the right) and prints the best validation accuracy and the
    lowest validation loss reached during training.
    """
    for idx, fold_history in enumerate(tot_history, start=1):
        print('\n({})'.format(idx))
        hist = fold_history.history

        plt.figure(figsize=(15, 5))
        # (subplot position, train-curve key, validation-curve key, y label)
        panels = (
            (121, 'accuracy', 'val_accuracy', 'Accuracy'),
            (122, 'loss', 'val_loss', 'Loss'),
        )
        for pos, train_key, val_key, ylabel in panels:
            plt.subplot(pos)
            plt.plot(hist[train_key])
            plt.plot(hist[val_key])
            plt.grid(linestyle='--')
            plt.ylabel(ylabel)
            plt.xlabel('Epoch')
            plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

        print('\tMax validation accuracy: %.4f %%' % (np.max(hist['val_accuracy']) * 100))
        print('\tMin validation loss: %.5f' % np.min(hist['val_loss']))
```
%% Cell type:code id:legislative-prior tags:
``` python
# Fold 1: initialise the list that accumulates one History per fold, then
# train with fold 1 held out for validation (10 epochs only, for speed).
FOLD_K = 1
history1 = []
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%%%% Output: stream
--------------------------------------------------------------------------------
(1)
Pre-training accuracy: 13.6312%
Epoch 1/10
246/246 [==============================] - 46s 185ms/step - loss: 2.1745 - accuracy: 0.1996 - val_loss: 1.7348 - val_accuracy: 0.3253
Epoch 2/10
246/246 [==============================] - 44s 178ms/step - loss: 1.8258 - accuracy: 0.3251 - val_loss: 1.5448 - val_accuracy: 0.4948
Epoch 3/10
246/246 [==============================] - 44s 180ms/step - loss: 1.6410 - accuracy: 0.3967 - val_loss: 1.3963 - val_accuracy: 0.5155
Epoch 4/10
246/246 [==============================] - 44s 180ms/step - loss: 1.4471 - accuracy: 0.4709 - val_loss: 1.1977 - val_accuracy: 0.4960
Epoch 5/10
246/246 [==============================] - 45s 182ms/step - loss: 1.3507 - accuracy: 0.5222 - val_loss: 1.1505 - val_accuracy: 0.6632
Epoch 6/10
246/246 [==============================] - 48s 195ms/step - loss: 1.2840 - accuracy: 0.5477 - val_loss: 1.1121 - val_accuracy: 0.5750
Epoch 7/10
246/246 [==============================] - 50s 203ms/step - loss: 1.2254 - accuracy: 0.5791 - val_loss: 1.1266 - val_accuracy: 0.6037
Epoch 8/10
246/246 [==============================] - 48s 195ms/step - loss: 1.1647 - accuracy: 0.5888 - val_loss: 1.0520 - val_accuracy: 0.6942
Epoch 9/10
246/246 [==============================] - 45s 185ms/step - loss: 1.1141 - accuracy: 0.6174 - val_loss: 1.0825 - val_accuracy: 0.7171
Epoch 10/10
246/246 [==============================] - 45s 184ms/step - loss: 1.0420 - accuracy: 0.6379 - val_loss: 1.1577 - val_accuracy: 0.6506
Training completed in time: 0:07:40.769112
%% Cell type:code id:retained-greenhouse tags:
``` python
# Fold 2: same training run with fold 2 held out.
# NOTE(review): the captured output below shows a NameError ('process_fold'
# is not defined) — presumably the kernel was restarted before this cell ran;
# the definition cells above must be re-executed first. Confirm and re-run.
FOLD_K = 2
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%%%% Output: stream
--------------------------------------------------------------------------------
(2)
%%%% Output: error
---------------------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-1-c639b5d8c6f7> in <module>
4 print("\n({})\n".format(FOLD_K))
5
----> 6 history = process_fold(FOLD_K, us8k_df, epochs=10)
7 history1.append(history)
NameError: name 'process_fold' is not defined
%% Cell type:code id:exotic-institution tags:
``` python
# Fold 3: same training run with fold 3 held out; requires the earlier
# definition cells (and history1 from the fold-1 cell) to have run.
FOLD_K = 3
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:behavioral-worker tags:
``` python
# Fold 4: same training run with fold 4 held out.
FOLD_K = 4
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:binding-relation tags:
``` python
# Fold 5: same training run with fold 5 held out.
FOLD_K = 5
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:front-apparel tags:
``` python
# Fold 6: same training run with fold 6 held out.
FOLD_K = 6
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:elder-saint tags:
``` python
# Fold 7: same training run with fold 7 held out.
FOLD_K = 7
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:major-subject tags:
``` python
# Fold 8: same training run with fold 8 held out.
FOLD_K = 8
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:threaded-wallace tags:
``` python
# Fold 9: same training run with fold 9 held out.
FOLD_K = 9
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:nonprofit-neighborhood tags:
``` python
# Fold 10: final fold; after this cell, history1 holds one History per fold
# and can be passed to show_results().
FOLD_K = 10
print('-'*80)
print("\n({})\n".format(FOLD_K))
history = process_fold(FOLD_K, us8k_df, epochs=10)
history1.append(history)
```
%% Cell type:code id:electric-colorado tags:
``` python
```
......@@ -3,7 +3,7 @@
{
"cell_type": "code",
"execution_count": 1,
"id": "powerful-episode",
"id": "naked-arabic",
"metadata": {},
"outputs": [],
"source": [
......@@ -32,7 +32,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "green-moisture",
"id": "stopped-livestock",
"metadata": {},
"outputs": [],
"source": [
......@@ -43,7 +43,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "charged-thong",
"id": "located-formation",
"metadata": {},
"outputs": [],
"source": [
......@@ -53,7 +53,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "individual-obligation",
"id": "entire-safety",
"metadata": {},
"outputs": [
{
......@@ -256,7 +256,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "accurate-charleston",
"id": "seeing-opportunity",
"metadata": {},
"outputs": [
{
......@@ -278,7 +278,7 @@
},
{
"cell_type": "markdown",
"id": "intimate-deficit",
"id": "competitive-semiconductor",
"metadata": {},
"source": [
"Nous avons donc 10 classes dans le dataset qui sont : air_conditioner', 'car_horn', 'children_playing', 'dog_bark','drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren','street_music'\n"
......@@ -287,7 +287,7 @@
{
"cell_type": "code",
"execution_count": 6,
"id": "pleased-mother",
"id": "congressional-lancaster",
"metadata": {},
"outputs": [
{
......@@ -317,7 +317,7 @@
},
{
"cell_type": "markdown",
"id": "adolescent-optimum",
"id": "separated-drilling",
"metadata": {},
"source": [
"On remarque donc que les classes sont équilibrées sauf pour car_horn et gun_shot qui beaucoup moins d'échantillons. Nous pourrons donc regarder par la suite si ces 2 classes sont moins bien classifiées. "
......@@ -326,7 +326,7 @@
{
"cell_type": "code",
"execution_count": 7,
"id": "therapeutic-classroom",
"id": "handy-bankruptcy",
"metadata": {},
"outputs": [
{
......@@ -461,7 +461,7 @@
{
"cell_type": "code",
"execution_count": 8,
"id": "personalized-convenience",
"id": "ceramic-depth",
"metadata": {},
"outputs": [],
"source": [
......@@ -500,7 +500,7 @@
{
"cell_type": "code",
"execution_count": 9,
"id": "significant-proposition",
"id": "infrared-block",
"metadata": {},
"outputs": [
{
......@@ -540,7 +540,7 @@
{
"cell_type": "code",
"execution_count": 10,
"id": "renewable-movement",
"id": "victorian-filter",
"metadata": {},
"outputs": [],
"source": [
......@@ -554,7 +554,7 @@
{
"cell_type": "code",
"execution_count": 11,
"id": "hungry-fields",
"id": "pediatric-interest",
"metadata": {},
"outputs": [],
"source": [
......@@ -564,7 +564,7 @@
{
"cell_type": "code",
"execution_count": 12,
"id": "incomplete-jacob",
"id": "unknown-basement",
"metadata": {},
"outputs": [
{
......@@ -597,7 +597,7 @@
{
"cell_type": "code",
"execution_count": 2,
"id": "extensive-technician",
"id": "convenient-royal",
"metadata": {},
"outputs": [],
"source": [
......@@ -607,7 +607,7 @@
{
"cell_type": "code",
"execution_count": 3,
"id": "detailed-parish",
"id": "trying-somalia",
"metadata": {},
"outputs": [],
"source": [
......@@ -648,7 +648,7 @@
{
"cell_type": "code",
"execution_count": 4,
"id": "female-reason",
"id": "african-forum",
"metadata": {},
"outputs": [
{
......@@ -698,7 +698,7 @@
{
"cell_type": "code",
"execution_count": 5,
"id": "worth-ultimate",
"id": "dietary-wagon",
"metadata": {},
"outputs": [],
"source": [
......@@ -721,7 +721,7 @@
{
"cell_type": "code",
"execution_count": 14,
"id": "otherwise-patrol",
"id": "reverse-shoulder",
"metadata": {},
"outputs": [],
"source": [
......@@ -755,7 +755,7 @@
{
"cell_type": "code",
"execution_count": 15,
"id": "settled-payroll",
"id": "sticky-secondary",
"metadata": {},
"outputs": [],
"source": [
......@@ -792,7 +792,7 @@
{
"cell_type": "code",
"execution_count": 16,
"id": "changing-african",
"id": "suited-cologne",
"metadata": {},
"outputs": [
{
......@@ -942,7 +942,7 @@
{
"cell_type": "code",
"execution_count": 18,
"id": "lyric-payment",
"id": "coated-logic",
"metadata": {},
"outputs": [
{
......@@ -988,7 +988,7 @@
{
"cell_type": "code",
"execution_count": null,
"id": "italic-compensation",
"id": "minus-coordinator",
"metadata": {},
"outputs": [],
"source": []
%% Cell type:code id:tough-exposure tags:
``` python
import numpy as np
import os
import pandas as pd
from scipy.io import wavfile
import librosa
from tqdm import tqdm
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras import regularizers, activations
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.utils import np_utils, to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from datetime import datetime
from matplotlib import pyplot as plt
```
%% Cell type:code id:polished-tongue tags:
``` python
# Load the preprocessed UrbanSound8K dataframe pickled by the feature-extraction
# step; downstream cells read its 'fold', 'label' and 'melspectrogram' columns.
us8k_df = pd.read_pickle("us8k_df.pkl")
```
%% Cell type:code id:elementary-congress tags:
``` python
# Build and compile the CNN classifier for 128x128x1 mel-spectrograms:
# three conv stages, global average pooling, and a dense head ending in a
# 10-way softmax (one output per UrbanSound8K class). Returns the compiled
# Sequential model. (Indentation here is flattened by the diff rendering.)
def init_model():
model1 = Sequential()
#layer-1: 24 5x5 filters with L2 regularisation, 3x3/3 max-pool, ReLU
model1.add(Conv2D(filters=24, kernel_size=5, input_shape=(128, 128, 1),
kernel_regularizer=regularizers.l2(1e-3)))
model1.add(MaxPooling2D(pool_size=(3,3), strides=3))
model1.add(Activation(activations.relu))
#layer-2: 36 4x4 filters with L2 regularisation, 2x2/2 max-pool, ReLU
model1.add(Conv2D(filters=36, kernel_size=4, padding='valid', kernel_regularizer=regularizers.l2(1e-3)))
model1.add(MaxPooling2D(pool_size=(2,2), strides=2))
model1.add(Activation(activations.relu))
#layer-3: 48 3x3 filters, ReLU, then collapse spatial dims
model1.add(Conv2D(filters=48, kernel_size=3, padding='valid'))
model1.add(Activation(activations.relu))
model1.add(GlobalAveragePooling2D())
#layer-4 (1st dense layer) with dropout against overfitting
model1.add(Dense(60, activation='relu'))
model1.add(Dropout(0.5))
#layer-5 (2nd dense layer): softmax over the 10 classes
model1.add(Dense(10, activation='softmax'))
# compile with categorical cross-entropy (labels are one-hot) and Adam
model1.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
return model1
```
%% Cell type:code id:afraid-louisville tags: