{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-overview",
   "metadata": {},
   "source": [
    "# Sound classification with MobileNetV2 transfer learning\n",
    "\n",
    "Loads precomputed 128x128 mel-spectrograms from `us8k_df.pkl`, replicates them to three channels and trains a classification head on top of an ImageNet-pretrained MobileNetV2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "personal-marshall",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import os\n",
    "import pandas as pd \n",
    "from scipy.io import wavfile\n",
    "\n",
    "import librosa\n",
    "from tqdm import tqdm\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from tensorflow.keras import regularizers, activations\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation, Conv2D, MaxPooling2D, GlobalAveragePooling2D\n",
    "from tensorflow.keras.utils import to_categorical\n",
    "\n",
    "from tensorflow.keras.preprocessing.image import ImageDataGenerator\n",
    "\n",
    "from datetime import datetime \n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "import cv2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-constants",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives here.\n",
    "SEED = 42\n",
    "DATA_PATH = \"us8k_df.pkl\"\n",
    "\n",
    "# Size of the subsets used for the quick training run; set to None to use the full split.\n",
    "N_TRAIN_SAMPLES = 2000\n",
    "N_VAL_SAMPLES = 300\n",
    "\n",
    "# Seed every source of randomness so that Restart & Run All reproduces the same split and weights.\n",
    "np.random.seed(SEED)\n",
    "tf.random.set_seed(SEED)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-data",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "intimate-property",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: unpickling can execute arbitrary code - only load a file you produced yourself.\n",
    "us8k_df = pd.read_pickle(DATA_PATH)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-prepare-features",
   "metadata": {},
   "source": [
    "## Prepare features and labels\n",
    "\n",
    "**Caveat:** the `fold` column is dropped and the data is split at random below. If `fold` holds the dataset's predefined cross-validation folds (TODO confirm), clips cut from the same recording can end up in both the training and the test split and inflate the scores; prefer a fold-based split for any reported result."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "closed-abraham",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = us8k_df.drop(['fold'], axis=1)\n",
    "\n",
    "X_dim = (128, 128, 1)\n",
    "# float32 is what Keras trains in; a float64 copy would only double the memory footprint.\n",
    "X_gray = np.stack(df.melspectrogram.to_numpy()).astype(np.float32).reshape(-1, *X_dim)\n",
    "\n",
    "# One-hot encode the integer class labels.\n",
    "Y = to_categorical(np.array(df['label']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "controlling-overhead",
   "metadata": {},
   "outputs": [],
   "source": [
    "Y.shape, X_gray.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "rubber-hanging",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MobileNetV2 needs 3-channel input. Copying the single channel three times gives the same\n",
    "# result as cv2.COLOR_GRAY2RGB, without a per-sample Python loop, a hard-coded sample count\n",
    "# or a float64 buffer.\n",
    "X = np.repeat(X_gray, 3, axis=-1)\n",
    "assert X.shape == X_gray.shape[:-1] + (3,)\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "illegal-partner",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 70 % train, then the remaining 30 % is halved into validation and test (15 % each).\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(\n",
    "    X, Y, test_size=0.3, shuffle=True, stratify=Y, random_state=SEED)\n",
    "X_val, X_test, Y_val, Y_test = train_test_split(\n",
    "    X_test, Y_test, test_size=0.5, shuffle=True, stratify=Y_test, random_state=SEED)\n",
    "\n",
    "assert len(X_train) + len(X_val) + len(X_test) == len(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "french-assembly",
   "metadata": {},
   "source": [
    "## Model: MobileNetV2 backbone with a dense classification head"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "geographic-diving",
   "metadata": {},
   "outputs": [],
   "source": [
    "# MobileNetV2 expects inputs in [-1, 1]. mobilenet_v2.preprocess_input assumes 0..255 pixel\n",
    "# values, and applying it together with a second Rescaling(1/127.5, -1) normalised the data\n",
    "# twice, collapsing it to an almost constant tensor. Use one affine map instead, derived\n",
    "# from the value range actually present in the training split.\n",
    "x_min, x_max = float(X_train.min()), float(X_train.max())\n",
    "assert x_max > x_min, \"training spectrograms are constant; cannot rescale\"\n",
    "input_scale = 2.0 / (x_max - x_min)\n",
    "rescale = tf.keras.layers.experimental.preprocessing.Rescaling(\n",
    "    input_scale, offset=-1.0 - x_min * input_scale)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pacific-registration",
   "metadata": {},
   "outputs": [],
   "source": [
    "IMG_SHAPE = (128,128,3)\n",
    "\n",
    "base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,\n",
    "                                               include_top=False,\n",
    "                                               weights='imagenet')\n",
    "# Feature-extraction phase: keep the pretrained weights fixed while the new head is trained.\n",
    "# Set this back to True (with a much smaller learning rate) for a later fine-tuning phase.\n",
    "base_model.trainable = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "included-mozambique",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One logit per class; the width follows the one-hot label matrix instead of a literal 10.\n",
    "prediction_layer = tf.keras.layers.Dense(Y.shape[1])\n",
    "global_average_layer = tf.keras.layers.GlobalAveragePooling2D()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "palestinian-crowd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model: rescaling -> MobileNetV2 -> global average pooling -> dropout -> dense logits.\n",
    "inputs = tf.keras.Input(shape=IMG_SHAPE)\n",
    "x = rescale(inputs)\n",
    "# training=False keeps the batch-norm layers of the backbone in inference mode.\n",
    "x = base_model(x, training=False)\n",
    "x = global_average_layer(x)\n",
    "x = tf.keras.layers.Dropout(0.2)(x)\n",
    "outputs = prediction_layer(x)\n",
    "model = tf.keras.Model(inputs, outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "simple-rebecca",
   "metadata": {},
   "outputs": [],
   "source": [
    "model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-train",
   "metadata": {},
   "source": [
    "## Train"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pacific-correction",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_learning_rate = 0.001\n",
    "# The targets are one-hot vectors over mutually exclusive classes and the head outputs raw\n",
    "# logits, so the matching loss is categorical (not binary) cross-entropy with from_logits=True.\n",
    "model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),\n",
    "              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "durable-straight",
   "metadata": {},
   "outputs": [],
   "source": [
    "initial_epochs = 10\n",
    "num_batch_size = 32\n",
    "# Baseline before any training: accuracy should sit near chance level.\n",
    "loss0, accuracy0 = model.evaluate(X_val,Y_val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "victorian-hawaiian",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One TensorBoard run directory per launch, plus a checkpoint of the best weights so far.\n",
    "log_dir = \"logs/fit/\" + datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n",
    "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = log_dir, histogram_freq = 1)\n",
    "save_best = tf.keras.callbacks.ModelCheckpoint(filepath = \"logs/checkpoints/\", save_weights_only = True,\n",
    "                                               monitor = \"val_accuracy\", mode = \"max\", save_best_only = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adolescent-prerequisite",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Slicing with None keeps the whole split, so the subset sizes can be disabled from the config cell.\n",
    "model_fit = model.fit(\n",
    "    X_train[:N_TRAIN_SAMPLES], Y_train[:N_TRAIN_SAMPLES],\n",
    "    epochs=initial_epochs,\n",
    "    validation_data=(X_val[:N_VAL_SAMPLES], Y_val[:N_VAL_SAMPLES]),\n",
    "    batch_size=num_batch_size,\n",
    "    callbacks=[tensorboard_callback, save_best])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.2 64-bit",
   "language": "python",
   "name": "python38264bit553a082d8987410db038174ed8b7851d"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}