Commit 2d21e0a8 authored by Lacoin Achille's avatar Lacoin Achille
Browse files

on repart sur des bases saines (bye mon week-end)

parent be5590b3
clear
clc
load('WhiteW.mat');
% 1// Getting familiar with the data
% Per-variable means
moyX = mean(X);
% Per-variable variances
varX = var(X);
% To list the distinct categories: unique(Y)
% To count the number of 0 labels in Y: sum(Y==0)
% hist(var) can also be used
% Each exploratory plot gets its own figure so that it is not overwritten
% by the next plotting command.
figure(1)
hist(Y)
% Author's recorded observation: 1640 '0' and 3258 '1'
figure(2)
boxplot(X) % mean, median, variance ==> univariate statistics
figure(3)
imagesc(corrcoef(X)), colorbar, set(gca, 'fontsize', 20) %==> bivariate statistics
%% 2// Setting up the classification of the data
% 2.1// Splitting the data
% Three groups: training, test and validation
[xapp, yapp, xtest, ytest] = splitdata(X, Y, 1/3);
figure(4)
hist(yapp)
[xapp, yapp, xVal, yVal] = splitdata(xapp, yapp, 1/2);
figure(5)
hist(yVal)
% Author's observation: same class proportions as in the full sample
% 2.2// k-nearest-neighbours method
% MANDATORY: choose k on the validation set, never on the training set
errVal = erreursPrediction(xapp,yapp,xVal,50,yVal)
figure(6)
plot(errVal)
% Plots the validation error rate as a function of the choice of k
% NOTE: the minimum is stored in minErrVal, not in a variable called "min".
% A variable named "min" would shadow the built-in function and make the
% later call min(erreursPrediction(...)) fail.
[minErrVal,k] = min(errVal)
% Author's recorded run: lowest error obtained with k = 8
% (the splits are random, so the value changes from run to run)
% Evaluate this value of k on the test set
% (yValPred holds the predictions for the TEST set despite its name)
yValPred = kppv(xtest,xapp,yapp,k,[]);
errValFin= mean(ytest ~= yValPred)
% Author's recorded run: k = 7 chosen, error equal to 0.34
% 2.3// Influence of data normalization
%
mapp = mean(xapp);
stdapp = std(xapp);
% Center and scale the 3 groups using the training statistics
[xappcr,xtestcr] = normalizemeanstd(xapp,xtest,mapp,stdapp);
[~,xvalcr] = normalizemeanstd(xapp,xVal,mapp,stdapp);
[~,kcr] = min(erreursPrediction(xappcr,yapp,xvalcr,50,yVal))
yValPredcr = kppv(xtestcr,xappcr,yapp,kcr,[]);
% Displayed so that it can be compared with errValFin above
errValFincr= mean(ytest ~= yValPredcr)
function [errVal] = erreursPrediction(xapp,yapp,x,n,y)
% ERREURSPREDICTION  Error rate of kppv for every k from 1 to n.
%   errVal = erreursPrediction(xapp, yapp, x, n, y) calls kppv with
%   reference data (xapp, yapp) on the points x for k = 1..n and stores in
%   errVal(k) the mean of (y ~= predicted labels).
%   The distance matrix returned by kppv is passed back to the next call,
%   starting from an empty matrix on the first call.
distCache = [];
errVal = 1:n;                 % placeholder values, overwritten in the loop
for k = 1:numel(errVal)
    [predicted, distCache] = kppv(x, xapp, yapp, k, distCache);
    mismatch = (y ~= predicted);
    errVal(k) = mean(mismatch);
end
end
\ No newline at end of file
function [YPeval, MatDist]=kppv(Xeval, Xapp, Yapp, k, MatDist)
% KPPV  k-nearest-neighbours classification by majority vote.
%
% USAGE
%   [YPeval, MatDist] = kppv(Xeval, Xapp, Yapp, k, MatDist)
%
% INPUTS
%   Xeval   : points whose class must be predicted, Ne x d matrix
%             (d: number of variables).
%   Xapp    : training (reference) points, Na x d matrix.
%   Yapp    : vector of Na numeric labels of the points in Xapp.
%   k       : number of neighbours taking part in the vote, 1 <= k <= Na.
%   MatDist : Ne x Na matrix of squared Euclidean distances between the
%             rows of Xeval and the rows of Xapp. Pass [] (or omit the
%             argument) to have it computed here:
%             [YPeval, MatDist] = kppv(Xeval, Xapp, Yapp, k, [])
%
% OUTPUTS
%   YPeval  : Ne x 1 vector of predicted labels for the rows of Xeval.
%             On a tie, the smallest label among the tied classes wins
%             (first maximum over the sorted class list).
%   MatDist : the distance matrix used, so that it can be reused by later
%             calls with another k on the same data.

% Class list as a row vector, whatever the orientation of Yapp.
classcode = unique(Yapp(:)).';
Ne = size(Xeval,1);
Na = size(Xapp,1);
YPeval = zeros(Ne,1);

% Fail early with a clear message instead of an index error in the vote
% (k > Na) or a silent "first class" prediction (k < 1).
if ~isscalar(k) || k < 1 || k > Na
    error('kppv:invalidK', ...
        'k must be a scalar between 1 and the number of training points (%d).', Na);
end

if nargin < 5 || isempty(MatDist)
    % No distance matrix supplied: compute the squared Euclidean distances,
    % one row of Xeval against all rows of Xapp at a time.
    MatDist = zeros(Ne, Na);
    for i = 1:Ne
        delta = bsxfun(@minus, Xapp, Xeval(i,:));
        MatDist(i,:) = sum(delta .* delta, 2).';
    end
elseif ~isequal(size(MatDist), [Ne, Na])
    % A cached matrix computed for other data would give wrong neighbours.
    error('kppv:distSizeMismatch', ...
        'MatDist must be %d x %d (rows of Xeval by rows of Xapp).', Ne, Na);
end

% Predict the label of each of the Ne points of Xeval.
for i = 1:Ne
    [~, order] = sort(MatDist(i,:));
    nearest = Yapp(order(1:k));
    % votes(c) = number of the k nearest neighbours with label classcode(c)
    votes = sum(bsxfun(@eq, nearest(:), classcode), 1);
    [~, winner] = max(votes);
    YPeval(i) = classcode(winner);
end
\ No newline at end of file
function [xapp,xtest,meanxapp,stdxapp] = normalizemeanstd(xapp,xtest,meanx,stdx)
% NORMALIZEMEANSTD  Center and scale the columns of xapp (and xtest).
%
% USAGE
%   [xapp,xtest,meanxapp,stdxapp] = normalizemeanstd(xapp)
%   [xapp,xtest,meanxapp,stdxapp] = normalizemeanstd(xapp,xtest)
%   [xapp,xtest,meanxapp,stdxapp] = normalizemeanstd(xapp,xtest,meanx,stdx)
%
% INPUTS
%   xapp  : n x d data matrix to normalize.
%   xtest : optional m x d matrix normalized with the SAME statistics as
%           xapp; pass [] when there is none.
%   meanx : optional vector of d means to subtract. When omitted or empty,
%           the column means of xapp are used.
%   stdx  : optional vector of d standard deviations to divide by. When
%           omitted or empty, the column standard deviations of xapp are
%           used.
%
% OUTPUTS
%   xapp, xtest : normalized data (xtest is [] when no test data is given).
%   meanxapp    : 1 x d means actually used.
%   stdxapp     : 1 x d standard deviations actually used; entries whose
%                 absolute value is below 1e-5 are replaced by 1 so that
%                 (near-)constant columns are not divided by ~0.

tol = 1e-5;

% Each statistic is resolved independently, so supplying only meanx works.
% The explicit dimension argument keeps per-column statistics even when
% xapp has a single row.
if nargin < 3 || isempty(meanx)
    meanxapp = mean(xapp, 1);
else
    meanxapp = meanx(:).';
end
if nargin < 4 || isempty(stdx)
    stdxapp = std(xapp, 0, 1);
else
    stdxapp = stdx(:).';
end

% Avoid dividing by (almost) zero for constant variables.
stdxapp(abs(stdxapp) < tol) = 1;

xapp = bsxfun(@rdivide, bsxfun(@minus, xapp, meanxapp), stdxapp);

if nargin > 1 && ~isempty(xtest)
    xtest = bsxfun(@rdivide, bsxfun(@minus, xtest, meanxapp), stdxapp);
else
    xtest = [];
end
\ No newline at end of file
function [xapp, yapp, xtest, ytest] = splitdata(x, y, ratio)
% SPLITDATA  Random stratified split of a labelled data set in two parts.
%
% USAGE
%   [xapp, yapp, xtest, ytest] = splitdata(x, y, ratio)
%
% INPUTS
%   x     : N x d data matrix, one sample per row.
%   y     : vector of N labels (row or column).
%   ratio : fraction of each class sent to the first part; for a class with
%           Ni samples, ceil(ratio*Ni) of them go to (xapp, yapp) and the
%           remaining ones to (xtest, ytest).
%
% OUTPUTS
%   xapp, yapp   : first part; yapp is a column vector.
%   xtest, ytest : second part; ytest is a column vector.
%   In both parts the samples are grouped class by class (in the order of
%   unique(y)) and randomly ordered inside each class.

% Work on a column of labels so that row-vector labels are handled the
% same way as column-vector labels.
y = y(:);
classcode = unique(y);

% Collect row indices first and slice x and y once at the end.
appIdx = [];
testIdx = [];
for numclass = 1:length(classcode)
    indclass = find(y == classcode(numclass));
    Ni = length(indclass);
    aux = randperm(Ni);
    nApp = ceil(ratio*Ni);
    selApp = indclass(aux(1:nApp));
    selTest = indclass(aux(nApp+1:end));
    appIdx = [appIdx; selApp(:)];
    testIdx = [testIdx; selTest(:)];
end

xapp = x(appIdx,:);
yapp = y(appIdx);
xtest = x(testIdx,:);
ytest = y(testIdx);
= Test
:nofooter:
:icons: font
== Machin
* Hello
** Test
... mar
... fjsjk
WARNING: test
\ No newline at end of file
load("uspsasi.mat");
% Keep only the samples whose label is 2
ind = find(y==2);
carac = x(ind,:);
% Row of carac that is displayed and then reconstructed
nbImg = 3;
figure(1)
subplot(211)
% Each row of carac is displayed as a 16 x 16 image
imagesc(reshape(carac(nbImg, :), 16, 16)'); colormap(gray);
%imagesc(corrcoef(carac)), colorbar;
[valpropres, U, moy] = mypca(carac);
figure(2)
% Share of each eigenvalue in the total
bar(valpropres/sum(valpropres));
title('Valeurs propres par ordre décroissant');
% Dimension of the projected data
d=2;
P= U(:,1:d);
C = projpca(carac, moy, P);
figure(3)
% Scatter plot of the first two projected coordinates
plot(C(:,1),C(:,2),'ro')
% Reconstruct the SAME sample as the one shown in the top subplot, so that
% the two images can be compared.
index = nbImg;
xhat=reconstructpca(C(index,:),P,moy);
figure(1)
subplot(212)
% Only one sample is reconstructed, so its image is read from the first
% row of xhat (indexing row nbImg of a single-row result is out of bounds).
% NOTE(review): assumes reconstructpca returns one row per projected
% sample - TODO confirm against reconstructpca.m
imagesc(reshape(xhat(1, :), 16, 16)'); colormap(gray);
load("Subset1YaleFaces.mat");
function [errVal] = erreursPrediction(xapp,yapp,x,n,y)
% ERREURSPREDICTION  Error rate of kppv for every k from 1 to n.
%   errVal = erreursPrediction(xapp, yapp, x, n, y) calls kppv with
%   reference data (xapp, yapp) on the points x for k = 1..n and stores in
%   errVal(k) the mean of (y ~= predicted labels).
%   The distance matrix returned by kppv is passed back to the next call,
%   starting from an empty matrix on the first call.
distCache = [];
errVal = 1:n;                 % placeholder values, overwritten in the loop
for k = 1:numel(errVal)
    [predicted, distCache] = kppv(x, xapp, yapp, k, distCache);
    mismatch = (y ~= predicted);
    errVal(k) = mean(mismatch);
end
end
\ No newline at end of file
<!DOCTYPE html
PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<!--
This HTML was auto-generated from MATLAB code.
To make changes, update the MATLAB code and republish this document.
--><title>TP1_2</title><meta name="generator" content="MATLAB 9.3"><link rel="schema.DC" href="http://purl.org/dc/elements/1.1/"><meta name="DC.date" content="2018-09-10"><meta name="DC.source" content="TP1_2.m"><style type="text/css">
html,body,div,span,applet,object,iframe,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,del,dfn,em,font,img,ins,kbd,q,s,samp,small,strike,strong,sub,sup,tt,var,b,u,i,center,dl,dt,dd,ol,ul,li,fieldset,form,label,legend,table,caption,tbody,tfoot,thead,tr,th,td{margin:0;padding:0;border:0;outline:0;font-size:100%;vertical-align:baseline;background:transparent}body{line-height:1}ol,ul{list-style:none}blockquote,q{quotes:none}blockquote:before,blockquote:after,q:before,q:after{content:'';content:none}:focus{outine:0}ins{text-decoration:none}del{text-decoration:line-through}table{border-collapse:collapse;border-spacing:0}
html { min-height:100%; margin-bottom:1px; }
html body { height:100%; margin:0px; font-family:Arial, Helvetica, sans-serif; font-size:10px; color:#000; line-height:140%; background:#fff none; overflow-y:scroll; }
html body td { vertical-align:top; text-align:left; }
h1 { padding:0px; margin:0px 0px 25px; font-family:Arial, Helvetica, sans-serif; font-size:1.5em; color:#d55000; line-height:100%; font-weight:normal; }
h2 { padding:0px; margin:0px 0px 8px; font-family:Arial, Helvetica, sans-serif; font-size:1.2em; color:#000; font-weight:bold; line-height:140%; border-bottom:1px solid #d6d4d4; display:block; }
h3 { padding:0px; margin:0px 0px 5px; font-family:Arial, Helvetica, sans-serif; font-size:1.1em; color:#000; font-weight:bold; line-height:140%; }
a { color:#005fce; text-decoration:none; }
a:hover { color:#005fce; text-decoration:underline; }
a:visited { color:#004aa0; text-decoration:none; }
p { padding:0px; margin:0px 0px 20px; }
img { padding:0px; margin:0px 0px 20px; border:none; }
p img, pre img, tt img, li img, h1 img, h2 img { margin-bottom:0px; }
ul { padding:0px; margin:0px 0px 20px 23px; list-style:square; }
ul li { padding:0px; margin:0px 0px 7px 0px; }
ul li ul { padding:5px 0px 0px; margin:0px 0px 7px 23px; }
ul li ol li { list-style:decimal; }
ol { padding:0px; margin:0px 0px 20px 0px; list-style:decimal; }
ol li { padding:0px; margin:0px 0px 7px 23px; list-style-type:decimal; }
ol li ol { padding:5px 0px 0px; margin:0px 0px 7px 0px; }
ol li ol li { list-style-type:lower-alpha; }
ol li ul { padding-top:7px; }
ol li ul li { list-style:square; }
.content { font-size:1.2em; line-height:140%; padding: 20px; }
pre, code { font-size:12px; }
tt { font-size: 1.2em; }
pre { margin:0px 0px 20px; }
pre.codeinput { padding:10px; border:1px solid #d3d3d3; background:#f7f7f7; }
pre.codeoutput { padding:10px 11px; margin:0px 0px 20px; color:#4c4c4c; }
pre.error { color:red; }
@media print { pre.codeinput, pre.codeoutput { word-wrap:break-word; width:100%; } }
span.keyword { color:#0000FF }
span.comment { color:#228B22 }
span.string { color:#A020F0 }
span.untermstring { color:#B20000 }
span.syscmd { color:#B28C00 }
.footer { width:auto; padding:10px 0px; margin:25px 0px 0px; border-top:1px dotted #878787; font-size:0.8em; line-height:140%; font-style:italic; color:#878787; text-align:left; float:none; }
.footer p { margin:0px; }
.footer a { color:#878787; }
.footer a:hover { color:#878787; text-decoration:underline; }
.footer a:visited { color:#878787; }
table th { padding:7px 5px; text-align:left; vertical-align:middle; border: 1px solid #d6d4d4; font-weight:bold; }
table td { padding:7px 5px; text-align:left; vertical-align:top; border:1px solid #d6d4d4; }
</style></head><body><div class="content"><pre class="codeinput">ind = find(y==2);
carac = x(ind,:);
figure(1)
subplot(211)
imagesc(reshape(carac(198, :), 16, 16)'); colormap(gray);
<span class="comment">%imagesc(corrcoef(carac)), colorbar;</span>
[valpropres, U, moy] = mypca(carac);
figure(2)
bar(valpropres/sum(valpropres));
title(<span class="string">'Valeurs propres par ordre d&eacute;croissant'</span>);
<span class="comment">%dimension des donn&eacute;es projet&eacute;es</span>
d=2;
P= U(:,1:d);
C = projpca(carac, moy, P);
figure(3)
plot(C(:,1),C(:,2),<span class="string">'ro'</span>)
index = 21;
xhat=reconstructpca(C(index,:),P,moy);
figure(1)
subplot(212)
imagesc(reshape(xhat(198, :), 16, 16)'); colormap(gray);
</pre><pre class="codeoutput">Warning: Using only the real component of complex data.
</pre><img vspace="5" hspace="5" src="TP1_2_01.png" alt=""> <img vspace="5" hspace="5" src="TP1_2_02.png" alt=""> <img vspace="5" hspace="5" src="TP1_2_03.png" alt=""> <p class="footer"><br><a href="http://www.mathworks.com/products/matlab/">Published with MATLAB&reg; R2017b</a><br></p></div><!--
##### SOURCE BEGIN #####
ind = find(y==2);
carac = x(ind,:);
figure(1)
subplot(211)
imagesc(reshape(carac(198, :), 16, 16)'); colormap(gray);
%imagesc(corrcoef(carac)), colorbar;
[valpropres, U, moy] = mypca(carac);
figure(2)
bar(valpropres/sum(valpropres));
title('Valeurs propres par ordre décroissant');
%dimension des données projetées
d=2;
P= U(:,1:d);
C = projpca(carac, moy, P);
figure(3)
plot(C(:,1),C(:,2),'ro')
index = 21;
xhat=reconstructpca(C(index,:),P,moy);
figure(1)
subplot(212)
imagesc(reshape(xhat(198, :), 16, 16)'); colormap(gray);
##### SOURCE END #####
--></body></html>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment