Commit beafbe66 authored by Lacoin Achille's avatar Lacoin Achille
Browse files

ajout du TP3 - Lundi Fin TP

parent 8d21b815
% Complete-linkage hierarchical clustering of a random subsample of the
% "george" 2-D point set.

% Load the points and display the raw data.
X = load("george.dat");
figure, plot(X(:,1), X(:,2), 'o', 'markersize', 8, 'markerfacecolor', 'r')
hold on
title('George','fontsize', 14)
% Keep a random subset of the rows (step 10) and build its distance matrix.
ltlX = mydownsampling(X, 10);
M = distance(ltlX);
%%
method='complete';
level_max = aggclust(M, method);
figure
dendro(level_max);
title('Clustering avec distance max', 'fontsize', 24)
K=2; % number of clusters wanted
% Level N-K+1 of the hierarchy holds exactly K clusters.
N = size(ltlX, 1);
clusters=level_max(N-K+1).cluster;
% Plot the clusters. The indices stored in "clusters" are row numbers of
% ltlX (the matrix the distances were computed from), so ltlX, not X, must
% be passed here; indexing X with them would draw unrelated points.
plotclusters(clusters, ltlX, K);
title('Clustering avec distance max', 'fontsize', 24)
\ No newline at end of file
% Load the "george" 2-D point set, display it, and take a random subsample.
X = load("george.dat");
figure, plot(X(:,1), X(:,2), 'ro', 'markersize', 8, 'markerfacecolor', 'r')
hold on
title('George','fontsize', 14)
% mydownsampling requires both the data and a step; calling it with no
% argument fails. Step 10 keeps about one row in ten.
ltlX = mydownsampling(X, 10);
\ No newline at end of file
%%% Synthetic data: two Gaussian classes in 2-D
vmu1 = [0; 0];   % mean of class 1
vmu2 = [3; 3];   % mean of class 2
SIGMA = eye(2);  % shared covariance matrix
n = 50;          % number of points per class
% Draw n random points per class from the two Gaussian laws.
X1 = mvnrnd(vmu1, SIGMA, n); % class 1
X2 = mvnrnd(vmu2, SIGMA, n); % class 2
% Show the ground truth: class 1 as red circles, class 2 as blue triangles.
figure
plot(X1(:,1), X1(:,2), 'ro', 'markersize', 8, 'markerfacecolor', 'r')
hold on
plot(X2(:,1), X2(:,2), 'bv', 'markersize', 8, 'markerfacecolor', 'b')
title('La vérité vraie','fontsize', 14)
% Stack both classes into one matrix, as if the class of each point were
% unknown. Only X is kept from here on; X1 and X2 are cleared.
X = vertcat(X1, X2);
clear X1 X2
\ No newline at end of file
function level = aggclust(distance, method)
% AGGCLUST Agglomerative (bottom-up) hierarchical clustering.
%   level = aggclust(distance, method)
%
%   Inputs
%     distance : 2-D matrix of pairwise distances between the data points,
%                with Inf on the diagonal
%     method   : 'single' (single-linkage) or 'complete' (complete-linkage);
%                'complete' is used when the argument is omitted
%
%   Output
%     level : struct array describing the hierarchy, one entry per level
%       level(i).distance   : distance matrix between the clusters of level i
%       level(i).height     : distance at which level i was formed
%       level(i).merged     : the two clusters of level i-1 that were merged
%                             to form level i
%       level(i).cluster{j} : indices of the data points in the j-th cluster
%                             of level i
%
%   Calling "aggclust" with no argument runs a demo on 50 random points of
%   dimension 2.
%
%   See also DENDRO, LINKCLU.
%   Roger Jang, 981027
if nargin == 0
    selfdemo;
    return;
end
if nargin < 2
    method = 'complete';
end
n_points = size(distance, 1);
% Level 1: every data point is its own singleton cluster.
level(1).distance = distance;
level(1).height = 0;
level(1).merged = [];
for p = 1:n_points
    level(1).cluster{p} = p;
end
% Each later level is obtained by merging two clusters of the previous one.
k = 2;
while k <= n_points
    level(k) = merge(level(k-1), method);
    k = k + 1;
end
% ====== Merge clusters
function level_out = merge(level, method)
% MERGE Merge a level of n clusters into a level of n-1 clusters.
%   level  : one entry of the AGGCLUST hierarchy (fields distance, height,
%            merged, cluster)
%   method : 'single' or 'complete'
%   The two clusters with the smallest entry in level.distance are merged.
[a, b, gap] = minxy(level.distance);
% Order the pair so that a < b: cluster a receives the union and keeps its
% position when row/column b is removed below.
if a > b
    [a, b] = deal(b, a);
end
level_out = level;
level_out.height = gap;
level_out.merged = [a b];
% Cluster a absorbs the points of cluster b, which is then removed.
level_out.cluster{a} = [level_out.cluster{a} level_out.cluster{b}];
level_out.cluster(b) = [];
% Linkage rule: element-wise "min" for single-linkage, "max" for
% complete-linkage.
if strcmp(method, 'single')
    combine = @min;
elseif strcmp(method, 'complete')
    combine = @max;
else
    error('Unsupported method in AGGCLUST!');
end
% Build the new distance matrix: column a, then row a, take the combined
% distances of the old clusters a and b; row and column b disappear.
D = level.distance;
D(:, a) = combine(D(:, a), D(:, b));
D(a, :) = combine(D(a, :), D(b, :));
D(b, :) = [];
D(:, b) = [];
% Restore the Inf diagonal entry of the merged cluster.
D(a, a) = inf;
level_out.distance = D;
% ====== Find the minimum value in a matrix
function [i, j, min_value] = minxy(A)
% MINXY Locate the smallest entry of a matrix.
%   [i, j, min_value] = minxy(A) returns the row index i, the column index j
%   and the value min_value of the minimum of A. When the minimum occurs
%   several times, the first occurrence in column-major order is returned
%   (lowest column first, then lowest row).
%
%   The search runs over A(:) so that a 1-by-n input is handled like any
%   other matrix; a plain min(A) would reduce such an input along its second
%   dimension and yield swapped, out-of-range indices.
[min_value, k] = min(A(:));
[i, j] = ind2sub(size(A), k);
% ====== Self demo ======
function selfdemo
% SELFDEMO Demo: single-linkage clustering of 50 random points in 2-D.
data_n = 50;
dimension = 2;
points = rand(data_n, dimension);
% Pairwise Euclidean distances. The matrix is preallocated with Inf, so the
% diagonal already holds the Inf entries AGGCLUST expects. It is named D so
% that it does not shadow a function called "distance".
D = inf(data_n);
for i = 1:data_n
    for j = 1:data_n
        if i ~= j
            D(i, j) = norm(points(i,:) - points(j,:));
        end
    end
end
% Request single-linkage explicitly: AGGCLUST falls back to 'complete' when
% the method is omitted, which would not be the documented demo.
level = aggclust(D, 'single');
% Plot heights w.r.t. levels
figure;
plot([level.height], 'r:o');
xlabel('Level');
ylabel('Height');
title('Height vs. level');
% Plot the dendrogram
figure;
dendro(level);
% View the formation of clusters
figure;
linkclu(points, D, level);
function dendro(level)
% DENDRO Dendrogram plot for the result of a hierarchical clustering.
%
%   Usage: dendro(level)
%     level: the hierarchical clustering output from AGGCLUST
%
%   Calling "dendro" with no argument runs a demo on 50 random points of
%   dimension 2.
%
%   See also AGGCLUST, HCLUSTDM.
%   Roger Jang, 981027
if nargin == 0, selfdemo; return, end
set(gca, 'xticklabel', []);
% The single cluster of the last level lists the data points in the
% left-to-right order used for the leaves.
leaf_order = level(end).cluster{1};
data_n = length(level);
axis([1, data_n, 0, level(end).height]);
xlabel('Data index');
ylabel('Distance');
title('Dendrogram');
% Write each data index, rotated, under its leaf position.
for i = 1:data_n
    h = text(i, 0, num2str(leaf_order(i)));
    set(h, 'rot', 90, 'fontsize', 8, 'hori', 'right');
end
% cap_x(j) / cap_y(j): horizontal position and height of the top of the
% j-th cluster of the level currently being processed. Initially cluster j
% is data point j, located at its leaf position with height 0.
cap_x(leaf_order) = 1:data_n;
cap_y = zeros(1, data_n);
for i = 2:data_n
    height = level(i).height;
    m = level(i).merged(1);
    n = level(i).merged(2);
    % Two vertical legs up to the merge height, then the horizontal cap.
    line([cap_x(m) cap_x(m)], [cap_y(m), height]);
    line([cap_x(n) cap_x(n)], [cap_y(n), height]);
    line([cap_x(m) cap_x(n)], [height, height]);
    % The merged cluster takes slot m, centred between its two children;
    % slot n is removed, mirroring the cluster list of level i.
    cap_x(m) = (cap_x(m) + cap_x(n)) / 2;
    cap_x(n) = [];
    cap_y(m) = height;
    cap_y(n) = [];
end
% Plot level lines for the dendrogram
%for i = 1:data_n,
% line([1 data_n], [level(i).height level(i).height], ...
% 'color', 'c', 'linestyle', ':');
%end
% ====== Self demo ======
function selfdemo
% SELFDEMO Demo: dendrogram of a single-linkage clustering of 50 random
% points in 2-D.
data_n = 50;
dimension = 2;
points = rand(data_n, dimension);
% Pairwise Euclidean distances. The matrix is preallocated with Inf, so the
% diagonal already holds the Inf entries AGGCLUST expects. It is named D so
% that it does not shadow a function called "distance".
D = inf(data_n);
for i = 1:data_n
    for j = 1:data_n
        if i ~= j
            D(i, j) = norm(points(i,:) - points(j,:));
        end
    end
end
% Request single-linkage explicitly so that the message printed below is
% true; AGGCLUST falls back to 'complete' when the method is omitted.
level = aggclust(D, 'single');
% Plot the dendrogram
figure;
dendro(level);
fprintf('The figure is a dendrogram (single-linkage) of 50 random points in 2D\n');
function M = distance(X)
% DISTANCE Pairwise dissimilarity matrix for hierarchical clustering.
%   M = distance(X), with one observation per row of X, returns the N-by-N
%   matrix M(i,j) = x_i*x_i' + x_j*x_j' - 2*x_i*x_j', i.e. the squared
%   Euclidean distance between rows i and j (no square root is taken).
%   The diagonal is set to Inf.
N = size(X, 1);
G = X * X';       % inner products between all pairs of rows
sq = diag(G);     % squared norm of each row, as a column vector
M = sq * ones(1, N) + ones(N, 1) * sq' - 2 * G;
% Set d(x_i, x_i) = Inf so that a point is never its own nearest neighbour.
M(logical(eye(N))) = inf;
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
function [C,clusters,JWiter] = kmoyennes(X,K,Co)
% KMOYENNES Iterative clustering alternating assignment and centre update.
%   [C, clusters, JWiter] = kmoyennes(X, K, Co)
%
%   Inputs
%     X  : data, passed unchanged to AFFECTATION and NOUVEAUX_CENTRES
%     K  : number of clusters, passed to AFFECTATION
%     Co : initial centres
%
%   Outputs
%     C        : centres returned by the last call to NOUVEAUX_CENTRES
%     clusters : result of the last call to AFFECTATION
%     JWiter   : MaxIter-by-1 vector; JWiter(t) is the second output of
%                AFFECTATION at iteration t
%
%   NOTE(review): AFFECTATION and NOUVEAUX_CENTRES are defined outside this
%   file; presumably they assign points to the nearest centre and recompute
%   the centres - confirm against their sources.
C = Co;
MaxIter = 15;
JWiter = zeros(MaxIter,1);
% A for-loop guarantees termination after MaxIter passes. The previous
% while-loop never advanced its counter, so it ran forever and kept
% overwriting JWiter(1).
for iter = 1:MaxIter
    [clusters, Jw] = affectation(X,C,K);
    JWiter(iter) = Jw;
    C = nouveaux_centres(X,clusters);
end
end
function datalight = mydownsampling(data, step)
% MYDOWNSAMPLING Keep a random subset of the rows of a data matrix.
%   datalight = mydownsampling(data, step) returns ceil(ndata/step) rows of
%   data, where ndata is the number of rows. The rows are the first ones of
%   a random permutation, so they are distinct and in random order.
%
%   data : matrix with one observation per row
%   step : downsampling factor, must be >= 1 (step = 1 keeps every row,
%          shuffled)
%
%   An error is raised when step < 1.
[ndata, dim] = size(data);
% The message now matches the check: a step of exactly 1 is accepted.
assert(step >= 1, 'the downsampling step should be at least 1')
Nlight = ceil(ndata/step);
ind = randperm(ndata);
% Copy all selected rows at once into a preallocated matrix of doubles.
datalight = zeros(Nlight, dim);
datalight(:, :) = data(ind(1:Nlight), :);
\ No newline at end of file
function eff_clusters = plotclusters(clusters, X, K)
% PLOTCLUSTERS Plot a 2-D clustering result in a new figure.
%   eff_clusters = plotclusters(clusters, X, K)
%
%   clusters : clustering result, either a cell array where clusters{i}
%              holds the row indices of X belonging to cluster i, or a
%              vector of labels where clusters(p) == i marks row p of X as
%              a member of cluster i
%   X        : data, one point per row; must have exactly 2 columns
%   K        : number of clusters to draw
%
%   eff_clusters : K-by-1 vector, number of points in each cluster
%
%   An error is raised when X does not have 2 columns.
d = size(X, 2);
if d ~= 2
    error('Affichage disponible uniquement en 2D')
end
couleur = 'rbgmckyrbg';
symbole = 'ovpsh>xd<+';
n_styles = numel(couleur);
eff_clusters = zeros(K,1);
figure;
for i = 1:K
    if iscell(clusters)
        ii = clusters{i};
    else
        ii = find(clusters==i);
    end
    eff_clusters(i) = length(ii);
    % Cycle through the 10 colour/marker pairs so that K > 10 reuses the
    % styles instead of indexing past the end of the tables.
    s = mod(i - 1, n_styles) + 1;
    plot(X(ii,1), X(ii,2), [couleur(s), symbole(s)], 'markersize', 8, 'markerfacecolor', couleur(s));
    hold on;
end
% The original title had lost its accented character ("Rsultat").
title('Resultat clustering','fontsize', 14)
\ No newline at end of file
% ========================================================
% Using AGGCLUST
% ========================================================
close all;
clear all;
clc
%%% Data
vmu1 = [0 0]';
vmu2 = [3 3]';
SIGMA = eye(2);
n = 50; % number of points per cluster
% Draw random points from two Gaussian laws
X1 = mvnrnd(vmu1,SIGMA,n); % class 1
X2 = mvnrnd(vmu2,SIGMA,n); % class 2
figure, plot(X1(:,1), X1(:,2), 'ro', 'markersize', 8, 'markerfacecolor', 'r')
hold on
plot(X2(:,1), X2(:,2), 'bv', 'markersize', 8, 'markerfacecolor', 'b')
title('La verite vraie','fontsize', 14)
% Stack the data (as if the cluster of each point were unknown)
X = [X1; X2];
% Distance matrix between all points
M = distance(X);
%% ========== Agglomerative hierarchical clustering ===========
% -------- complete linkage (maximal distance) -------
method='complete';
level_max = aggclust(M, method);
% Display the dendrogram
figure
dendro(level_max);
title('Clustering avec distance max', 'fontsize', 24)
K=3; % number of clusters wanted
% Level N-K+1 of the hierarchy holds exactly K clusters
N = size(X, 1);
clusters=level_max(N-K+1).cluster;
% Plot the clusters obtained
plotclusters(clusters, X, K);
title('Clustering avec distance max', 'fontsize', 24)
% ------- single linkage (minimal distance) -------
method='single'; % minimal jump
level_min = aggclust(M, method);
% Display the dendrogram
figure
dendro(level_min);
% Title fixed: it read 'distance mix' although this is the 'min' linkage
title('Clustering avec distance min', 'fontsize', 24)
% Level N-K+1 of the hierarchy holds exactly K clusters
N = size(X, 1);
clusters=level_min(N-K+1).cluster;
% Plot the clusters obtained
plotclusters(clusters, X, K);
title('Clustering avec distance min', 'fontsize', 24)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment