Ppca M
Ppca M
Ppca M
% PCA applicable to
% - extreme high-dimensional data (e.g., gene expression data) and
% - incomplete data (missing data)
%
% probabilistic PCA (PPCA) [Verbeek 2002]
% based on sensible principal components analysis [S. Roweis 1997]
% code slightly adapted by M.Scholz
%
% pc = ppca(data)
% [pc,W,data_mean,xr,evals,percentVar]=ppca(data,k)
%
% data - inclomplete data set, d x n - matrix
% rows: d variables (genes or metabolites)
% columns: n samples
%
% k - number of principal components (default k=2)
% pc - principal component scores (feature space)
% plot(pc(1,:),pc(2,:),'.')
% W - loadings (weights)
% xr - reconstructed complete data matrix (for k components)
% evals - eigenvalues
% percentVar - Variance of each PC in percent
%
% pc=W*data
% data_recon = (pinv(W)*pc)+repmat(data_mean,1,size(data,2))
%
% Example:
% [pc,W,data_mean,xr,evals,percentVar]=ppca(data,2)
% plot(pc(1,:),pc(2,:),'.');
% xlabel(['PC 1 (',num2str(round(percentVar(1)*10)/10),'%)',]);
% ylabel(['PC 2 (',num2str(round(percentVar(2)*10)/10),'%)',]);
%
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin==1
k=2
end
[C,ss,M,X,Ye]=ppca_mv(data',k,0,0);
xr=Ye';
W=C';
data_mean=M';
pc=X';
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% calculate variance of PCs
evals=nan(1,k);
for i=1:k
data_recon = (pinv(W(i,:))*pc(i,:)); % without mean correction (does not change
the variance)
evals(i)=sum(var(data_recon'));
end
percentVar=evals./total_variance*100;
% cumsumVarPC=nan(1,k);
% for i=1:k
% data_recon = (pinv(W(1:i,:))*pc(1:i,:)) + repmat(data_mean,1,size(data,2));
% cumsumVarPC(i)=sum(var(data_recon'));
% end
% cumsumVarPC
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% original code by Jakob Verbeek
[N,D] = size(Ye);
Obs = ~isnan(Ye);
hidden = find(~Obs);
missing = length(hidden);
if missing; Ye(hidden)=0;end
r = randperm(N);
C = Ye(r(1:d),:)'; % ======= Initialization ======
C = randn(size(C));
CtC = C'*C;
X = Ye * C * inv(CtC);
recon = X*C'; recon(hidden) = 0;
ss = sum(sum((recon-Ye).^2)) / ( (N*D)-missing);
count = 1;
old = Inf;
count = count + 1;
if ( rel_ch < threshold) & (count > 5); count = 0;end
if dia; fprintf('Objective: M %s relative change: %s \n',objective,
rel_ch ); end
C = orth(C);
[vecs,vals] = eig(cov(Ye*C));
[vals,ord] = sort(diag(vals));
ord = flipud(ord);
vecs = vecs(:,ord);
C = C*vecs;
X = Ye*C;
function plot_it(Y,C,ss,plo);
clf;
if plo==1
plot(Y(:,1),Y(:,2),'.');
hold on;
h=plot(C(1,1)*[-1 1]*(1+sqrt(ss)), (1+sqrt(ss))*C(2,1)*[-1 1],'r');
h2=plot(0,0,'ro');
set(h,'LineWi',4);
set(h2,'MarkerS',10);set(h2,'MarkerF',[1,0,0]);
axis equal;
elseif plo==2
len = 28;nc=1;
colormap([0:255]'*ones(1,3)/255);
d = size(C,2);
m = ceil(sqrt(d)); n = ceil(d/m);
for i=1:d;
subplot(m,n,i);
im = reshape(C(:,i),len,size(Y,2)/len,nc);
im = (im - min(C(:,i)))/(max(C(:,i))-min(C(:,i)));
imagesc(im);axis off;
end;
end
drawnow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%