Commit
# Conflicts:
#   pcm_fitModelGroup.m
Showing 36 changed files with 30,500 additions and 710 deletions.
@@ -0,0 +1 @@
pandoc -o pcm_toolbox_manual.pdf pcm_toolbox_manual.md --latex-engine=xelatex -F pandoc-crossref --filter pandoc-eqnos -F pandoc-citeproc
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="utf-8"?>
<style xmlns="http://purl.org/net/xbiblio/csl" version="1.0" default-locale="en-US">
  <!-- Generated with https://github.com/citation-style-language/utilities/tree/master/generate_dependent_styles/data/elsevier -->
  <info>
    <title>NeuroImage</title>
    <id>http://www.zotero.org/styles/neuroimage</id>
    <link href="http://www.zotero.org/styles/neuroimage" rel="self"/>
    <link href="http://www.zotero.org/styles/elsevier-harvard" rel="independent-parent"/>
    <category citation-format="author-date"/>
    <issn>1053-8119</issn>
    <updated>2014-05-17T12:00:00+00:00</updated>
    <rights license="http://creativecommons.org/licenses/by-sa/3.0/">This work is licensed under a Creative Commons Attribution-ShareAlike 3.0 License</rights>
  </info>
</style>
@@ -1,278 +1,116 @@
function [G,theta,u,l,k]=pcm_NR(y,Z,varargin)
% pcm_NR: estimate random-effects variance component coefficients using
% Newton-Raphson gradient descent.
function [theta,l,k,reg]=pcm_NR(theta0,likefcn,varargin)
% function [theta,l,k]=pcm_NR(theta0,likefcn,varargin)
% Newton-Raphson algorithm.
%
% function [G,theta,u,l,k]=pcm_NR(y,Z,varargin)
% Estimates the variance coefficients of the model described in:
% Diedrichsen, Ridgway, Friston & Wiestler (2011).
%
% y_n = X b_n + Z u_n + e,
%       u ~ (b, G)
%       G = sum (theta * Gc)
%
% y: N x P observations
% Z: N x Q random effects matrix
%
% N: number of observations (trials)
% P: number of experimental units (voxels)
% Q: number of random effects
% likefcn: Function handle that returns the
%      a) Negative log-likelihood
%      b) First derivative of the negative log-likelihood
%      c) Expected second derivative of the negative log-likelihood
%
% VARARGIN:
% 'numIter' : Maximal number of iterations
% 'Gc'      : Cell array (Hx1) of components of the variance-component
%             matrix G = sum(h_m Gc{m}), with m=1...H
% 'h0'      : Starting values for the parameters (Hx1 vector)
%             (otherwise uses a starting guess based on Laird & Lange)
% 'theta0'  : Starting values for the parameters (Hx1 vector)
% 'TolL'    : Tolerance of the likelihood (l-l'), where l' is the
%             projected likelihood
% 'meanS'   : Remove the mean for each pattern component (a)
%             (Logical flag, true by default)
% 'X'       : Fixed effects matrix that will be removed from y
%             In this case, ReML will be used for the estimation of
%             G.
% ??? Important: X will also be removed from Z to orthogonalise the
%             random effects from the constant effects, making the
%             estimation of b_n independent of G
% 'HessReg' : Regulariser on the Hessian to increase the stability of
%             the fit (set to 1/256)
%
% OUTPUT:
% G     : variance-covariance matrix
% theta : Variance coefficients (one column for each iteration)
% u     : hidden pattern components
% l     : 1x2 vector of likelihoods
%         l(1) = likelihood p(y|theta) for maximal theta, i.e. the maximal
%         type II likelihood for the best estimates of theta, integrated over u
%         l(2) = marginal likelihood p(y) based on the Laplace (Normal)
%         approximation around the posterior mode of log(theta)
%
% Examples:
% See mva_component_examples
% l     : Log-likelihood of p(y|theta) for maximal theta
%         This is the type II maximum likelihood for the best estimates of theta, integrated over u
%
% See also: mva_component_examples, spm_reml, spm_reml_sc, spm_sp_reml
% Where spm_* are from the SPM software, http://www.fil.ion.ucl.ac.uk/spm
% See also: pcm_NR_diag, pcm_NR_comp
% v.1:
%
% v.3.0:
%
% Copyright 2014 Joern Diedrichsen, [email protected]
% Copyright 2017 Joern Diedrichsen, [email protected]

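% Example (an illustrative sketch added for this write-up, not part of the
% original help text): the new interface expects likefcn to return (a) the
% negative log-likelihood, (b) its first derivative, and (c) the expected
% second derivative (Fisher information). A toy one-parameter fit of a
% Gaussian variance, with theta = log(variance), could look like this:
%
%   y = randn(100,1)*2;                                % toy data, true variance 4
%   n = numel(y);
%   likefcn = @(th) deal( ...
%       n/2*th(1) + sum(y.^2)/(2*exp(th(1))), ...      % negative log-likelihood (up to a constant)
%       n/2 - sum(y.^2)/(2*exp(th(1))), ...            % first derivative w.r.t. th
%       n/2);                                          % expected second derivative
%   [thetaEst,logLik] = pcm_NR(0,likefcn);             % exp(thetaEst) approaches sum(y.^2)/n
%
% Within the toolbox, likefcn would typically wrap a PCM likelihood function
% (e.g. pcm_likelihoodIndivid) with the data and model held fixed.
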
% Defaults
%--------------------------------------------------------------------------
meanS = 0; % Mean subtract
ac = []; % Which terms do I want to include?
numIter = 32; % Maximal number of iterations
Gc = {};
h0 = [];
low = -16;
thres = 1e-2;
hE = -8; % Hyper prior: set at the floor to collect evidence
hP = 1/256; % Precision on hyper prior
X = []; % By default fixed effects are empty
OPT.numIter = 80; % Maximal number of iterations
OPT.thres = 1e-4; % Tolerance on decrease of likelihood
OPT.HessReg = 1; % Regularisation on the Hessian (Fisher) matrix
OPT.verbose = 0;
OPT.regularization = 'L'; % Type of regularisation 'L': Levenberg, 'LM': Levenberg-Marquardt

% Variable argument options
%--------------------------------------------------------------------------
vararginoptions(varargin, ...
{'Gc','meanS','h0','ac','hE','hP','thres','X','numIter'});
OPT=rsa.getUserOptions(varargin,OPT,{'HessReg','thres','low','numIter','verbose','regularization'});

% check input size
%--------------------------------------------------------------------------
[N,P] = size(y);
[N2,Q] = size(Z);
if N2 ~= N
error('Mismatched numbers of rows in data (%d) and design (%d)', N, N2)
end
% Set warning to error, so it can be caught
warning('error','MATLAB:nearlySingularMatrix');

% Initialize the Model structure
%--------------------------------------------------------------------------
H = length(Gc)+1; % Number of Hyperparameters (+ 1 for noise term)
for i = 1:H-1
Gc{i} = sparse(Gc{i});
end;

% If necessary, subtract the fixed effects estimates (a)
%--------------------------------------------------------------------------
if (meanS)
a = pinv(Z)*sum(y,2)/P;
r = bsxfun(@minus,y,Z*a);
else
r = y;
end;

YY = r*r'; % This is sufficient statistic 1 (S1)
trYY = sum(diag(YY));

% Scale YY
%--------------------------------------------------------------------------
sY = 1; % trace(YY)/(P*N);
sYY = YY/sY;
trYY = trYY/sY;
rs=r/sqrt(sY);

% Figure out which terms to include into the model (set others to -32)
%--------------------------------------------------------------------------
if (isempty(ac))
as = 1:H;
nas = [];
else
as = find(ac);
nas = find(ac==0);
end;

% initialise h
%--------------------------------------------------------------------------
if (isempty(h0))
xC = zeros(Q,H-1);
for i=1:H-1
xC(:,i) = diag(Gc{i});
end;
h = ones(H,1)*low;
u = pinv(Z)*rs;
h(H,1) = (trYY-traceAB(u'*Z',rs))/(P*N);
D = u*u'/P-h(H)*pinv(Z'*Z)/P; % Crude approximation for the variance-covariance matrix
hD = diag(D); % Use the size of the diagonal values to get
% starting values for the constrained estimates
h(1:H-1,1) = real(log(pinv(xC)*hD(:))); % (xC'*xC)\(xC'*hD(:))
h(h<low) = low+1;
else
h = h0;
end;
h(nas) = low;

% Initialize iterations and hyperpriors
% Initialize iterations
%--------------------------------------------------------------------------
dF = Inf;
dFdh = zeros(H,1);
dFdhh = zeros(H,H);
hE = hE*ones(H,1); % Prior mean of h
hP = hP*speye(H,H); % Prior precision (1/variance) of h

for k = 1:numIter
% compute current estimate of covariance
%----------------------------------------------------------------------
G = sparse(Q,Q);
for i = as(1:end-1);
G = G + Gc{i}*exp(h(i));
end

% Find the inverse of V - while dropping the zero dimensions in G
[u,s] = eig(full(G));
dS = diag(s);
idx = dS>eps;
Zu = Z*u(:,idx);
iV = (eye(N)-Zu/(diag(1./dS(idx))*exp(h(H))+Zu'*Zu)*Zu')./exp(h(H)); % Matrix inversion lemma

if (~isempty(X))
iVX = iV * X;
iVr = iV - iVX*((X'*iVX)\iVX'); % Correction for the fixed effects
else
iVr = iV;
end;
H = length(theta0); % Number of parameters
% OPT.HessReg = OPT.HessReg*eye(H,H); % Prior precision (1/variance) of h
theta=theta0;
for k = 1:OPT.numIter
thetaH(:,k)=theta;
regH(k)=OPT.HessReg;
[nl(k),dFdh,dFdhh]=likefcn(theta);

% ReML estimate of hyperparameters

% Gradient dF/dh (first derivatives)
%----------------------------------------------------------------------
A = iVr*Z;
B = sYY*A/P;
for i = as(1:end-1)

% dF/dh = -trace(dF/diC*iC*Q{i}*iC)
%------------------------------------------------------------------
C{i} = (A*Gc{i});
CZ{i} = C{i}*Z';
dFdh(i) = -P/2*(traceABtrans(C{i},Z)-traceABtrans(C{i},B));
end
dFdh(H) = -P/2*traceABtrans(iVr,(speye(N)-sYY*iVr/P));

% Expected curvature E{dF/dhh} (second derivatives)
% Safety check if negative likelihood decreased
%----------------------------------------------------------------------
for i = 1:length(as)-1
for j = i:length(as)-1

% dF/dhh = -trace{iV*Z*G*Z'*iV*Z*G*Z'}
%--------------------------------------------------------------
dFdhh(as(i),as(j)) = -P/2*traceAB(CZ{as(i)},CZ{as(j)});
dFdhh(as(j),as(i)) = dFdhh(as(i),as(j));
end
dFdhh(as(i),H) = -P/2*traceABtrans(CZ{as(i)},iVr);
dFdhh(H,as(i)) = dFdhh(as(i),H);
end
dFdhh(H,H) = -P/2*traceABtrans(iVr,iVr);

% modulate
%----------------------------------------------------------------------
dFdh = dFdh.*exp(h);
dFdhh = dFdhh.*(exp(h)*exp(h)');

% add hyperpriors
%----------------------------------------------------------------------
e = h - hE;
dFdh = dFdh - hP*e;
dFdhh = dFdhh - hP;
if (k>1 & (nl(k)-nl(k-1))>eps) % If the negative log-likelihood increased...
OPT.HessReg = OPT.HessReg*10; % Increase regularisation
if (OPT.verbose)
fprintf('likelihood decreased. Regularisation %2.3f\n',OPT.HessReg(1));
end;
theta = thetaH(:,k-1);
thetaH(:,k)=theta;
nl(k)=nl(k-1);
dFdh = dFdh_old;
dFdhh = dFdhh_old;
dL = inf; % Definitely try again
else
if (OPT.HessReg>1e-8)
OPT.HessReg = OPT.HessReg/10;
end;
dL = inf;
if (k>1)
dL = nl(k-1)-nl(k);
end;
end;
dFdh_old=dFdh;
dFdhh_old = dFdhh;

% Fisher scoring: update dh = -inv(ddF/dhh)*dF/dh
% Add slight Levenberg-Marquardt-style regularisation to second derivative
%----------------------------------------------------------------------
% dh = spm_dx(dFdhh(as,as),dFdh(as),{t}); % This is actually much
% less accurate, it seems, than the direct solve below
dh = -dFdhh(as,as)\dFdh(as);
h(as) = h(as) + dh;

switch (OPT.regularization)
case 'LM' % Levenberg-Marquardt
dFdhh = dFdhh + diag(diag(dFdhh))*OPT.HessReg;
case 'L' % Levenberg
dFdhh = dFdhh + eye(size(dFdhh,1))*OPT.HessReg;
end;

% predicted change in F - increase regularisation if increasing
% Fisher scoring: update dh = inv(ddF/dhh)*dF/dh
%----------------------------------------------------------------------
pF = dFdh(as)'*dh;
dF = pF;

try
dtheta = dFdhh\dFdh;
theta = theta - dtheta;
catch % Likely matrix close to singular
OPT.HessReg = OPT.HessReg*10;
dFdhh = dFdhh_old + diag(diag(dFdhh_old))*OPT.HessReg;
dtheta = dFdhh\dFdh;
theta = theta - dtheta;
keyboard;
end;
% convergence
%----------------------------------------------------------------------
% fprintf('%s %-23d: %10s%e [%+3.2f]\n',' ReML Iteration',k,'...',full(dF),t);
if dF < thres
if dL < OPT.thres
break;
else
% eliminate redundant components (automatic selection)
%------------------------------------------------------------------
as = find(h > low);
h(h<low)=low;
as = as(:)';
% as = find(theta > low);
% h(h<low)=low;
% as = as(:)';
end
hh(:,k)=h;
end

% return exp(h) and rescale
%--------------------------------------------------------------------------
theta = sY*exp(h);
G = zeros(Q);
for i = 1:H-1;
G = G + full(Gc{i})*theta(i);
end

% Find the inverse of V - while dropping the zero dimensions in G
% V = Z*G*Z' + I*sigma^2
[u,s] = eig(full(G));
dS = diag(s);
idx = dS>eps;
Zu = Z*u(:,idx);
iV = (eye(N)-Zu/(diag(1./dS(idx))*theta(H)+Zu'*Zu)*Zu')./theta(H); % Matrix inversion lemma

if (~isempty(X))
iVX = iV * X;
iVr = iV - iVX*((X'*iVX)\iVX'); % Correction for the fixed effects
else
iVr = iV;
end;

if nargout > 2
u=G*Z'*(iV)*r;
end;
if ~all(isreal(u(:)));
keyboard;
end;

if nargout > 3
ldet = -2*sum(log(diag(chol(iV)))); % Safe computation of the log determinant of V; thanks to code from D. Lu
l = -P/2*(ldet)-0.5*traceABtrans(iVr,YY);
if (~isempty(X)) % Correct for ReML estimates
l = l - P*sum(log(diag(chol(X'*iV*X)))); % - P/2 log(det(X'V^-1*X));
end;
end;
% Return likelihood
if(nargout>1)
l = -likefcn(theta);
end;
reg=OPT.HessReg;
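
Both the old and the new ending of pcm_NR invert V = Z*G*Z' + sigma2*I through the matrix inversion lemma flagged in the comments above. A minimal standalone check of that identity, with toy sizes and variable names assumed for illustration (not part of the commit):

% Verify the matrix-inversion-lemma expression used for iV on a toy problem.
N = 50; Q = 5; sigma2 = 0.3;
Z = randn(N,Q);
A = randn(Q); G = A*A';                % positive semi-definite component matrix
V = Z*G*Z' + sigma2*eye(N);            % N x N covariance to be inverted
[u,s] = eig(full(G));                  % eigen-decomposition, as in pcm_NR
dS = diag(s);
idx = dS > eps;                        % drop the zero dimensions of G
Zu = Z*u(:,idx);
iV = (eye(N) - Zu/(diag(1./dS(idx))*sigma2 + Zu'*Zu)*Zu')./sigma2;
max(max(abs(iV*V - eye(N))))           % should be ~1e-12: iV is inv(V)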
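
The likelihood computation uses 2*sum(log(diag(chol(.)))) rather than log(det(.)). A small sketch of why that matters numerically, with a toy matrix assumed for illustration (pcm_NR applies the same trick to iV, with the sign flipped):

% The determinant of a large covariance matrix easily under- or overflows a
% double, while the sum of log Cholesky diagonals stays finite.
V = 0.01*eye(400) + 0.005*ones(400);   % SPD toy covariance; det(V) ~ 1e-798
log(det(V))                            % -Inf: the determinant underflowed to zero
2*sum(log(diag(chol(V))))              % about -1836.8, the correct log-determinant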