Packaging CCNF code with OpenFace.

This commit is contained in:
Tadas Baltrusaitis
2018-05-05 11:21:09 +01:00
parent ddbe26108a
commit a557e9c192
156 changed files with 18411 additions and 2 deletions

View File

@@ -0,0 +1,125 @@
function [ alphas, betas, thetas, final_likelihood] = CCNF_training_bfgs(thresholdX, thresholdFun, x, y, alphas, betas, thetas, lambda_a, lambda_b, lambda_th, similarityFNs, sparsityFNs, varargin)
%CCNF_training_bfgs Performs CCNF training using BFGS (or LBFGS)
if(sum(strcmp(varargin,'const')))
ind = find(strcmp(varargin,'const')) + 1;
const = varargin{ind};
else
const = false;
end
if(iscell(x))
num_seqs = numel(x);
x = cell2mat(x)';
% add a bias term
x = cat(1, ones(1,size(x,2)), x);
% If all of the sequences are of the same length they can be flattened
% into a single matrix
if(const)
y = cell2mat(y);
y = reshape(y, numel(y)/num_seqs, num_seqs);
end
else
% if not a cell it has already been flattened, and is constant
% (most likely)
num_seqs = varargin{find(strcmp(varargin, 'num_seqs'))+1};
end
% Should a number of random seeds be tried for initialising theta?
if(sum(strcmp(varargin,'reinit')))
ind = find(strcmp(varargin,'reinit')) + 1;
reinit = varargin{ind};
else
reinit = false;
end
% It is possible to predefine the components B^(k) and C^(k) required
% to compute the B and C terms and partial derivatives (from equations
% 30 and 31 in Appendix B), as well as yB^(k)y and yC^(k)y,
% as they also do not change through the iterations
% In the constant case Precalc_Bs is the same across the sequences, same for
% PrecalcBsFlat, however yB^(k)y is defined per sequence
if(sum(strcmp(varargin,'PrecalcBs')) && sum(strcmp(varargin,'PrecalcBsFlat'))...
&& sum(strcmp(varargin,'Precalc_yBys')))
ind = find(strcmp(varargin,'PrecalcBs')) + 1;
Precalc_Bs = varargin{ind};
ind = find(strcmp(varargin,'PrecalcBsFlat')) + 1;
Precalc_Bs_flat = varargin{ind};
ind = find(strcmp(varargin,'Precalc_yBys')) + 1;
Precalc_yBys = varargin{ind};
else
% if these are not provided calculate them
[ ~, Precalc_Bs, Precalc_Bs_flat, Precalc_yBys ] = CalculateSimilarities( num_seqs, x, similarityFNs, sparsityFNs, y, const);
end
% Reinitialisation attempts to find a better starting point for the
% model training (sometimes helps sometimes doesn't)
if(reinit)
rng(0);
% By default try 200 times, but can override
num_reinit = 200;
if(sum(strcmp(varargin,'num_reinit')))
num_reinit = varargin{find(strcmp(varargin,'num_reinit')) + 1};
end
thetas_good = cell(num_reinit, 1);
lhoods = zeros(num_reinit, 1);
for i=1:num_reinit
initial_Theta = randInitializeWeights(size(thetas,2)-1, numel(alphas));
lhoods(i) = LogLikelihoodCCNF(y, x, alphas, betas, initial_Theta, lambda_a, lambda_b, lambda_th, Precalc_Bs_flat, [], [], [], [], const, num_seqs);
thetas_good{i} = initial_Theta;
end
[~,ind_max] = max(lhoods);
thetas = thetas_good{ind_max};
end
params = [alphas; betas; thetas(:)];
if(any(strcmp(varargin,'lbfgs')))
options = optimset('Algorithm','interior-point','GradObj','on', 'Hessian', 'lbfgs', 'TolX', thresholdX, 'TolFun', thresholdFun, 'display', 'off');
else
options = optimset('Algorithm','interior-point','GradObj','on', 'Hessian', 'bfgs', 'TolX', thresholdX, 'TolFun', thresholdFun, 'display', 'off');
end
if(any(strcmp(varargin,'max_iter')))
options.MaxIter = varargin{find(strcmp(varargin,'max_iter')) + 1};
end
objectiveFun = @(params)objectiveFunction(params, numel(alphas), numel(betas), size(thetas), lambda_a, lambda_b, lambda_th, Precalc_Bs, x, y, Precalc_yBys, Precalc_Bs_flat, const);
lowerBound = [zeros(numel(alphas)+numel(betas),1); -Inf(numel(thetas),1)];
upperBound = Inf(numel(params),1);
params = fmincon(objectiveFun, params, [], [],[],[], lowerBound, upperBound, [], options);
alphas = params(1:numel(alphas));
betas = params(numel(alphas)+1:numel(alphas)+numel(betas));
thetas = reshape(params(numel(alphas) + numel(betas) + 1:end), size(thetas));
final_likelihood = LogLikelihoodCCNF(y, x, alphas, betas, thetas, lambda_a, lambda_b, lambda_th, Precalc_Bs_flat, [], [], [], [], const, num_seqs);
end
function [loss, gradient] = objectiveFunction(params, numAlpha, numBeta, sizeTheta, lambda_a, lambda_b, lambda_th, PrecalcQ2s, x, y, PrecalcYqDs, PrecalcQ2sFlat, const)
alphas = params(1:numAlpha);
betas = params(numAlpha+1:numAlpha+numBeta);
thetas = reshape(params(numAlpha + numBeta + 1:end), sizeTheta);
num_seqs = size(PrecalcYqDs,1);
[gradient, SigmaInvs, CholDecomps, Sigmas, bs, all_x_resp] = gradientCCNF(params, numAlpha, numBeta, sizeTheta, lambda_a, lambda_b, lambda_th, PrecalcQ2s, x, y, PrecalcYqDs, PrecalcQ2sFlat, const, num_seqs);
% as fmincon minimises while we want to maximise the likelihood, negate the results
gradient = -gradient;
loss = -LogLikelihoodCCNF(y, x, alphas, betas, thetas, lambda_a, lambda_b, lambda_th, PrecalcQ2sFlat, SigmaInvs, CholDecomps, Sigmas, bs, const, num_seqs, all_x_resp);
end
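A minimal usage sketch (not part of the commit) of how CCNF_training_bfgs might be invoked on two equal-length sequences with a single first-order temporal similarity; all shapes and hyper-parameter values below are illustrative assumptions only.
x_train = {rand(30, 10); rand(30, 10)};        % two sequences of 30 frames x 10 features
y_train = {rand(30, 1); rand(30, 1)};          % per-frame labels
n_alphas = 5;                                  % number of neural gate functions
alphas = ones(n_alphas, 1);
betas = ones(1, 1);                            % one similarity function, no sparsity
thetas = randInitializeWeights(10, n_alphas);  % n_alphas x (1 + num_features)
sim_fns = {@(xq) similarityNeighbor(xq, 1)};   % link neighbouring frames
[alphas, betas, thetas, logL] = CCNF_training_bfgs(1e-4, 1e-4, x_train, y_train, ...
    alphas, betas, thetas, 1, 1, 1, sim_fns, {}, 'const', true);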

View File

@@ -0,0 +1,19 @@
function [ SigmaInv] = CalcSigmaCCNFflat(alphas, betas, n, precalc_B_without_beta, precalc_eye, precalc_zeros)
%CALCSIGMACCNFFLAT Compute the SigmaInv matrix from its flattened
%(lower-triangular) representation, which is possible because the matrix is symmetric
A = sum(alphas) .* precalc_eye;
% calculating the B + C from the paper (here referred to as B)
Btmp = precalc_B_without_beta * betas;
B = precalc_zeros;
on = tril(true(n,n));
B(on) = Btmp;
B = B';
B(on) = Btmp;
SigmaInv = 2 * (A + B);
end
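An illustrative round-trip check (MATLAB built-ins only, not part of the commit) of the flattening scheme the function assumes: the lower triangle of a symmetric matrix is stacked column-wise, and the full matrix is rebuilt by filling the lower triangle, transposing, and filling again.
n = 4;
M = randn(n); M = M + M';            % an arbitrary symmetric matrix
flat = M(tril(true(n)));             % stack the lower triangle (incl. diagonal)
B = zeros(n);
on = tril(true(n, n));
B(on) = flat;                        % fill the lower triangle
B = B';                              % move it to the upper triangle
B(on) = flat;                        % fill the lower triangle again
assert(isequal(B, M));               % the round trip recovers the original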

View File

@@ -0,0 +1,14 @@
function b = CalcbCCNF( alpha, theta, x, resps)
%CALCBCCNF Compute the b vector from the CCNF model
%   The responses from the neural layers can be precomputed and provided
%   in resps, otherwise they are computed here
if(nargin < 4)
X = [ones(size(x,1),1), x];
h1 = 1./(1 + exp(-theta * X'));
b = (2 * alpha' * h1)';
else
b = (2 * alpha' * resps)';
end
end
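A quick illustrative check (not part of the commit) that CalcbCCNF matches a per-frame loop over the gate responses, i.e. b(i) = 2 * alpha' * sigmoid(theta * [1; x_i]); the sizes below are arbitrary assumptions.
x = rand(6, 3);                      % 6 frames, 3 features
alpha = rand(4, 1);                  % 4 neural gates
theta = randn(4, 4);                 % 4 x (1 + 3), first column multiplies the bias
b_fast = CalcbCCNF(alpha, theta, x);
b_loop = zeros(6, 1);
for i = 1:6
    h = 1 ./ (1 + exp(-theta * [1; x(i, :)']));   % gate responses for frame i
    b_loop(i) = 2 * alpha' * h;
end
assert(max(abs(b_fast - b_loop)) < 1e-10);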

View File

@@ -0,0 +1,155 @@
function [ Similarities, B_without_beta, B_without_beta_flat, y_B_y ] = CalculateSimilarities( n_sequences, x, similarityFNs, sparsityFNs, y, const)
%CALCULATESIMILARITIES Construct the similarity and sparsity matrices B^(k)
%   for each sequence, their flattened lower-triangular versions, and the
%   precomputed y'B^(k)y terms
K = numel(similarityFNs);
K2 = numel(sparsityFNs);
%calculate similarity measures for each of the sequences
Similarities = cell(n_sequences, 1);
B_without_beta = cell(n_sequences,1);
B_without_beta_flat = cell(n_sequences,1);
y_B_y = zeros(n_sequences, K + K2);
if(~const)
similarities = cell(K, 1);
sparsities = cell(K2, 1);
% y can either be in cell format (sequences of different lengths) or in
% matrix format (sequences of the same length)
beg_ind = 1;
if(iscell(y))
end_ind = numel(y{1});
y_cell = true;
else
end_ind = size(y,1);
y_cell = false;
end
for q = 1 : n_sequences
% don't take the bias term
xq = x(2:end, beg_ind:end_ind);
sample_length = end_ind - beg_ind + 1;
Similarities{q} = zeros([sample_length, sample_length, K+K2]);
B_without_beta{q} = cell(K+K2,1);
B_without_beta_flat{q} = zeros((sample_length*(sample_length+1))/2,K+K2);
% go over all of the similarity metrics and construct the
% similarity matrices
if(y_cell)
yq = y{q};
else
yq = y(:,q);
end
for k=1:K
if(q==1)
similarities{k} = similarityFNs{k}(xq');
end
Similarities{q}(:,:,k) = similarities{k};
S = Similarities{q}(:,:,k);
D = diag(sum(S));
B_without_beta{q}{k} = D - S;
B = D - S;
B_without_beta_flat{q}(:,k) = B(logical(tril(ones(size(S)))));
y_B_y(q,k) = -yq'*B*yq;
end
for k=1:K2
% this is constant so don't need to recalc
if(q==1)
sparsities{k} = sparsityFNs{k}(xq');
end
Similarities{q}(:,:,K+k) = sparsities{k};
S = Similarities{q}(:,:,K+k);
D = diag(sum(S));
B_without_beta{q}{K+k} = D + S;
B = D + S;
B_without_beta_flat{q}(:,K+k) = B(logical(tril(ones(size(S)))));
y_B_y(q,K+k) = -yq'*B*yq;
end
% Update the references to sequence start/end
if(q ~= n_sequences)
beg_ind = end_ind + 1;
if(iscell(y))
end_ind = end_ind + numel(y{q+1});
else
end_ind = end_ind + size(y,1);
end
end
end
else
sample_length = size(x,2)/n_sequences;
similarities = cell(K, 1);
sparsities = cell(K2, 1);
B_without_beta = {cell(K+K2,1)};
B_without_beta_flat = {zeros((sample_length*(sample_length+1))/2,K+K2)};
Similarities = {zeros([sample_length, sample_length, K+K2])};
beg_ind = 1;
end_ind = sample_length;
% don't take the bias term
xq = x(2:end, beg_ind:end_ind);
% go over all of the similarity metrics and construct the
% similarity matrices
for k=1:K
similarities{k} = similarityFNs{k}(xq');
Similarities{1}(:,:,k) = similarities{k};
S = Similarities{1}(:,:,k);
D = diag(sum(S));
B_without_beta{1}{k} = D - S;
B = D - S;
% flatten the symmetric matrix to save space
B_without_beta_flat{1}(:,k) = B(logical(tril(ones(size(S)))));
y_B_y(:,k) = diag(-y'*B*y);
end
for k=1:K2
% this is constant so don't need to recalc
sparsities{k} = sparsityFNs{k}(xq');
Similarities{1}(:,:,K+k) = sparsities{k};
S = Similarities{1}(:,:,K+k);
D = diag(sum(S));
B_without_beta{1}{K+k} = D + S;
B = D + S;
B_without_beta_flat{1}(:,K+k) = B(logical(tril(ones(size(S)))));
y_B_y(:,K+k) = diag(-y'*B*y);
end
end
end
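Note that B^(k) = D - S is the graph Laplacian of the similarity matrix S, so the stored y'B^(k)y terms are (negated) weighted sums of squared differences between linked frames. An illustrative check of that identity (not part of the commit) using the similarityNeighbor function from this commit:
S = similarityNeighbor(rand(5, 3), 1);           % 5-frame chain, first-order links
D = diag(sum(S));
B = D - S;                                        % graph Laplacian of S
y = randn(5, 1);
pairwise = 0;
for i = 1:5
    for j = 1:5
        pairwise = pairwise + 0.5 * S(i, j) * (y(i) - y(j))^2;
    end
end
assert(abs(y' * B * y - pairwise) < 1e-10);       % Laplacian quadratic form identity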

View File

@@ -0,0 +1,139 @@
function logL = LogLikelihoodCCNF(ys, xs, alphas, betas,thetas,...
lambda_a,lambda_b,lambda_th, Precalc_Bs_flat,...
SigmaInvs, ChDecomps, Sigmas, bs, const, num_seq, all_X_resp)
% Calculating the log likelihood of the CCNF
logL = 0;
% If sequences are of different lengths
if(~const)
% y can either be in cell format (sequences of different lengths) or in
% matrix format (sequences of the same length)
beg_ind = 1;
if(iscell(ys))
end_ind = numel(ys{1});
y_cell = true;
else
end_ind = size(ys,1);
y_cell = false;
end
for q=1:num_seq
% Don't take the bias term
xq = xs(2:end, beg_ind:end_ind);
if(y_cell)
yq = ys{q};
else
yq = ys(:,q);
end
n = size(xq, 2);
% Compute b if not provided (they might be, as
% calculation of gradient involves these terms)
if(~isempty(bs))
b = bs(beg_ind:end_ind)';
else
b = CalcbCCNF(alphas, thetas, xq');
end
% Same goes for inverse of Sigma
if(isempty(SigmaInvs))
precalc_eye = eye(n);
precalc_zeros = zeros(n);
[SigmaInv] = CalcSigmaCCNFflat(alphas, betas, n, Precalc_Bs_flat{q}, precalc_eye, precalc_zeros);
mu = SigmaInv \ b;
% Used for normalisation term
L = chol(SigmaInv);
else
SigmaInv = SigmaInvs{q};
Sigma = Sigmas{q};
mu = Sigma * b;
% Used for normalisation term
L = ChDecomps{q};
end
% normalisation = 1/((2*pi)^(n/2)*sqrt(det(Sigma)));
% Removing the division by pi, as it is constant
% normalisation = 1/(sqrt(det(sigma)));
% flipping around the determinant of SigmaInv, as det(inv(Sigma)) = 1/det(Sigma)
% normalisation = log(sqrt(det(SigmaInv)));
% log of normalisation using Cholesky decomposition (faster and more
% numerically stable)
log_normalisation = sum(log(diag(L))); % no times 2 here as we calculate the square root of determinant
% prob_q = normalisation * exp(-0.5 * (y - mu)'*SigmaInv*(y-mu));
% applying a logarithm to this leads to
% logLq = log(normalisation) + (-0.5 * (yq - mu)'*SigmaInv*(yq-mu));
logLq = log_normalisation + (-0.5 * (yq - mu)'*SigmaInv*(yq-mu));
% Add the current likelihood to the running sum
logL = logL + logLq;
% Update the references to sequence start/end
if(q ~= num_seq)
beg_ind = end_ind + 1;
if(iscell(ys))
end_ind = end_ind + numel(ys{q+1});
else
end_ind = end_ind + size(ys,1);
end
end
end
else
% A version where each sequence is same length and has the same
% connections
seq_length = size(ys,1);
num_seqs = size(ys,2);
if(isempty(SigmaInvs))
% If not provided compute the neuron activation (Response)
if(nargin < 16)
all_X_resp = 1./(1 + exp(-thetas * xs));
end
% Combine the neuron responses to b
all_bs = 2*alphas' * all_X_resp;
precalc_eye = eye(seq_length);
precalc_zeros = zeros(seq_length);
% Compute Sigma for one of the sequences (same for all so can
% reuse)
[SigmaInv] = CalcSigmaCCNFflat(alphas, betas, seq_length, Precalc_Bs_flat{end}, precalc_eye, precalc_zeros);
% A faster way of inverting a symmetric matrix
CholDecomp = chol(SigmaInv);
Sigma=CholDecomp\(CholDecomp'\precalc_eye);
% mu values associated with each time step
mus = Sigma * reshape(all_bs, seq_length, num_seqs);
else
SigmaInv = SigmaInvs;
CholDecomp = ChDecomps;
Sigma = Sigmas;
mus = Sigma * reshape(bs, seq_length, num_seqs);
end
log_normalisation = num_seqs * sum(log(diag(CholDecomp)));
% Compute the sum across every sequence of
% (yq - mu)'*SigmaInv*(yq-mu) and add to normalisation term
ymu = (ys - mus);
y1 = SigmaInv * ymu;
logL = log_normalisation - 0.5 * ymu(:)'* y1(:);
end
% add regularisation term
logL = logL -lambda_b * (betas'*betas)/2 - lambda_a * (alphas'*alphas)/2 - lambda_th * (thetas(:)'*thetas(:))/2;
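The normalisation comments above rely on sum(log(diag(chol(SigmaInv)))) being equal to log(sqrt(det(SigmaInv))); a quick numerical check of that identity (illustrative only, any symmetric positive-definite matrix will do):
SigmaInv = gallery('lehmer', 5) + 5 * eye(5);     % an arbitrary SPD matrix
L = chol(SigmaInv);                               % upper-triangular factor, SigmaInv = L'*L
lognorm_chol = sum(log(diag(L)));                 % used above as log(sqrt(det(SigmaInv)))
lognorm_det = 0.5 * log(det(SigmaInv));           % direct, less stable computation
assert(abs(lognorm_chol - lognorm_det) < 1e-10);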

Binary file not shown.

View File

@@ -0,0 +1,112 @@
function [ correlations, rms, mean_correlation, mean_RMSE, long_correlation, long_RMSE, predictions, gts ] = evaluate_CCNF_model( alphas, betas, thetas, x, y, similarityFNs, sparsityFNs, offset, scaling, verbose, PrecalcQ2sFlat)
%evaluate_CCNF_model Evaluate the trained model on test (or training) data
% For visualising time series predictions
num_x_plots = 8;
num_y_plots = 10;
total_plots = num_x_plots * num_y_plots;
if(iscell(x))
num_seqs = numel(x);
x = cell2mat(x)';
% add a bias term
x = cat(1, ones(1,size(x,2)), x);
else
% if not a cell it has already been flattened, and is constant
% (most likely)
num_seqs = size(y,2);
end
% if not sure about const assume it is not
const = false;
if(nargin < 11)
[ ~, ~, PrecalcQ2sFlat, ~ ] = CalculateSimilarities( num_seqs, x, similarityFNs, sparsityFNs, y, const);
end
correlations = zeros(num_seqs, 1);
rms = zeros(num_seqs, 1);
% concatenated data for an alternative correlation
y_predConcat = [];
y_trueConcat = [];
% Predict each sequence
for q=1:num_seqs
if(iscell(y))
seq_length = size(y{q},1);
yq = y{q};
else
seq_length = size(y,1);
yq = y(:,q);
end
X = x(:,(q-1)*seq_length+1:q*seq_length);
h1 = 1./(1 + exp(-thetas * X));
b = (2 * alphas' * h1)';
PrecalcQ2flat = PrecalcQ2sFlat{q};
precalc_eye = eye(seq_length);
precalc_zeros = zeros(seq_length);
SigmaInv = CalcSigmaCCNFflat(alphas, betas, seq_length, PrecalcQ2flat, precalc_eye, precalc_zeros);
y_est = SigmaInv \ b;
% Can optionally supply the scaling and offset used on the training
% labels to be applied inversely
y_est = y_est/scaling + offset;
if(numel(y_est) > 1)
R = corrcoef(y_est, yq);
correlations(q) = R(1,2);
end
rms(q) = sqrt( mean((y_est - yq).^2) );
y_predConcat = cat(1, y_predConcat, y_est);
y_trueConcat = cat(1, y_trueConcat, yq);
if(verbose)
if(mod(q,total_plots) == 1)
figure;
remainingPlots = num_seqs - q;
if(remainingPlots < total_plots)
num_y_plots = ceil(remainingPlots / num_x_plots);
end
end
subplot(num_y_plots,num_x_plots,mod(q-1,total_plots)+1);
t = 1:seq_length;
plot(t, yq, 'g', t, y_est, 'b');
title(sprintf('C %.2f, R %.2f', correlations(q), rms(q)));
set(gca, 'XTick', [], 'YTick', []);
end
end
% Compute the error metrics
mean_correlation = mean(correlations);
mean_RMSE = mean(rms);
long_correlation = corr(y_predConcat, y_trueConcat).^2;
long_RMSE = sqrt(mean((y_predConcat - y_trueConcat).^2));
predictions = y_predConcat;
gts = y_trueConcat;
if(verbose)
figure
plot([1:numel(y_trueConcat)],y_trueConcat,'g',[1:numel(y_trueConcat)],y_predConcat,'b');
title(sprintf('C %.2f, R %.2f', long_correlation, long_RMSE));
set(gca, 'XTick', [], 'YTick', []);
end
end

View File

@@ -0,0 +1,185 @@
function [ gradientParams, SigmaInvs, CholDecomps, Sigmas, bs, allXresp] = gradientCCNF( params, num_alpha, numBeta, sizeTheta, lambda_a, lambda_b, lambda_th, Precalc_Bs, x, y, Precalc_yBys, Precalc_Bs_flat, constant, num_seqs)
%GRADIENTCCNF Compute the gradient of the CCNF log-likelihood with respect
%   to the alphas, betas and thetas packed in params, together with the
%   per-sequence Sigma terms that can be reused by the likelihood computation
% pick out the relevant terms (unpack)
alphas_init = params(1:num_alpha);
betasInit = params(num_alpha+1:num_alpha+numBeta);
thetasInit = reshape(params(num_alpha+numBeta+1:end), sizeTheta);
% Compute the response from the neural layers
allXresp = 1./(1 + exp(-thetasInit * x));
Xt = x;
bs = 2*alphas_init' * allXresp;
% This is precalculated for the next step and is basically the
% feedforward step of the neural net
Z_precalc = 2 * (allXresp .* (1-allXresp));
% These are the outputs weighted by the alphas (see eq TODO)
db2_precalc = bsxfun(@times, Z_precalc, alphas_init);
num_feats = sizeTheta(2);
if(constant)
seq_length = size(x,2)/num_seqs;
% As the similarities are the same across all series we can reuse our
% Sigma and SigmaInv calculations
I = eye(seq_length);
[SigmaInv] = CalcSigmaCCNFflat(alphas_init, betasInit, seq_length, Precalc_Bs_flat{1}, I, zeros(seq_length));
CholDecomp=chol(SigmaInv);
% This is a faster way of inverting a symmetric matrix
Sigma=CholDecomp\(CholDecomp'\I);
Sigma_trace = trace(Sigma);
% mu values associated with each time step
mus = Sigma * reshape(bs, seq_length, num_seqs);
% difference between actual and prediction (error)
diff = (y - mus);
db_precalc_mult = bsxfun(@times, db2_precalc, diff(:)');
% Equation 46 from the appendix
gradientThetasT = Xt * db_precalc_mult';
% Reshape into the correct format
gradientThetasT = gradientThetasT(:)';
gradientThetasT = reshape(gradientThetasT, sizeTheta(2), sizeTheta(1))';
gradientThetasT = gradientThetasT(:);
% Some useful precalculations
% for every sequence get a dot product with itself
yy = dot(y,y);
% same goes for the mu
mumu = dot(mus,mus);
% calculating the derivative of L with respect to alpha_k (Equation 27)
% gradientAlphas = (-yq'*yq +(2*yq'*D')' -2 * D * mu + sum(mu.^2) + trace(Sigma));
% allXresp is D
gradient_alphas_add = -sum(yy) + sum(mumu) + num_seqs * Sigma_trace;
gradient_alphas = 2 * allXresp * (y(:) - mus(:)) + gradient_alphas_add;
gradient_betas = zeros(numBeta, 1);
% calculating the derivative of log(L) with respect to the betas
for k=1:numBeta
% From Equation 38 (and 39 for gamma)
% gradient = -yq'*B^(k)*yq + mu'*B^(k)*mu + Vec(Sigma)'*vec(B^(k))
% We precalculate B^(k) (equation 30), as it does not change
% over the course of optimisation
B_k = Precalc_Bs{1}{k};
% precalculated -yq'*B_k*yq can be used as well as it does not
% change (stored in Precalc_yBys)
yq_B_k_yq = sum(Precalc_yBys(:,k));
% A vectorised version of mu'*B^(k)*mu
B_k_mu = B_k*mus;
mu_B_k_mu = mus(:)' * B_k_mu(:);
% Vec(Sigma)*Vec(B^(k)) can be computed as follows:
partition_term = num_seqs * Sigma(:)'*B_k(:);
% Equation 38 and 39 basically
dLdb = yq_B_k_yq + mu_B_k_mu + partition_term;
gradient_betas(k) = dLdb;
end
gradientParams = [gradient_alphas;gradient_betas;gradientThetasT];
SigmaInvs = SigmaInv;
CholDecomps = CholDecomp;
Sigmas = Sigma;
else
SigmaInvs = cell(num_seqs, 1);
CholDecomps = cell(num_seqs, 1);
Sigmas = cell(num_seqs, 1);
gradients = zeros(num_seqs, numel(params));
a_precalc = zeros(sizeTheta(2)*sizeTheta(1), size(allXresp,2));
for i=1:size(db2_precalc,1)
a_precalc((i-1)*num_feats+1:i*num_feats,:) = bsxfun(@times, Xt, db2_precalc(i,:));
end
% y can either be in cell format (sequences of different lengths) or in
% matrix format (sequences of the same length)
beg_ind = 1;
if(iscell(y))
end_ind = numel(y{1});
y_cell = true;
else
end_ind = size(y,1);
y_cell = false;
end
% Go through every sequence summing the gradients
for q = 1 : num_seqs
currResp = allXresp(:,beg_ind:end_ind);
currB = bs(beg_ind:end_ind)';
PrecalcB = Precalc_Bs{q};
PrecalcBFlat = Precalc_Bs_flat{q};
if(y_cell)
yq = y{q};
else
yq = y(:,q);
end
xq = x(2:end, beg_ind:end_ind);
% Used for equation 46 computation
a_precalc_curr = a_precalc(:,beg_ind:end_ind);
precalc_eye = eye(numel(yq));
precalc_zeros = zeros(numel(yq));
[ gradientsAlphas, gradientsBetas, gradientsThetas, SigmaInv, CholDecomp, Sigma ] = gradientCCNF_per_seq(alphas_init, betasInit, thetasInit, PrecalcB, xq, yq, currResp, currB, Precalc_yBys(q, :), PrecalcBFlat, a_precalc_curr, precalc_eye, precalc_zeros);
gradients(q,:) = [gradientsAlphas; gradientsBetas; gradientsThetas(:)];
SigmaInvs{q} = SigmaInv;
CholDecomps{q} = CholDecomp;
Sigmas{q} = Sigma;
% Update the references to sequence start/end
if(q ~= num_seqs)
beg_ind = end_ind + 1;
if(iscell(y))
end_ind = end_ind + numel(y{q+1});
else
end_ind = end_ind + size(y,1);
end
end
end
gradientParams = sum(gradients,1)';
end
% Add the regularisation term
regAlpha = alphas_init * lambda_a;
regBeta = betasInit * lambda_b;
regTheta = thetasInit * lambda_th;
gradientParams = gradientParams - [regAlpha; regBeta; regTheta(:)];
end
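The commented-out gradientAnalytical calls in this file refer to gradient checking; a generic central-difference checker along the following lines (a hypothetical helper, not part of the commit) could be pointed at the objective function handle built in CCNF_training_bfgs:
function max_diff = check_gradient(lossFun, params, epsilon)
%CHECK_GRADIENT Compare an analytic gradient against central finite differences
%   lossFun must return [loss, gradient] when called with a parameter vector
if nargin < 3
    epsilon = 1e-6;
end
[~, grad_analytic] = lossFun(params);
grad_numeric = zeros(size(params));
for i = 1:numel(params)
    e = zeros(size(params));
    e(i) = epsilon;
    grad_numeric(i) = (lossFun(params + e) - lossFun(params - e)) / (2 * epsilon);
end
max_diff = max(abs(grad_analytic(:) - grad_numeric(:)));
end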

View File

@@ -0,0 +1,53 @@
function [ gradient_alphas, gradient_betas, gradient_thetas, SigmaInv, CholDecomp, Sigma ] = gradientCCNF_per_seq( alphas, betas, thetas, precalc_Bk, xq, yq, curr_resp, b, precalc_y_B_y, Precalc_Bk_flat, a_precalc, precalc_eye, precalc_zeros)
%gradientCCNF_per_seq Compute the partial derivatives for a single sequence
% This is an optimised version as it does not use the whole matrix but
% a lower diagonal part due to symmetry
n = size(xq, 2);
[SigmaInv] = CalcSigmaCCNFflat(alphas, betas, n, Precalc_Bk_flat, precalc_eye, precalc_zeros);
% Get the actual Sigma from our SigmaInv
% Optimised for symmetric matrices
CholDecomp=chol(SigmaInv);
Sigma=CholDecomp\(CholDecomp'\precalc_eye);
% mu = SigmaInv \ b = Sigma * b;
% as we've calculated Sigma already, this is equivalent to the above
mu = Sigma * b;
% calculating the derivative of L with respect to alpha_k (Equation 27)
% gradientAlphas = (-yq'*yq +(2*yq'*D')' -2 * D * mu + sum(mu.^2) + trace(Sigma));
% curr_resp is D from the paper
yqq = -yq'*yq;
curr_resp_yq = (2*curr_resp*yq);
gradient_alphas = yqq + curr_resp_yq - 2 * curr_resp * mu + mu' * mu + sum(diag(Sigma));
gradient_betas = zeros(size(betas));
K2 = numel(betas);
% calculating the derivative of log(L) with respect to the betas
for k=1:K2
% From Equation 38 (and 39 for gamma)
% gradient = -yq'*B^(k)*yq + mu'*B^(k)*mu + Vec(Sigma)'*vec(B^(k))
% We precalculate B^(k) (equation 30), as it does not change
% over the course of optimisation
B_k = precalc_Bk{k};
% Vec(Sigma)*Vec(B^(k)) can be computed as follows:
partition_gradient = Sigma(:)'*B_k(:);
% Equation 38 and 39 basically
dLdb = precalc_y_B_y(k) + mu'*B_k*mu + partition_gradient;
gradient_betas(k) = dLdb;
end
% Equation 46 from the appendix
gradient_thetas = (yq - mu)' * a_precalc';
gradient_thetas = (reshape(gradient_thetas, size(thetas')))';
end

View File

@@ -0,0 +1,35 @@
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
%incoming connections and L_out outgoing connections
% W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
% of a layer with L_in incoming connections and L_out outgoing
% connections.
%
%   Note that W is a matrix of size (L_out, 1 + L_in), as the first
%   column of W handles the "bias" terms
epsilon_init = 1/sqrt(L_in);
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
end

View File

@@ -0,0 +1,15 @@
function [ SimilarityMatrix ] = similarityNeighbor( x, n, ~)
%similarityNeighbor Create a link for the n'th neighbour
sz = size(x,1);
SimilarityMatrix = eye(sz);
i = 1:sz-n;
SimilarityMatrix(sub2ind([sz, sz], i+n,i)) = 1;
SimilarityMatrix(sub2ind([sz, sz], i,i+n)) = 1;
DiagMask = ones(size(x, 1)) - eye(size(x,1));
SimilarityMatrix = SimilarityMatrix .* DiagMask;
SimilarityMatrix = SimilarityMatrix + eye(size(x, 1));
end
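For illustration, on a four-frame sequence with n = 1 the result is the chain adjacency matrix plus the identity diagonal (expected output, not part of the commit):
% similarityNeighbor(zeros(4, 1), 1)
% ans =
%      1     1     0     0
%      1     1     1     0
%      0     1     1     1
%      0     0     1     1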

View File

@@ -0,0 +1,59 @@
function [ SimilarityMatrix ] = similarity_neighbor_grid( x, side, types)
%similarity_neighbor_grid Create neighbourhood similarity links for a grid
% this assumes that the patch is laid out with first column, then second
% column, ... final column (column major)
SimilarityMatrix = eye(side*side);
% types - 1 - horizontal, 2 - vertical, 3 - diagonal (bl-tr), 4 -
% diagonal (br - tl)
for t=1:numel(types)
if(types(t) == 1)
% for horizontal we want to link both neighbours
% (which are offset from the points by height)
i = 1:(side*side-side);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i, i+side)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i+side, i)) = 1;
end
if(types(t) == 2)
% for vertical we want to link both neighbours except at edge
% cases which are mod(y_loc,side) = 0 as they are at the edges
i = 1:side*side;
i_to_rem = i(mod(i, side) == 0);
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+1, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+1)) = 1;
end
if(types(t) == 3)
% for the diagonal to top-right and bottom-left don't use the rightmost column
i = 1:(side^2)-side;
i_to_rem = i(mod(i-1, side) == 0);
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+side-1, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+side-1)) = 1;
end
if(types(t) == 4)
% for the diagonal to top-left and bottom-right don't use the rightmost column
i = 1:(side^2)-side;
i_to_rem = i(mod(i, side) == 0);
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+side+1, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+side+1)) = 1;
end
end
assert(isequal(SimilarityMatrix, SimilarityMatrix'));
end
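As an illustration of the column-major layout assumed above (a hypothetical 3 x 3 patch): linear indices run down each column, so a pixel's horizontal neighbour sits side indices away; a small check using the function above (not part of the commit):
side = 3;                                       % pixels indexed 1..9 column-major:
                                                %   1 4 7
                                                %   2 5 8
                                                %   3 6 9
S_h = similarity_neighbor_grid([], side, 1);    % horizontal links only
assert(S_h(2, 2 + side) == 1 && S_h(2 + side, 2) == 1);   % pixel 2 linked to pixel 5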

View File

@@ -0,0 +1,69 @@
function [ SimilarityMatrix ] = similarity_neighbor_grid_further( x, side, types, dist)
%similarity_neighbor_grid_further Create neighbourhood similarity links for a grid at a given distance
% this assumes that the patch is laid out with first column, then second
% column, ... final column (column major)
% dist = 2;
SimilarityMatrix = eye(side*side);
% types - 1 - horizontal, 2 - vertical, 3 - diagonal (bl-tr), 4 -
% diagonal (br - tl)
for t=1:numel(types)
if(types(t) == 1)
% for horizontal we want to link both neighbours
% (which are offset from the points by height)
i = 1:(side*side-side*dist);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i, i+side*dist)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i+side*dist, i)) = 1;
end
if(types(t) == 2)
% for vertical we want to link both neighbours except at edge
% cases which are mod(y_loc,side) = 0 as they are at the edges
i = 1:side*side;
i_to_rem =[];
for s=1:dist
i_to_rem = union(i_to_rem, i(mod(i+s-1, side) == 0));
end
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+dist, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+dist)) = 1;
end
if(types(t) == 3)
% for the diagonal to top-right and bottom-left don't use the rightmost column
i = 1:(side^2)-dist * side;
i_to_rem = [];
for s=1:dist
i_to_rem = union(i_to_rem,i(mod(i-s, side) == 0));
end
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+dist*side-dist, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+dist*side-dist)) = 1;
end
if(types(t) == 4)
% for the diagonal to top-left and bottom-right don't use the rightmost column
i = 1:(side^2)-dist*side;
i_to_rem = [];
for s=1:dist
i_to_rem = union(i_to_rem, i(mod(i+s-1, side) == 0));
end
i_both = setdiff(i, i_to_rem);
% create the neighboring links for i
SimilarityMatrix(sub2ind([side^2, side^2], i_both+dist*side+dist, i_both)) = 1;
SimilarityMatrix(sub2ind([side^2, side^2], i_both, i_both+dist*side+ dist)) = 1;
end
end
assert(isequal(SimilarityMatrix, SimilarityMatrix'));
end

View File

@@ -0,0 +1,22 @@
function [ SparsityMatrix ] = sparsity_grid( x, side, width, width_end)
%sparsity_grid Construct a sparsity (inhibition) matrix for a grid patch
%   width and width_end define the start and end distances for the sparsity
%   (or similarity) grid, allowing control of the enforced smoothness and
%   sparsity/inhibition
SimilarityMatrix = zeros(side*side);
for i=1:width
SimilarityMatrix = (similarity_neighbor_grid_further(x, side, [1,2,3,4], i) | SimilarityMatrix);
end
SimilarityMatrix_end = zeros(side*side);
for i=1:width_end
SimilarityMatrix_end = (similarity_neighbor_grid_further(x, side, [1,2,3,4], i) | SimilarityMatrix_end);
end
SparsityMatrix = double(SimilarityMatrix_end & (~SimilarityMatrix));
assert(isequal(SparsityMatrix, SparsityMatrix'));
end

View File

@@ -0,0 +1,57 @@
function [ alphas, betas, scaling, finalLikelihood] = CCRF_training_bfgs( num_seqs, thresholdX, thresholdFun, x, y, yUnnormed, alphas, betas, lambda_a, lambda_b, similarityFNs, Precalc_Bs, Precalc_Bs_flat, Precalc_yBys, varargin)
%CCRF_TRAINING_BFGS Performs CCRF training using BFGS given the initial
%state and optimisation parameters
% It is possible to predefine the components B^(k) required
% to compute the B term and partial derivatives, as well as yB^(k)y,
% as they also do not change through the iterations
if(sum(strcmp(varargin,'PrecalcBs')) && sum(strcmp(varargin,'PrecalcBsFlat'))...
&& sum(strcmp(varargin,'Precalc_yBys')))
ind = find(strcmp(varargin,'PrecalcBs')) + 1;
Precalc_Bs = varargin{ind};
ind = find(strcmp(varargin,'PrecalcBsFlat')) + 1;
Precalc_Bs_flat = varargin{ind};
ind = find(strcmp(varargin,'Precalc_yBys')) + 1;
Precalc_yBys = varargin{ind};
else
% if these are not provided calculate them
[ ~, Precalc_Bs, Precalc_Bs_flat, Precalc_yBys ] = CalculateSimilarities( num_seqs, x, similarityFNs, y);
end
params = [alphas; betas];
objectiveFun = @(params)objectiveFunction(params, numel(alphas), lambda_a, lambda_b, Precalc_Bs, x, y, Precalc_yBys, Precalc_Bs_flat);
options = optimset('Algorithm','interior-point','GradObj','on', 'TolX', thresholdX, 'TolFun', thresholdFun, 'Hessian', 'bfgs', 'display','off', 'useParallel', 'Always');
if(sum(strcmp(varargin,'max_iter')))
options.MaxIter = varargin{find(strcmp(varargin,'max_iter')) + 1};
end
params = fmincon(objectiveFun, params, [], [],[],[], zeros(numel(params),1), Inf(numel(params), 1), [], options);
alphas = params(1:numel(alphas));
betas = params(numel(alphas)+1:end);
finalLikelihood = LogLikelihoodCCRF(y, x, alphas, betas, lambda_a, lambda_b, Precalc_Bs_flat);
% fprintf('Final log likelihood at iteration; logL %f, learning rate\n', finalLikelihood);
% establish the scaling
scaling = getScaling2(alphas, betas, x, yUnnormed, Precalc_Bs);
end
function [loss, gradient] = objectiveFunction(params, numAlpha, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat)
alphas = params(1:numAlpha);
betas = params(numAlpha+1:end);
[gradient, SigmaInvs, CholDecomps, Sigmas] = gradientCCRFFull(params, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat);
% as fmincon minimises while we want to maximise the likelihood, negate the results
gradient = -gradient;
loss = -LogLikelihoodCCRF(y, x, alphas, betas, lambda_a, lambda_b, PrecalcBsFlat, SigmaInvs, CholDecomps, Sigmas);
end

View File

@@ -0,0 +1,122 @@
function [ alphas, betas, scaling, finalLikelihood] = CCRF_training_gradient_descent( nIterations, nExamples, learningRate, threshold, x, y, yUnnormed, masks, alphas, betas, lambda_a, lambda_b, similarityFNs, useIndicators, verbose)
%CCRF_TRAINING_GRADIENT_DESCENT Performs CCRF gradient-based training
%given the initial state and gradient descent parameters
if(verbose)
logLikelihood = zeros(round(nIterations/10)+1, 1);
alphaTrack = zeros(nIterations, numel(alphas));
betaTrack = zeros(nIterations, numel(betas));
end
logAlphas = log(alphas);
logBetas = log(betas);
K = numel(similarityFNs);
%calculate similarity measures for each of the sequences
Similarities = cell(nExamples, 1);
PrecalcQ2s = cell(nExamples,1);
PrecalcQ2sFlat = cell(nExamples,1);
PrecalcYqDs = zeros(nExamples, K);
for q = 1 : nExamples
yq = y{q};
xq = x{q};
mask = masks{q};
n = size(yq, 1);
Similarities{q} = zeros([n, n, K]);
% PrecalcQ2s{q} = zeros([n, n, K]);
PrecalcQ2s{q} = cell(K,1);
% PrecalcQ2sFlat{q} = cell(K,1);
PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,K);
% go over all of the similarity metrics and construct the
% similarity matrices
for k=1:K
Similarities{q}(:,:,k) = similarityFNs{k}(xq, mask);
S = Similarities{q}(:,:,k);
D = diag(sum(S));
B = D - S;
% PrecalcQ2s{q}(:,:,k) = B;
PrecalcQ2s{q}{k} = B;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
PrecalcYqDs(q,k) = -yq'*B*yq;
end
end
%stochastic gradient descent
for iter = 1 : nIterations
prevAlphas = alphas;
prevBetas = betas;
for q = 1 : nExamples
yq = y{q};
xq = x{q};
mask = masks{q};
PrecalcQ2 = PrecalcQ2s{q};
PrecalcQ2Flat = PrecalcQ2sFlat{q};
[ logGradientsAlphas, logGradientsBetas] = gradientCCRF(alphas, betas, lambda_a, lambda_b, PrecalcQ2, xq, yq, mask, PrecalcYqDs(q, :), useIndicators, PrecalcQ2Flat);
% [logGradientAlphasAnalytical, logGradientBetasAnalytical] = gradientAnalytical(PrecalcQ2, alphas, betas, lambda, xq, yq, mask);
%
% diffInGradientsAlpha = mean(abs(logGradientsAlphas - logGradientAlphasAnalytical));
% diffInGradientsBeta = mean(abs(logGradientsBetas - logGradientBetasAnalytical));
%update log alpha
logAlphas = logAlphas + learningRate * logGradientsAlphas;
alphas = exp(logAlphas);
%update log beta
logBetas = logBetas + learningRate * logGradientsBetas;
betas = exp(logBetas);
if(verbose)
%record alpha and beta values for each iteration for debug purposes
alphaTrack(iter,:) = alphas(:);
betaTrack(iter,:) = betas;
end
end
%check for convergence
if (norm([prevAlphas;prevBetas] - [alphas;betas])/norm([prevAlphas;prevBetas]) < threshold || norm([logGradientsAlphas;logGradientsBetas]) < threshold)
break;
end
if(verbose)
if(mod(iter, 10)==0)
logLikelihood(iter/10 + 1) = LogLikelihoodCCRF(y, x, masks, alphas, betas, lambda_a, lambda_b, PrecalcQ2sFlat, useIndicators);
fprintf('Iteration %d; logL %f\n', iter, logLikelihood(iter/10 + 1));
end
end
end
% establish the scaling
scaling = getScaling(alphas, betas, x, yUnnormed, masks, PrecalcQ2s, useIndicators);
if(verbose)
figure
subplot(1,3,1)
plot(betaTrack(1:iter,:));
title('beta');
subplot(1,3,2)
plot(alphaTrack(1:iter,:))
title('alpha');
subplot(1,3,3)
plot(logLikelihood(1:round(iter/10),:))
title('log likelihood');
finalLikelihood = LogLikelihoodCCRF(y, x, masks, alphas, betas, lambda_a, lambda_b, PrecalcQ2sFlat, useIndicators);
fprintf('Final log likelihood at iteration %d; logL %f, learning rate %f\n', iter, finalLikelihood, learningRate);
else
finalLikelihood = LogLikelihoodCCRF(y, x, masks, alphas, betas, lambda_a, lambda_b, PrecalcQ2sFlat, useIndicators);
fprintf('Final log likelihood at iteration %d; logL %f, learning rate %f\n', iter, finalLikelihood, learningRate);
end
end

View File

@@ -0,0 +1,50 @@
function [ SigmaInv] = CalcSigmaCCRF(alphas, betas, precalcBwithoutBeta )
%CALCSIGMACCRF Construct the SigmaInv matrix for a CCRF sequence from the
%   precalculated B^(k) components
% constructing the sigma
% the number of elements in a current sequence
n = size(precalcBwithoutBeta{1},1);
q1 = sum(alphas) * eye(n);
% the above code can be simplified by the following 2 lines of the
% inner loop, we want to do that for every beta however
K2 = numel(betas);
q2 = zeros([n,n]);
% calculating the q2 from the paper
for i=1:K2
% We're basically performing the following calculation, but use
% precalculated D - S instead of doing it every iteration
% S = Similarities(:,:,i);
% D = diag(sum(S));
% q = betas(i) * D - betas(i) * S;
% q2s(:,:,i) = q;
% q2 = q2 + betas(i)*precalcQ2withoutBeta(:,:,i);
q2 = q2 + betas(i)*precalcBwithoutBeta{i};
end
% This is another alternative, does not seem to be faster
% q2old = sum(bsxfun(@times, precalcQ2withoutBeta, reshape(betas,[1,1,K2])),3);
% q2 = sum(q2s, 3);
% % An alternative way of calculating the above could be using bsxfun,
% but this seems to be actually slower than using it
% S = bsxfun(@times, Similarities, -reshape(betas,[1,1,K2]));
%
% % now need the diagonals
% d = sum(Similarities);
%
% I = repmat(eye(n), [1, 1, K2]);
% I = bsxfun(@times, I, reshape(betas,[1,1,K2]));
% D = bsxfun(@times, I, d);
%
% q2s = D + S;
% q2 = sum(q2s2,3);
SigmaInv = 2 * (q1 + q2);
end

View File

@@ -0,0 +1,26 @@
function [ SigmaInv] = CalcSigmaCCRFflat(alphas, betas, n, PrecalcB_flat)
%CALCSIGMACCRFFLAT Construct the SigmaInv matrix from the flattened
%   (lower-triangular) B components, an efficient layout for
%   symmetric matrices
A = sum(alphas) * eye(n);
% calculating the B from the paper
% using the precalculated lower triangular elements of B without beta
Btmp = PrecalcB_flat * betas;
% not faster
% now make it into a square symmetric matrix
B = zeros(n,n);
on = tril(true(n,n));
B(on) = Btmp;
B = B';
B(on) = Btmp;
% Combine A and B
SigmaInv = 2 * (A + B);
end

View File

@@ -0,0 +1,14 @@
function b = CalcbCCRF( alpha, x)
%CALCBCCRF Compute the b vector for the CCRF model (b = 2 * x * alpha)
% b = zeros(size(x,1),1);
%
% for i=1:size(x,1)
% b(i) = 2 * x(i,:) * alpha;
% end
% vectorising above code
b = 2 * x * alpha;
end

View File

@@ -0,0 +1,85 @@
function [ Similarities, PrecalcQ2s, PrecalcQ2sFlat, PrecalcYqDs ] = CalculateSimilarities( n_sequences, x, similarityFNs, y)
%CALCULATESIMILARITIES Construct the similarity matrices B^(k) for each
%   sequence, their flattened lower-triangular versions, and the y'B^(k)y terms
K = numel(similarityFNs);
%calculate similarity measures for each of the sequences
Similarities = cell(n_sequences, 1);
PrecalcQ2s = cell(n_sequences,1);
PrecalcQ2sFlat = cell(n_sequences,1);
PrecalcYqDs = zeros(n_sequences, K);
if(iscell(x))
for q = 1 : n_sequences
xq = x{q};
n = size(xq, 1);
Similarities{q} = zeros([n, n, K]);
PrecalcQ2s{q} = cell(K,1);
PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,K);
% go over all of the similarity metrics and construct the
% similarity matrices
if(nargin > 3)
yq = y{q};
end
for k=1:K
Similarities{q}(:,:,k) = similarityFNs{k}(xq);
S = Similarities{q}(:,:,k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{k} = D - S;
B = D - S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
if(nargin > 3)
PrecalcYqDs(q,k) = -yq'*B*yq;
end
end
end
else
sample_length = size(x,2)/n_sequences;
for q = 1 : n_sequences
beg_ind = (q-1)*sample_length + 1;
end_ind = q*sample_length;
% don't take the bias term
xq = x(2:end, beg_ind:end_ind);
Similarities{q} = zeros([sample_length, sample_length, K]);
PrecalcQ2s{q} = cell(K,1);
PrecalcQ2sFlat{q} = zeros((sample_length*(sample_length+1))/2,K);
% go over all of the similarity metrics and construct the
% similarity matrices
if(nargin > 3)
yq = y(:,q);
end
for k=1:K
Similarities{q}(:,:,k) = similarityFNs{k}(xq);
S = Similarities{q}(:,:,k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{k} = D - S;
B = D - S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
if(nargin > 3)
PrecalcYqDs(q,k) = -yq'*B*yq;
end
end
end
end
end

View File

@@ -0,0 +1,173 @@
function [ Similarities, PrecalcQ2s, PrecalcQ2sFlat, PrecalcYqDs ] = CalculateSimilarities_sparsity( n_sequences, x, similarityFNs, sparsityFNs, y, const)
%CALCULATESIMILARITIES_SPARSITY Construct the similarity and sparsity matrices
%   B^(k) for each sequence, their flattened versions, and the y'B^(k)y terms
K = numel(similarityFNs);
K2 = numel(sparsityFNs);
%calculate similarity measures for each of the sequences
Similarities = cell(n_sequences, 1);
PrecalcQ2s = cell(n_sequences,1);
PrecalcQ2sFlat = cell(n_sequences,1);
PrecalcYqDs = zeros(n_sequences, K + K2);
if(iscell(x))
for q = 1 : n_sequences
xq = x{q};
n = size(xq, 1);
Similarities{q} = zeros([n, n, K+K2]);
PrecalcQ2s{q} = cell(K+K2,1);
PrecalcQ2sFlat{q} = zeros((n*(n+1))/2,K+K2);
% go over all of the similarity metrics and construct the
% similarity matrices
if(nargin > 4)
yq = y{q};
end
for k=1:K
Similarities{q}(:,:,k) = similarityFNs{k}(xq);
S = Similarities{q}(:,:,k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{k} = D - S;
B = D - S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(q,k) = -yq'*B*yq;
end
end
for k=1:K2
Similarities{q}(:,:,K+k) = sparsityFNs{k}(xq);
S = Similarities{q}(:,:,K+k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{K+k} = D + S;
B = D + S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,K+k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(q,K+k) = -yq'*B*yq;
end
end
end
elseif(~const)
sample_length = size(x,2)/n_sequences;
similarities = cell(K, 1);
sparsities = cell(K2, 1);
for q = 1 : n_sequences
beg_ind = (q-1)*sample_length + 1;
end_ind = q*sample_length;
% don't take the bias term
xq = x(2:end, beg_ind:end_ind);
Similarities{q} = zeros([sample_length, sample_length, K+K2]);
PrecalcQ2s{q} = cell(K+K2,1);
PrecalcQ2sFlat{q} = zeros((sample_length*(sample_length+1))/2,K+K2);
% go over all of the similarity metrics and construct the
% similarity matrices
if(nargin > 4)
yq = y(:,q);
end
for k=1:K
if(q==1)
similarities{k} = similarityFNs{k}(xq);
end
Similarities{q}(:,:,k) = similarities{k};
S = Similarities{q}(:,:,k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{k} = D - S;
B = D - S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(q,k) = -yq'*B*yq;
end
end
for k=1:K2
% this is constant so don't need to recalc
if(q==1)
sparsities{k} = sparsityFNs{k}(xq);
end
Similarities{q}(:,:,K+k) = sparsities{k};
S = Similarities{q}(:,:,K+k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{q}{K+k} = D + S;
B = D + S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{q}(:,K+k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(q,K+k) = -yq'*B*yq;
end
end
end
else
sample_length = size(x,2)/n_sequences;
similarities = cell(K, 1);
sparsities = cell(K2, 1);
PrecalcQ2s = {cell(K+K2,1)};
PrecalcQ2sFlat = {zeros((sample_length*(sample_length+1))/2,K+K2)};
Similarities = {zeros([sample_length, sample_length, K+K2])};
beg_ind = 1;
end_ind = sample_length;
% don't take the bias term
xq = x(2:end, beg_ind:end_ind);
% go over all of the similarity metrics and construct the
% similarity matrices
for k=1:K
similarities{k} = similarityFNs{k}(xq);
Similarities{1}(:,:,k) = similarities{k};
S = Similarities{1}(:,:,k);
D = diag(sum(S));
PrecalcQ2s{1}{k} = D - S;
B = D - S;
% flatten the symmetric matrix to save space
PrecalcQ2sFlat{1}(:,k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(:,k) = diag(-y'*B*y);
end
end
for k=1:K2
% this is constant so don't need to recalc
sparsities{k} = sparsityFNs{k}(xq);
Similarities{1}(:,:,K+k) = sparsities{k};
S = Similarities{1}(:,:,K+k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
PrecalcQ2s{1}{K+k} = D + S;
B = D + S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
PrecalcQ2sFlat{1}(:,K+k) = B(logical(tril(ones(size(S)))));
if(nargin > 4)
PrecalcYqDs(:,K+k) = diag(-y'*B*y);
end
end
end
end

View File

@@ -0,0 +1,54 @@
function [ PrecalcYqDs ] = CalculateYqDs( n_sequences, x, similarityFNs, sparsityFNs, y)
%CALCULATEYQDS Precompute the -y'B^(k)y terms for every sequence
%   (sequences are assumed to be of constant length)
K = numel(similarityFNs);
K2 = numel(sparsityFNs);
PrecalcYqDs = zeros(n_sequences, K + K2);
sample_length = size(y,1);
similarities = cell(K, 1);
sparsities = cell(K2, 1);
Similarities = zeros([sample_length, sample_length, K+K2]);
Bs = zeros([sample_length, sample_length, K+K2]);
for k=1:K
similarities{k} = similarityFNs{k}(x);
Similarities(:,:,k) = similarities{k};
S = Similarities(:,:,k);
D = diag(sum(S));
Bs(:,:,k) = D - S;
end
for k=1:K2
% this is constant so don't need to recalc
sparsities{k} = sparsityFNs{k}(x);
Similarities(:,:,K+k) = sparsities{k};
S = Similarities(:,:,K+k);
D = diag(sum(S));
% PrecalcQ2s{q}(:,:,k) = D - S;
Bs(:,:,K+k) = D + S;
% PrecalcQ2sFlat{q}{k} = PrecalcQ2s{q}{k}(logical(tril(ones(size(S)))));
end
for q = 1 : n_sequences
% go over all of the similarity metrics and construct the
% similarity matrices
yq = y(:,q);
for k=1:K+K2
PrecalcYqDs(q,k) = -yq'*Bs(:,:,k)*yq;
end
end
end

View File

@@ -0,0 +1,48 @@
function logL = LogLikelihoodCCRF(y_coll, x_coll, alphas, betas,...
lambda_a,lambda_b, PrecalcBsFlat,...
SigmaInvs, ChDecomps, Sigmas)
% Calculating the log likelihood of the CCRF with multi alpha and beta
Q = numel(y_coll);
logL = 0;
for q=1:Q
yq = y_coll{q};
xq = x_coll{q};
n = size(xq, 1);
b = CalcbCCRF(alphas, xq);
% constructing the sigma inverse
if(nargin < 10)
[SigmaInv] = CalcSigmaCCRFflat(alphas, betas, n, PrecalcBsFlat{q});
L = chol(SigmaInv);
mu = SigmaInv \ b;
else
SigmaInv = SigmaInvs{q};
L = ChDecomps{q};
Sigma = Sigmas{q};
mu = Sigma * b;
end
% normalisation = 1/((2*pi)^(n/2)*sqrt(det(Sigma)));
% Removing the division by pi, as it is constant
% normalisation = 1/(sqrt(det(sigma)));
% flipping around the determinant of SigmaInv, as det(inv(Sigma)) = 1/det(Sigma)
% normalisation = log(sqrt(det(SigmaInv)));
% normalisation 2 using Cholesky decomposition
normalisation2 = sum(log(diag(L))); % no times 2 here as we calculate the square root of determinant
% probq = normalisation * exp(-0.5 * (y - mu)'*SigmaInv*(y-mu));
% applying a logarithm to this leads to
% logLq = log(normalisation) + (-0.5 * (yq - mu)'*SigmaInv*(yq-mu));
logLq = normalisation2 + (-0.5 * (yq - mu)'*SigmaInv*(yq-mu));
logL = logL + logLq;
end
% add regularisation term
logL = logL -lambda_b * (betas'*betas)/2 - lambda_a * (alphas'*alphas)/2;

View File

@@ -0,0 +1,83 @@
function [ correlations, rms, meanCorr, meanRMS, longCorr, longRMS, predictions, gt ] = evaluateCCRFmodel( alphas, betas, x, xOffsets, y, similarityFNs, scaling, verbose, PrecalcBsFlat)
%EVALUATECCRFMODEL Evaluate the trained CCRF model on test (or training) data
num_x_plots = 8;
num_y_plots = 10;
total_plots = num_x_plots * num_y_plots;
nExamples = numel(x);
if(nargin < 9)
[ ~, ~, PrecalcBsFlat, ~ ] = CalculateSimilarities( nExamples, x, similarityFNs);
end
correlations = zeros(nExamples, 1);
rms = zeros(nExamples, 1);
% concatenated data for an alternative correlation
y_predConcat = [];
y_trueConcat = [];
for q=1:nExamples
X = x{q};
nFrames = size(X,1);
PrecalcBflat = PrecalcBsFlat{q};
SigmaInv = CalcSigmaCCRFflat(alphas, betas, nFrames, PrecalcBflat);
b = CalcbCCRF(alphas, x{q});
y_est = SigmaInv \ b;
% y_est = y_est * scaling + xOffsets(q);
y_est = y_est * scaling + xOffsets(q);
R = corrcoef(y_est, y{q});
correlations(q) = R(1,2);
rms(q) = sqrt( (1/nFrames) * sum((y_est - y{q}).^2) );
y_predConcat = cat(1, y_predConcat, y_est);
y_trueConcat = cat(1, y_trueConcat, y{q});
if(verbose)
if(mod(q,total_plots) == 1)
figure;
remainingPlots = nExamples - q;
if(remainingPlots < total_plots)
num_y_plots = ceil(remainingPlots / num_x_plots);
end
end
subplot(num_y_plots,num_x_plots,mod(q-1,total_plots)+1);
t = 1:nFrames;
plot(t,y{q},'g',t,y_est,'b');
title(sprintf('C %.2f, R %.2f', correlations(q), rms(q)));
set(gca, 'XTick', [], 'YTick', []);
% legend('y_{true}','y_{ccrf}');
end
end
meanCorr = mean(correlations);
meanRMS = mean(rms);
longCorr = corr(y_predConcat, y_trueConcat).^2;
longRMS = sqrt( (1/numel(y_predConcat)) * sum((y_predConcat - y_trueConcat).^2) );
predictions = y_predConcat;
gt = y_trueConcat;
if(verbose)
figure
plot([1:numel(y_trueConcat)],y_trueConcat,'g',[1:numel(y_trueConcat)],y_predConcat,'b');
title(sprintf('C %.2f, R %.2f', longCorr, longRMS));
set(gca, 'XTick', [], 'YTick', []);
end
end

View File

@@ -0,0 +1,28 @@
function [ scaling ] = getScaling( alphas, betas, x, y, masks, PrecalcQ2s, useIndicator)
%getScaling Estimate the scaling factor between the CCRF predictions and the
%   unnormalised labels, averaged over all sequences
nExamples = numel(x);
scalings = zeros(1,nExamples);
for q=1:nExamples
mask = masks{q};
PrecalcQ2 = PrecalcQ2s{q};
SigmaInv = CalcSigmaCCRF(alphas, betas, PrecalcQ2, mask, useIndicator);
b = CalcbCCRF(alphas, x{q}, mask, useIndicator);
y_est = SigmaInv \ b;
sc = std(y{q}) / std(y_est);
scalings(q) = sc;
end
scaling = mean(scalings);
end

View File

@@ -0,0 +1,30 @@
function [ scaling ] = getScaling2( alphas, betas, x, y, PrecalcBs)
%getScaling2 Estimate the scaling factor between the concatenated CCRF
%   predictions and the mean-centred unnormalised labels
nExamples = numel(x);
cat_y = [];
cat_y_pred = [];
for q=1:nExamples
PrecalcB = PrecalcBs{q};
SigmaInv = CalcSigmaCCRF(alphas, betas, PrecalcB);
b = CalcbCCRF(alphas, x{q});
y_est = SigmaInv \ b;
cat_y = cat(1, cat_y, y{q} - mean(y{q}));
% cat_y = cat(1, cat_y, y{q});
cat_y_pred = cat(1, cat_y_pred, y_est);
end
% scaling = (max(cat_y) - min(cat_y)) / (max(cat_y_pred) - min(cat_y_pred));
scaling = std(cat_y) / std(cat_y_pred);
end

View File

@@ -0,0 +1,92 @@
function [ logGradientAlphas, logGradientBetas, SigmaInv, ChDecomp ] = gradientCCRF( alphas, betas, lambda_a, lambda_b, precalcQ2withoutBeta, xq, yq, mask, precalcYQ, useIndicator, PrecalcQ2Flat)
%GRADIENTCCRF Compute the (log-space) gradients of the CCRF log-likelihood
%   with respect to the alphas and betas for a single sequence
% Calculate the Sigma inverse now
% [SigmaInv2] = CalcSigmaCCRF(alphas, betas, precalcQ2withoutBeta, mask);
% This is an optimised version as it does not use the whole matrix but
% a lower diagonal part due to symmetry
numElemsInSeq = size(precalcQ2withoutBeta{1}, 1);
[SigmaInv] = CalcSigmaCCRFflat(alphas, betas, numElemsInSeq, PrecalcQ2Flat, mask, useIndicator);
% Get the actual Sigma from our SigmaInv
% Sigma = inv(SigmaInv);
% Below is an optimised version of the above using Cholesky decomposition
% which decomposes a matrix into an upper triangular (R) and its
% conjugate transpose R'; A = R'*R for real numbers, thus
% inv(A) = inv(R)inv(R')
ChDecomp=chol(SigmaInv);
I=eye(size(SigmaInv));
% Rinv = (R\I);
% Sigma = Rinv*Rinv';
% This is a very slightly faster version of the above
Sigma=ChDecomp\(ChDecomp'\I);
b = CalcbCCRF(alphas, xq, mask, useIndicator);
% mu = SigmaInv \ b = Sigma * b;
% as we've calculated Sigma already, this is equivalent to the above
mu = Sigma * b;
logGradientAlphas = zeros(size(alphas));
logGradientBetas = zeros(size(betas));
K1 = numel(alphas);
K2 = numel(betas);
% calculating the derivative of L with respect to alpha_k
for k = 1:K1
if(useIndicator)
dQ1da = diag(mask(:,k));
dbda = xq(:,k).*mask(:,k);
gaussGradient = -yq'*dQ1da*yq +2*yq'*dbda -2 * dbda' * mu + mu'*dQ1da*mu;
zGradient = Sigma(:)'*dQ1da(:);
else
% if we don't use the masks here's a speedup
gaussGradient = -yq'*yq +2*yq'*xq(:,k) -2 * xq(:,k)' * mu + sum(mu.^2);
% simplification as trace(Sigma * I) = trace(Sigma)
zGradient = trace(Sigma);
end
% add the Z derivative now
dLda = zGradient + gaussGradient;
% add regularisation
dLda = dLda - lambda_a * alphas(k);
logGradientAlphas(k) = alphas(k) * dLda;
end
% This was done for gradient checking
% [alphasG, betaG] = gradientAnalytical(nFrames, S, alphas, beta, xq, yq, mask);
% calculating the derivative of log(L) with respect to the betas
for k=1:K2
% Bs = Bs(:,:,k);
% dSdb = q2./betas(k); we precalculate this, as it does not change
% over the course of optimisation (dSdb - dSigma/dbeta)
dSdb = precalcQ2withoutBeta{k};
% -yq'*dSdb*yq can be precalculated as they don't change through
% iterations (precalcQ2withoutBeta is dSdb)
% gaussGradient = -yq'*dSdb*yq + mu'*dSdb*mu;
% this does the above line
gaussGradient = precalcYQ(k) + mu'*dSdb*mu;
% zGradient = trace(Sigma*dSdb);
zGradient = Sigma(:)'*dSdb(:); % equivalent but faster to the above line
dLdb = gaussGradient + zGradient;
% add regularisation term
dLdb = dLdb - lambda_b * betas(k);
logGradientBetas(k) = betas(k) * dLdb;
end
end

View File

@@ -0,0 +1,39 @@
function [ gradientParams, SigmaInvs, CholDecomps, Sigmas ] = gradientCCRFFull( params, lambda_a, lambda_b, PrecalcBs, x, y, Precalc_yBys, PrecalcBsFlat)
%GRADIENTCCRFFULL Compute the full CCRF gradient by summing the per-sequence
%   gradients and adding the regularisation term
nExamples = numel(x);
numBetas = size(PrecalcBsFlat{1},2);
numAlphas = numel(params) - numBetas;
alphasInit = params(1:numAlphas);
betasInit = params(numAlphas+1:end);
gradientParams = zeros(size(params));
% These may be used later to calculate the log-likelihood, so avoid
% recomputing them
SigmaInvs = cell(nExamples, 1);
CholDecomps = cell(nExamples, 1);
Sigmas = cell(nExamples, 1);
gradients = zeros(nExamples, numel(params));
for q = 1 : nExamples
yq = y{q};
xq = x{q};
PrecalcB = PrecalcBs{q};
PrecalcB_flat = PrecalcBsFlat{q};
[ logGradientsAlphas, logGradientsBetas, SigmaInv, CholDecomp, Sigma ] = gradientCCRF_withoutReg(alphasInit, betasInit, PrecalcB, xq, yq, Precalc_yBys(q, :), PrecalcB_flat);
SigmaInvs{q} = SigmaInv;
CholDecomps{q} = CholDecomp;
Sigmas{q} = Sigma;
gradients(q,:) = [logGradientsAlphas; logGradientsBetas];
end
gradientParams = sum(gradients,1)';
regAlpha = alphasInit * lambda_a;
regBeta = betasInit * lambda_b;
gradientParams = gradientParams - [regAlpha; regBeta];
end

View File

@@ -0,0 +1,76 @@
function [ logGradientAlphas, logGradientBetas, SigmaInv, CholDecomp, Sigma ] = gradientCCRF_withoutReg( alphas, betas, precalcQ2withoutBeta, xq, yq, Precalc_yBy, PrecalcB_flat)
%GRADIENTCCRF_WITHOUTREG Compute the per-sequence CCRF gradients with respect
%   to the alphas and betas, without the regularisation term
% Calculate the Sigma inverse now
% This is an optimised version as it does not use the whole matrix but
% a lower diagonal part due to symmetry
n = size(xq, 1);
[SigmaInv] = CalcSigmaCCRFflat(alphas, betas, n, PrecalcB_flat);
% Get the actual Sigma from our SigmaInv
% Sigma = inv(SigmaInv);
% Below is an optimised version of the above using Cholesky decomposition
% which decomposes a matrix into an upper triangular (R) and its
% conjugate transpose R'; A = R'*R for real numbers, thus
% inv(A) = inv(R)inv(R')
CholDecomp=chol(SigmaInv);
I=eye(size(SigmaInv));
% This is a way of calculating it faster than just inv(SigmaInv)
Sigma=CholDecomp\(CholDecomp'\I);
b = CalcbCCRF(alphas, xq);
% mu = SigmaInv \ b = Sigma * b;
% as we've calculated Sigma already, this is equivalent to the above
mu = Sigma * b;
logGradientAlphas = zeros(size(alphas));
logGradientBetas = zeros(size(betas));
K1 = numel(alphas);
K2 = numel(betas);
% calculating the derivative of L with respect to alpha_k
for k = 1:K1
gaussGradient = -yq'*yq +2*yq'*xq(:,k) -2 * xq(:,k)' * mu + sum(mu.^2);
% simplification as trace(Sigma * I) = trace(Sigma)
zGradient = trace(Sigma);
% add the Z (partition function) derivative now
dLda = zGradient + gaussGradient;
logGradientAlphas(k) = dLda;
end
% This was done for gradient checking
% [alphasG, betaG] = gradientAnalytical(nFrames, S, alphas, beta, xq, yq, mask);
% calculating the derivative of log(L) with respect to the betas
for k=1:K2
% Bs = Bs(:,:,k);
% dSdb = q2./betas(k); we precalculate this, as it does not change
% over the course of optimisation (dSdb - dSigma/dbeta)
dSdb = precalcQ2withoutBeta{k};
% -yq'*dSdb*yq can be precalculated as they don't change through
% iterations (precalcQ2withoutBeta is dSdb)
% gaussGradient = -yq'*dSdb*yq + mu'*dSdb*mu;
% this does the above line
gaussGradient = Precalc_yBy(k) + mu'*dSdb*mu;
% zGradient = trace(Sigma*dSdb);
zGradient = Sigma(:)'*dSdb(:); % equivalent but faster to the above line
dLdb = gaussGradient + zGradient;
logGradientBetas(k) = dLdb;
end
end

View File

@@ -0,0 +1,35 @@
function W = randInitializeWeights(L_in, L_out)
%RANDINITIALIZEWEIGHTS Randomly initialize the weights of a layer with L_in
%incoming connections and L_out outgoing connections
% W = RANDINITIALIZEWEIGHTS(L_in, L_out) randomly initializes the weights
% of a layer with L_in incoming connections and L_out outgoing
% connections.
%
%   Note that W is a matrix of size (L_out, 1 + L_in), as the first
%   column of W handles the "bias" terms
epsilon_init = 1/sqrt(L_in);
W = rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init;
end

View File

@@ -0,0 +1,8 @@
function SimilarityMatrix = similarityEuclidean(x)
%spatial distance measure
Distances = sqrt(pdist(x)+3e-6).^-1; % 0.05 best so far
SimilarityMatrix = squareform(Distances) + eye(size(x, 1));
end

View File

@@ -0,0 +1,25 @@
function SimilarityMatrix = similarityGauss(x, sigma, range, mask)
%spatial distance measure, based on exponential decay, creates a matrix of
%similarities
% get the euclidean distance for each pair
if(numel(range) > 0)
Distances = exp(-pdist(x(:,range))/sigma); % 0.05 best so far
else
Distances = exp(-pdist(x)/sigma); % 0.05 best so far
end
SimilarityMatrix = squareform(Distances);
% invalidate the illegal values from the mask (if at least one element is
% not present in the mask set similarity to 0)
if(numel(mask) ~= 0)
invalidInds = sum(mask(:,range),2) < numel(range);
SimilarityMatrix(invalidInds,:) = 0;
SimilarityMatrix(:,invalidInds) = 0;
end
SimilarityMatrix = SimilarityMatrix + eye(size(x, 1));
end

View File

@@ -0,0 +1,25 @@
function [ SimilarityMatrix ] = similarityNeighbor( x, n, range)
%SIMILARITYNEIGHBOR Creates a similarity matrix connecting each element of
% a sequence to its n-th neighbour
sz = size(x,1);
SimilarityMatrix = eye(sz);
i = 1:sz-n;
SimilarityMatrix(sub2ind([sz, sz], i+n,i)) = 1;
SimilarityMatrix(sub2ind([sz, sz], i,i+n)) = 1;
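% For example (illustration only), with sz = 4 and n = 1 the assignments
% above produce the banded structure below (before the diagonal handling
% that follows):
%   1 1 0 0
%   1 1 1 0
%   0 1 1 1
%   0 0 1 1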
% invalidate the illegal values from the mask (if at least one element is
% not present in the mask set similarity to 0)
% if(numel(mask)~=0)
% invalidInds = sum(mask(:,range),2) < numel(range);
%
% SimilarityMatrix(invalidInds,:) = 0;
% SimilarityMatrix(:,invalidInds) = 0;
% end
DiagMask = ones(size(x, 1)) - eye(size(x,1));
SimilarityMatrix = SimilarityMatrix .* DiagMask;
SimilarityMatrix = SimilarityMatrix + eye(size(x, 1));
end

View File

@@ -0,0 +1,78 @@
function [ responses ] = CCNF_ncc_response( patches, patch_experts, normalisation_options, window_size, patch_length)
%CCNF_NCC_RESPONSE Computes a patch response from a CCNF patch expert
% Uses convolution; intended for testing correctness rather than speed
SigmaInv = patch_experts.SigmaInv;
patchSize = normalisation_options.patchSize;
if(~iscell(patches))
patches = {patches};
end
num_modalities = numel(patches);
responses = zeros(size(patches{1},1), patch_length);
% prepare the patches by normalising them to zscore (if used)
if(normalisation_options.zscore)
for i=1:num_modalities
patches{i} = zscore(patches{i});
end
end
for i = 1:size(patches{1},1)
norm_cross_corr = normalisation_options.useNormalisedCrossCorr == 1;
b = zeros(patch_length,1);
hl_per_modality = size(patch_experts.thetas,1);
for p=1:num_modalities
smallRegionVec = patches{p}(i,:);
smallRegion = reshape(smallRegionVec, window_size(1), window_size(2));
for hls = 1:hl_per_modality
% because the normalised cross correlation calculates the
% responses from a normalised template and a normalised image,
% normalise the thetas here and then apply the normalisation to
% the response
w = patch_experts.thetas(hls, 2:end, p);
norm_w = norm(w);
w = w/norm(w);
w = reshape(w, patchSize);
response = -norm_w * Cross_corr_resp(smallRegion, w, norm_cross_corr, patchSize) - patch_experts.thetas(hls,1,p);
% here we include the bias term as well, as it wasn't added
% during the response calculation
h1 = 1./(1 + exp(response(:)));
b = b + (2 * patch_experts.alphas((p-1)*hl_per_modality + hls) * h1);
end
end
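% Sketch of the relationship used here: the CCNF mean response is
% mu = Sigma * b, i.e. SigmaInv \ b, so the precomputed SigmaInv can be
% used directly and no explicit matrix inverse needs to be formed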
response = SigmaInv \ b;
responses(i,:) = response(:);
end
responses = responses';
responses = responses(:);
end
function response = Cross_corr_resp(region, patchExpert, normalise_x_corr,patchSize)
if(normalise_x_corr)
[response] = normxcorr2(patchExpert, region);
response = response(patchSize(1):end-patchSize(1)+1,patchSize(2):end-patchSize(2)+1);
else
% this assumes that the patch is already normed, so just use
% cross-correlation
template = rot90(patchExpert,2);
response = conv2(region, template, 'valid');
end
end

View File

@@ -0,0 +1,77 @@
function [alphas, betas, thetas, similarities, sparsities] = Create_CCNF_Regressor(samples, labels, patch_length, similarity_types, sparsity_types, normalisation_options)
%CREATE_CCNF_REGRESSOR Creates a CCNF (LNF) patch expert given labelled
%training samples
% Add the CCNF library
addpath('../../CCNF/lib');
%% Preparing the similarity and sparsity function
% this will create a family of similarity neighbour node connections
similarities = {};
for i=1:size(similarity_types, 1)
type = similarity_types{i};
neighFn = @(x) similarity_neighbor_grid(x, sqrt(patch_length), type);
similarities = [similarities; {neighFn}];
end
sparsities = {};
% this will create a family of sparsity (inhibition) neighbour node
% connections
for i=1:size(sparsity_types, 1)
spFn = @(x) sparsity_grid(x, sqrt(patch_length), sparsity_types(i,1), sparsity_types(i,2));
sparsities = [sparsities; {spFn}];
end
%% Default training hyper-parameters
thresholdX = 1e-8;
thresholdFn = 1e-4;
max_iter = 200;
input_layer_size = size(samples, 1)-1;
% Some rule-of-thumb hyper-parameters depending on whether similarities are defined
if(numel(similarities) == 0)
best_lambda_a = 10000;
best_lambda_b = 0;
best_lambda_th = 0.1;
best_num_layer = 5;
else
best_lambda_a = 100;
best_lambda_b = 1000;
best_lambda_th = 0.1;
best_num_layer = 5;
end
% Checking if the hyper-parameters are specified to be overridden
if(isfield(normalisation_options, 'lambda_a'))
best_lambda_a = normalisation_options.lambda_a;
end
if(isfield(normalisation_options, 'lambda_b'))
best_lambda_b = normalisation_options.lambda_b;
end
if(isfield(normalisation_options, 'lambda_th'))
best_lambda_th = normalisation_options.lambda_th;
end
if(isfield(normalisation_options, 'num_layers'))
best_num_layer = normalisation_options.num_layers;
end
% Initial parameter values
alphas = 1 * ones(best_num_layer,1);
betas = 1 * ones(numel(similarities) + numel(sparsities), 1);
initial_Theta = randInitializeWeights(input_layer_size, best_num_layer);
num_seqs = size(samples, 2)/patch_length;
labels = reshape(labels, patch_length, num_seqs);
% Actual training
[alphas, betas, thetas] = CCNF_training_bfgs(thresholdX, thresholdFn, samples, labels, alphas, betas, initial_Theta, best_lambda_a, best_lambda_b, best_lambda_th, similarities, sparsities, 'const', true, 'reinit', true, 'num_reinit', 20, 'num_seqs', num_seqs, 'lbfgs', 'max_iter', max_iter);
end

View File

@@ -0,0 +1,13 @@
function [ meanSquaredError, correlation, predictions ] = EvaluatePatchExpert( samples, labels, alphas, betas, thetas, similarities, sparsities, normalisationOptions, region_length )
%EVALUATEPATCHEXPERT Evaluates a trained CCNF patch expert on the given
% samples, returning the mean squared error, correlation and predictions
num_seqs = size(samples, 2)/region_length;
% adding the bias term, and transposing to optimise
labels = reshape(labels, region_length, num_seqs);
[~,~,~, ~, correlation, meanSquaredError, predictions] = evaluate_CCNF_model(alphas, betas, thetas, samples, labels, similarities, sparsities, 0, 1, false);
end

View File

@@ -0,0 +1,132 @@
function [samples, labels, samples_unnormed, imgs_used] = ExtractTrainingSamples(examples, landmarkLoc, img_names, sigma, numSamples, landmark, normalisation_options)
%%
% for an area of interest of 19x19 and patch support region of 11x11, we
% would have 9x9=81 samples (9 is the single_input_size, 11 is
% patch_expert_support_size, 19x19 is normalisation_size, 9 would be the
% normalisation_side_size)
evaluation_size = normalisation_options.normalisationRegion;
patch_expert_support_size = normalisation_options.patchSize;
normalisation_side_size = (evaluation_size - 1)/2;
single_input_size = evaluation_size - patch_expert_support_size + 1;
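% Worked example of the sizes above: with normalisationRegion = [19 19]
% and patchSize = [11 11] we get normalisation_side_size = (19-1)/2 = 9
% and single_input_size = 19 - 11 + 1 = 9, i.e. 9x9 = 81 sliding-window
% samples per extracted region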
% Determine the ratio of images to be sampled (most likely not all of them will be)
samples_per_img = (numSamples / (size(examples,1) * (1 + normalisation_options.rate_negative))) / (single_input_size(1)^2);
num_samples = int32(samples_per_img * (1 + normalisation_options.rate_negative) * size(examples,1) * (single_input_size(1)^2));
%% Initialise the samples and labels
samples = zeros(num_samples, patch_expert_support_size(1) * patch_expert_support_size(2));
labels = zeros(num_samples, 1);
%% Initialise the unnormed versions of the images
% This is done in order to verify that our response calculation algorithms
% agree: for training we might use regular ML procedures, whereas for
% fitting, normalised cross-correlation (or plain cross-correlation) will
% be used, so keep some unnormed samples for the comparison
samples_unnormed = zeros(int32(num_samples/300), evaluation_size(1)^2);
img_size = [size(examples,2), size(examples,3)];
% Keep only images with differently shaped faces so as to extract more
% diverse training samples
to_keep = FindDistantLandmarks(landmarkLoc, landmark, round(samples_per_img*size(examples,1)));
inds_all = 1:size(examples,1);
samples_to_use = inds_all(to_keep);
% Keep track of how many samples have been computed already
samples_filled = 1;
samples_unnormed_filled = 1;
%% parse the image names for reporting purposes
imgs_used = img_names(samples_to_use);
for i=1:numel(imgs_used)
[~,name,ext] = fileparts(imgs_used{i});
imgs_used{i} = [name, ext];
end
for i=samples_to_use
% Do rate_negative negatives and a single positive
for p=1:normalisation_options.rate_negative+1
% create a gaussian
corrPoint = landmarkLoc(i,landmark,:);
% Ignore occluded points
if(corrPoint(1) == 0)
break;
end
startX = 1 - corrPoint(1);
startY = 1 - corrPoint(2);
patchWidth = img_size(2);
patchHeight = img_size(1);
[X, Y] = meshgrid(startX:patchWidth + startX-1, startY:patchHeight + startY-1);
response = exp(-0.5*(X.^2+Y.^2)/(sigma^2));
% Choose positive or negative sample
if(p==normalisation_options.rate_negative+1)
sample_centre = squeeze(corrPoint) + round(1*randn(2,1));
else
sample_centre = squeeze(corrPoint) + round(10*randn(2,1));
end
sample_centre = round(sample_centre);
sample_centre(sample_centre <= normalisation_side_size(1)) = normalisation_side_size(1) + 1;
sample_centre(sample_centre > img_size(1)-normalisation_side_size(1)) = img_size(1) - normalisation_side_size(1) - 1;
patches = squeeze(examples(i, sample_centre(2) - normalisation_side_size:sample_centre(2) + normalisation_side_size, sample_centre(1) - normalisation_side_size:sample_centre(1) + normalisation_side_size));
side = (single_input_size - 1)/2;
responses = response(sample_centre(2) - side(2):sample_centre(2) + side(2), sample_centre(1) - side(1):sample_centre(1) + side(1));
if(samples_unnormed_filled <= size(samples_unnormed,1))
% even if correct size is not initialised Matlab will
% sort that out (would only happen once anyway)
samples_unnormed(samples_unnormed_filled,:) = patches(:);
samples_unnormed_filled = samples_unnormed_filled + 1;
end
% if we want to normalise each patch individually, do it here
patch = im2col(patches, patch_expert_support_size, 'sliding')';
response = im2col(responses, [1,1], 'sliding');
labels(samples_filled:samples_filled+size(patch,1)-1,:) = response;
samples(samples_filled:samples_filled+size(patch,1)-1,:) = patch;
samples_filled = samples_filled + size(patch,1);
end
end
if(normalisation_options.useNormalisedCrossCorr == 1)
mean_curr = mean(samples, 2);
patch_normed = samples - repmat(mean_curr,1, patch_expert_support_size(1)*patch_expert_support_size(2));
% Normalising the patches using the L2 norm
scaling = sqrt(sum(patch_normed.^2,2));
scaling(scaling == 0) = 1;
patch_normed = patch_normed ./ repmat(scaling, 1, patch_expert_support_size(1)*patch_expert_support_size(2));
samples = patch_normed;
clear 'patch_normed';
end
% Only keep the filled samples
samples = samples(1:samples_filled-1,:);
labels = labels(1:samples_filled-1,:);
if((samples_filled-1)/(single_input_size(1)*single_input_size(2)) < size(samples_unnormed,1))
samples_unnormed = samples_unnormed(1:(samples_filled-1)/(single_input_size(1)*single_input_size(2)),:);
end
end

View File

@@ -0,0 +1,41 @@
function [to_keep] = FindDistantLandmarks(landmarkLoc, landmark_num, num_to_keep)
% First align all of them
a = landmarkLoc(:,:,1);
b = landmarkLoc(:,:,2);
offset_x = mean(a,2);
offset_y = mean(b,2);
landmark_loc_off = cat(3, bsxfun(@plus, a, -offset_x), bsxfun(@plus, b, -offset_y));
fixed_x = landmark_loc_off(:,:,1);
fixed_y = landmark_loc_off(:,:,2);
% Extract the relevant landmarks
fixed_x_l = fixed_x(:,landmark_num);
fixed_y_l = fixed_y(:,landmark_num);
obs = cat(2, fixed_x_l, fixed_y_l);
% Discard landmarks that are very close to each other, so that we only
% keep more diverse images
D = squareform(pdist(obs));
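% Greedy pruning (descriptive note): each iteration drops the sample whose
% mean distance to the others is smallest; dropped rows are set to a large
% constant (200) so they are never selected again, and their columns are
% zeroed so their distances no longer contribute to the remaining scores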
to_keep = true(size(landmarkLoc,1),1);
for i = 1:(size(landmarkLoc,1) - num_to_keep)
diversity_score = mean(D,2);
a = min(diversity_score);
lowest = find(diversity_score == a);
lowest = lowest(1);
to_keep(lowest) = 0;
D(:,~to_keep) = 0;
D(~to_keep,:) = 200;
end
end

View File

@@ -0,0 +1,120 @@
function [ normalisation_options ] = Parse_settings( sigma, ratio_neg, num_samples, varargin)
%PARSE_SETTINGS Parses the varargin key-value options into the
% normalisation_options struct used for patch expert training
% creating the parameters to use when training colour (intensity) patches
normalisation_options = struct;
% this is what is currently expected (although we could potentially have
% bigger or smaller patches, this should not be bigger than the patch
% available in examples and negExamples)
normalisation_options.patchSize = [11 11];
% The size of the region that is taken for training around an
% aligned or misaligned landmark
if(sum(strcmp(varargin,'normalisation_size')))
ind = find(strcmp(varargin,'normalisation_size')) + 1;
normalisation_options.normalisationRegion = [varargin{ind}, varargin{ind}];
else
normalisation_options.normalisationRegion = [21 21];
end
% This specifies the split of data ratios
normalisation_options.ccnf_ratio = 0.9; % proportion of data used for cross-validating CCNFs
% the rest is used for testing and provides the F1 and accuracy scores
if(any(strcmp(varargin, 'patch_types')))
ind = find(strcmp(varargin,'patch_types')) + 1;
normalisation_options.patch_type = varargin{ind};
else
normalisation_options.patch_type = {'reg'};
end
if(any(strcmp(varargin, 'sparsity_types')))
ind = find(strcmp(varargin,'sparsity_types')) + 1;
if(~isempty( varargin{ind}))
normalisation_options.sparsity = 1;
normalisation_options.sparsity_types = varargin{ind};
else
normalisation_options.sparsity = 0;
normalisation_options.sparsity_types = [];
end
else
normalisation_options.sparsity = 0;
normalisation_options.sparsity_types = [];
end
if(any(strcmp(varargin, 'lambda_a')))
ind = find(strcmp(varargin,'lambda_a')) + 1;
normalisation_options.lambda_a = varargin{ind};
end
if(any(strcmp(varargin, 'lambda_b')))
ind = find(strcmp(varargin,'lambda_b')) + 1;
normalisation_options.lambda_b = varargin{ind};
end
if(any(strcmp(varargin, 'lambda_th')))
ind = find(strcmp(varargin,'lambda_th')) + 1;
normalisation_options.lambda_th = varargin{ind};
end
if(any(strcmp(varargin, 'num_layers')))
ind = find(strcmp(varargin,'num_layers')) + 1;
normalisation_options.num_layers = varargin{ind};
end
if(any(strcmp(varargin, 'num_bins')))
ind = find(strcmp(varargin,'num_bins')) + 1;
normalisation_options.num_hog_bins = varargin{ind};
else
normalisation_options.num_hog_bins = 9;
end
normalisation_options.numSamples = num_samples;
normalisation_options.useZeroMeanPerPatch = 1;
normalisation_options.useNormalisedCrossCorr = 1;
normalisation_options.zscore = 0;
% Should invalid pixels be taken into account when normalising (yes in
% case of depth and no in case of colour)
normalisation_options.ignoreInvalidInMeanStd = 0; % we don't care about invalid pixels at this time (black is valid here) TODO background simulation?
normalisation_options.setIllegalToPost = 0;
if(sum(strcmp(varargin,'use_bu')))
ind = find(strcmp(varargin,'use_bu')) + 1;
normalisation_options.bu = varargin{ind};
else
normalisation_options.bu = 1;
end
if(sum(strcmp(varargin,'use_mpie')))
ind = find(strcmp(varargin,'use_mpie')) + 1;
normalisation_options.mpie = varargin{ind};
else
normalisation_options.mpie = 1;
end
if(sum(strcmp(varargin,'use_wild')))
ind = find(strcmp(varargin,'use_wild')) + 1;
normalisation_options.wild = varargin{ind};
else
normalisation_options.wild = 0;
end
normalisation_options.sigma = sigma;
normalisation_options.rate_negative = ratio_neg;
% the similarities need to be tested separately (1, 2, 3 and 4), all
% together, horizontal/vertical vs. diagonal, and with none at all
if(any(strcmp(varargin, 'similarity_types')))
ind = find(strcmp(varargin,'similarity_types')) + 1;
normalisation_options.similarity_types = varargin{ind};
else
normalisation_options.similarity_types = [];
end
end

View File

@@ -0,0 +1,38 @@
clear
% define the root name of database
root = '../data_preparation/prepared_data/';
% which scales we're doing
sigma = 1;
num_samples = 5e5;
scales = [0.25,0.35,0.5];
frontalView = 1;
profileViewInds = [];
version = 'cofw';
ratio_neg = 5;
norm = 1;
data_loc = 'cofw_';
rng(0);
similarities = {};
sparsity = 1;
sparsity_types = [];
lambda_a = 100;
lambda_b = 1000;
lambda_th = 1;
num_layers = 7;
for s=scales
Train_all_patch_experts(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19, 'similarity_types', similarities, 'sparsity', sparsity,...
'sparsity_types', sparsity_types, 'lambda_a', lambda_a, 'lambda_b', lambda_b, 'lambda_th', lambda_th, 'num_layers', num_layers);
end

View File

@@ -0,0 +1,38 @@
clear
% define the root name of database
root = '../data_preparation/prepared_data/';
% which scales we're doing
sigma = 1;
num_samples = 2.5e6;
scales = [0.25,0.35,0.5,1.0];
frontalView = 1;
profileViewInds = [2,3,4];
version = 'general';
ratio_neg = 10;
norm = 1;
data_loc = 'combined_';
rng(0);
similarities = {[1,2]; [3, 4]};
sparsity = 1;
sparsity_types = [4,6];
lambda_a = 200;
lambda_b = 7500;
lambda_th = 1;
num_layers = 7;
for s=scales
Train_all_patch_experts(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19, 'similarity_types', similarities, 'sparsity', sparsity,...
'sparsity_types', sparsity_types, 'lambda_a', lambda_a, 'lambda_b', lambda_b, 'lambda_th', lambda_th, 'num_layers', num_layers);
end

View File

@@ -0,0 +1,38 @@
clear
% define the root name of database
root = '../data_preparation/prepared_data/';
% which scales we're doing
sigma = 1;
num_samples = 5e5;
scales = [0.25,0.35,0.5];
frontalView = 1;
profileViewInds = [2,3,4];
version = 'multi_pie';
ratio_neg = 5;
norm = 1;
data_loc = 'mpie_';
rng(0);
similarities = {[1,2]; [3, 4]};
sparsity = 1;
sparsity_types = [4,6];
lambda_a = 100;
lambda_b = 1000;
lambda_th = 1;
num_layers = 7;
for s=scales
Train_all_patch_experts(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19, 'similarity_types', similarities, 'sparsity', sparsity,...
'sparsity_types', sparsity_types, 'lambda_a', lambda_a, 'lambda_b', lambda_b, 'lambda_th', lambda_th, 'num_layers', num_layers);
end

View File

@@ -0,0 +1,38 @@
clear
% define the root name of database
root = '../data_preparation/prepared_data/';
% which scales we're doing
sigma = 1;
num_samples = 2e6;
scales = [0.25,0.35,0.5,1.0];
frontalView = 1;
profileViewInds = [2];
version = 'wild';
ratio_neg = 5;
norm = 1;
data_loc = 'wild_';
rng(0);
similarities = {[1,2]; [3, 4]};
sparsity = 1;
sparsity_types = [4,6];
lambda_a = 200;
lambda_b = 7500;
lambda_th = 1;
num_layers = 7;
for s=scales
Train_all_patch_experts(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19, 'similarity_types', similarities, 'sparsity', sparsity,...
'sparsity_types', sparsity_types, 'lambda_a', lambda_a, 'lambda_b', lambda_b, 'lambda_th', lambda_th, 'num_layers', num_layers);
end

View File

@@ -0,0 +1,152 @@
function [correlations, rmsErrors, patchExperts, visiIndex, centres, imgs_used, normalisation_options] = Train_CCNF_patches(training_loc, view, scale, sigma, ratio_neg, num_samples, varargin)
%% creating the model
% creating the regression models
normalisation_options = Parse_settings( sigma, ratio_neg, num_samples, varargin{:});
if(sum(strcmp(varargin,'data_loc')))
ind = find(strcmp(varargin,'data_loc')) + 1;
data_loc = varargin{ind};
data_loc = sprintf(['%s\\' data_loc '%s_%s.mat'], training_loc, num2str(scale), num2str(view));
else
data_loc = sprintf('%s\\wild_%s_%s.mat', training_loc, num2str(scale), num2str(view));
end
load(data_loc);
examples = all_images;
landmark_loc = landmark_locations;
clear 'all_images'
numPoints = size(landmark_loc,2);
correlations = zeros(1, numPoints);
rmsErrors = zeros(1, numPoints);
patchExperts = cell(1, numPoints);
for j=1:numPoints
pause(0);
% can only do mirroring if there is no yaw
if((numPoints == 68 || numPoints == 29 )&& centres(2) == 0)
% Do not redo a mirror feature (just flip them)
if(numPoints == 68)
mirrorInds = [1,17;2,16;3,15;4,14;5,13;6,12;7,11;8,10;18,27;19,26;20,25;21,24;22,23;...
32,36;33,35;37,46;38,45;39,44;40,43;41,48;42,47;49,55;50,54;51,53;60,56;59,57;...
61,65;62,64;68,66];
else
mirrorInds = [1,2; 3,4; 5,7; 6,8; 9,10; 11,12; 13,15; 14,16; 17,18; 19,20; 23,24];
end
mirror_idx = j;
if(any(mirrorInds(:,1)==j))
mirror_idx = mirrorInds(mirrorInds(:,1)==j,2);
elseif(any(mirrorInds(:,2)==j))
mirror_idx = mirrorInds(mirrorInds(:,2)==j,1);
end
if(mirror_idx~=j & correlations(1,mirror_idx) ~= 0)
correlations(1,j) = correlations(1,mirror_idx);
rmsErrors(1, j) = rmsErrors(1,mirror_idx);
patchExperts{1, j} = patchExperts{1,mirror_idx};
num_hl = size(patchExperts{1,mirror_idx}.thetas, 1);
num_mod = size(patchExperts{1,mirror_idx}.thetas, 3);
for m=1:num_mod
for hl=1:num_hl
w = reshape(patchExperts{1,mirror_idx}.thetas(hl, 2:end, m),11,11);
w = fliplr(w);
w = reshape(w, 121,1);
patchExperts{1, j}.thetas(hl, 2:end, m) = w;
end
end
fprintf('Feature %d done\n', j);
continue;
end
end
imgs_used = {};
if(visiIndex(j))
tic;
% instead of loading the patches compute them here:
num_samples = normalisation_options.numSamples;
[samples, labels, unnormed_samples, imgs_used_n] = ExtractTrainingSamples(examples, landmark_loc, actual_imgs_used, sigma, num_samples, j, normalisation_options);
imgs_used = union(imgs_used, imgs_used_n);
% add the bias term
samples = [ones(1,size(samples,1)); samples'];
region_length = normalisation_options.normalisationRegion - normalisation_options.patchSize + 1;
region_length = region_length(1) * region_length(2);
num_examples = size(samples, 2);
% this part sets the split boundaries for training and test subsets
train_ccnf_start = 1;
train_ccnf_end = int32(normalisation_options.ccnf_ratio * num_examples - 1);
% make sure we don't split a full training region apart
train_ccnf_end = train_ccnf_end - mod(train_ccnf_end, region_length);
test_start = train_ccnf_end + 1;
test_end = size(samples,2);
samples_train = samples(:,train_ccnf_start:train_ccnf_end);
labels_train = labels(train_ccnf_start:train_ccnf_end);
samples_test = samples(:,test_start:test_end);
labels_test = labels(test_start:test_end);
% Set up the patch expert
similarity_types = normalisation_options.similarity_types;
patch_expert.similarity_types = similarity_types;
patch_expert.sparsity = normalisation_options.sparsity;
patch_expert.sparsity_types = normalisation_options.sparsity_types;
patch_expert.patch_expert_type = 'CCNF';
% The actual regressor training
[alpha, betas, thetas, similarities, sparsities] = Create_CCNF_Regressor(samples_train, labels_train, region_length, similarity_types, normalisation_options.sparsity_types, normalisation_options);
% The learned patch expert
patch_expert.alphas = alpha;
patch_expert.betas = betas;
patch_expert.thetas = thetas;
% if we have a SigmaInv, we don't need the betas anymore (or the
% similarity and sparsity functions for that matter); compute it
% on a single sample for efficiency
[ ~, ~, Precalc_Bs_flat, ~ ] = CalculateSimilarities( 1, zeros(size(samples,1),region_length), similarities, sparsities, labels_train(1:region_length), true);
Precalc_B_flat = Precalc_Bs_flat{1};
SigmaInv = CalcSigmaCCNFflat(patch_expert.alphas, patch_expert.betas, region_length, Precalc_B_flat, eye(region_length), zeros(region_length));
patch_expert.SigmaInv = SigmaInv;
% Evaluate the patch expert
[rmsError, corr,~] = EvaluatePatchExpert(samples_test, labels_test, alpha, betas, thetas, similarities, sparsities, normalisation_options, region_length);
fprintf('Rms error %.3f, correlation %.3f\n', rmsError, corr);
% Assert that our normalisation and different fitting are equivalent
% normed_samples = samples(:,1:size(unnormed_samples,1)*region_length);
% [~, ~, responses_ccnf] = EvaluatePatchExpert(normed_samples, labels(1:size(unnormed_samples,1)*region_length), alpha, betas, thetas, similarities, sparsities, normalisation_options, region_length);
% [responses_ccnf_ncc] = CCNF_ncc_response(unnormed_samples, patch_expert, normalisation_options, normalisation_options.normalisationRegion, region_length);
% assert(norm(responses_ccnf-responses_ccnf_ncc)< 10e-1);
correlations(1,j) = corr;
rmsErrors(1, j) = rmsError;
patchExperts{1, j} = patch_expert(:);
fprintf('Landmark %d done\n', j);
clear samples
clear samples_test
clear samples_train
clear labels
clear unnormed_samples
clear imgs_used_n
toc;
end
end
end

View File

@@ -0,0 +1,155 @@
function Train_all_patch_experts(trainingLoc, frontalView, profile_views, scaling, sigma, version, varargin)
% need some documentation here
if(sum(strcmp(varargin,'ratio_neg')))
ind = find(strcmp(varargin,'ratio_neg')) + 1;
ratio_neg = varargin{ind};
else
ratio_neg = 20;
end
if(sum(strcmp(varargin,'num_samples')))
ind = find(strcmp(varargin,'num_samples')) + 1;
num_samples = varargin{ind};
else
num_samples = 5e5;
end
patch_experts = struct;
patch_experts.types = {'reg'};
patch_experts.correlations = [];
patch_experts.rms_errors = [];
patch_experts.patch_experts = cell(numel(patch_experts.types), 1);
% first do the frontal view
[visiIndex, centres, patch_experts, imgs_used, norm_options] = ...
AppendTraining(trainingLoc, frontalView, scaling, sigma, [], [], patch_experts, ratio_neg, num_samples, varargin{:});
fprintf('Frontal done\n');
% now do the profile views
for i=1:numel(profile_views)
[visiIndex, centres, patch_experts, imgs_used_profile] = ...
AppendTraining(trainingLoc, profile_views(i), scaling, sigma, visiIndex, centres, patch_experts, ratio_neg, num_samples, varargin{:});
fprintf('Profile %d done\n', i);
imgs_used = cat(1, imgs_used, imgs_used_profile);
end
% saving time by not retraining mirrored (left/right) views, but just
% flipping the patch expert
for i=1:numel(profile_views)
[visiIndex, centres, patch_experts] = ...
AppendMirror(visiIndex, centres, patch_experts, numel(profile_views) - i + 2, varargin{:});
fprintf('Profile %d done\n', i + numel(profile_views));
end
% output the training
locationTxtCol = sprintf('trained/ccnf_patches_%s_%s.txt', num2str(scaling), version);
locationMlabCol = sprintf('trained/ccnf_patches_%s_%s.mat', num2str(scaling), version);
Write_patch_experts_ccnf(locationTxtCol, locationMlabCol, scaling, centres, visiIndex, patch_experts, norm_options, [7,9,11,15]);
% save the images used
location_imgs_used = sprintf('trained/imgs_used_%s.mat', version);
save(location_imgs_used, 'imgs_used');
end
function [visi_index, centres, patches_m, imgs_used, norm_options] = AppendTraining(training_data_loc, view, scale, sigma, visibilities_init, centres_init, patches_m_init, ratio_neg, num_samples, varargin)
patches_m = patches_m_init;
[correlations, rms_errors, patch_experts, visi_index, centres, imgs_used, norm_options] = Train_CCNF_patches(training_data_loc, view, scale, sigma, ratio_neg, num_samples, varargin{:});
if(numel(patches_m_init.correlations) > 0)
patches_m.correlations = cat(1, patches_m_init.correlations, correlations);
patches_m.rms_errors = cat(1, patches_m_init.rms_errors, rms_errors);
patches_m.patch_experts = cat(1, patches_m_init.patch_experts, patch_experts);
else
patches_m.correlations = correlations;
patches_m.rms_errors = rms_errors;
patches_m.patch_experts = patch_experts;
end
% also add the visibility indices and centres, as that will need to be
% output to the patch expert when it's written out
if(numel(visibilities_init) > 0)
visi_index = cat(1, visibilities_init, visi_index);
centres = cat(1, centres_init, centres);
end
end
function [visiIndex, centres, patches_m] = AppendMirror(visiIndexInit, centresInit, patches_m, index, varargin)
if(numel(visiIndexInit) > 0)
corr_T = patches_m.correlations(index,:);
if(numel(corr_T) == 66)
% this specifies the mirrored points, e.g. point 1 in the left profile
% becomes point 17 in the right profile when mirrored; non-mirrored points
% don't need to be specified, they just get flipped while their index
% remains the same
mirrorInds = [1,17;2,16;3,15;4,14;5,13;6,12;7,11;8,10;18,27;19,26;20,25;21,24;22,23;...
32,36;33,35;37,46;38,45;39,44;40,43;41,48;42,47;49,55;50,54;51,53;60,56;59,57;...
61,63;66,64];
elseif(numel(corr_T) == 68)
mirrorInds = [1,17;2,16;3,15;4,14;5,13;6,12;7,11;8,10;18,27;19,26;20,25;21,24;22,23;...
32,36;33,35;37,46;38,45;39,44;40,43;41,48;42,47;49,55;50,54;51,53;60,56;59,57;...
61,65;62,64;68,66];
end
corr_T = swap(corr_T, mirrorInds(:,1), mirrorInds(:,2));
patches_m.correlations = cat(1, patches_m.correlations, corr_T);
AccT = patches_m.rms_errors(index,:);
AccT = swap(AccT, mirrorInds(:,1), mirrorInds(:,2));
patches_m.rms_errors = cat(1, patches_m.rms_errors, AccT);
visiIndexT = visiIndexInit(index,:);
visiIndexT = swap(visiIndexT, mirrorInds(:,1), mirrorInds(:,2));
visiIndex = cat(1, visiIndexInit, visiIndexT);
% mirroring of the orientation involves flipping the yaw or roll (we
% assume only views with a single rotation will be present, i.e. only
% pitch, or yaw, or roll)
centresMirror = [centresInit(index,1), -centresInit(index,2), -centresInit(index,3)];
centres = cat(1, centresInit, centresMirror);
patchExpertMirror = patches_m.patch_experts(index,:);
patchExpertMirrorT1 = patchExpertMirror(1,mirrorInds(:,1),:);
patchExpertMirrorT2 = patchExpertMirror(1,mirrorInds(:,2),:);
patchExpertMirror(1,mirrorInds(:,2),:) = patchExpertMirrorT1;
patchExpertMirror(1,mirrorInds(:,1),:) = patchExpertMirrorT2;
% To flip a patch expert, flip each of its neuron weight matrices left to right
for p=1:size(patchExpertMirror,2)
if(visiIndexT(p))
num_hl = size(patchExpertMirror{p}.thetas, 1);
num_mod = size(patchExpertMirror{p}.thetas, 3);
for m=1:num_mod
for hl=1:num_hl
w = reshape(patchExpertMirror{p}.thetas(hl, 2:end, m),11,11);
w = fliplr(w);
w = reshape(w, 121,1);
patchExpertMirror{p}.thetas(hl, 2:end, m) = w;
end
end
end
end
patches_m.patch_experts = cat(1, patches_m.patch_experts, patchExpertMirror);
end
end
function arr = swap(arr, ind1, ind2)
val1 = arr(ind1);
val2 = arr(ind2);
arr(ind1) = val2;
arr(ind2) = val1;
end

View File

@@ -0,0 +1,139 @@
function Write_patch_experts_ccnf(location_txt, location_mlab, trainingScale, centers, visiIndex, patch_experts, normalisationOptions, w_sizes)
save(location_mlab, 'patch_experts', 'trainingScale', 'centers', 'visiIndex', 'normalisationOptions');
patches_file = fopen(location_txt, 'w');
[n_views, n_landmarks, ~] = size(patch_experts.correlations);
% fprintf(patches_file, '# scaling factor of training\r\n%f\r\n', trainingScale);
fwrite(patches_file, trainingScale, 'float64');
% write out the scaling factor as this is what will be used when
% fitting on the window
% fprintf(patches_file, '# number of views\r\n%d\r\n', n_views);
fwrite(patches_file, n_views, 'int');
% Write out the information about the view's and centers here
% fprintf(patches_file, '# centers of the views\r\n');
for i=1:n_views
% this indicates that we're writing a 3x1 double matrix
writeMatrixBin(patches_file, centers(i,:)', 6);
end
% fprintf(patches_file, '# visibility indices per view\r\n');
for i=1:n_views
% this indicates that we're writing an n_landmarks x 1 integer matrix
writeMatrixBin(patches_file, visiIndex(i,:)', 4);
end
% fprintf(patches_file, '# Sigma component matrices being used in these patches\r\n');
% fprintf(patches_file, '# Number of windows sizes\r\n');
% fprintf(patches_file, '%d\r\n', numel(w_sizes));
fwrite(patches_file, numel(w_sizes), 'int');
for w = 1:numel(w_sizes)
% fprintf(patches_file, '# Size of window\r\n');
% fprintf(patches_file, '%d\r\n', w_sizes(w));
fwrite(patches_file, w_sizes(w), 'int');
similarities = {};
response_side_length = w_sizes(w);
for st=1:size(patch_experts.patch_experts{1,1}.similarity_types, 1)
type_sim = patch_experts.patch_experts{1,1}.similarity_types{st};
neighFn = @(x) similarity_neighbor_grid(x, response_side_length, type_sim);
similarities = cat(1, similarities, {neighFn});
end
sparsities = {};
for st=1:size(patch_experts.patch_experts{1,1}.sparsity_types, 1)
spFn = @(x) sparsity_grid(x, response_side_length, patch_experts.patch_experts{1,1}.sparsity_types(st,1), patch_experts.patch_experts{1,1}.sparsity_types(st,2));
sparsities = cat(1, sparsities, {spFn});
end
region_length = response_side_length^2;
% Adding the sparsities here if needed (assuming we are using an
% 11x11 support area, hard-coded)
[ ~, PrecalcQ2s, ~, ~ ] = CalculateSimilarities( 1, zeros(122,region_length), similarities, sparsities, zeros(region_length,1), true);
PrecalcQ2s = PrecalcQ2s{1};
% fprintf(patches_file, '# Number of Sigma components\r\n');
fwrite(patches_file, numel(PrecalcQ2s), 'int');
for q2=1:numel(PrecalcQ2s)
writeMatrixBin(patches_file, PrecalcQ2s{q2}, 5);
end
end
% fprintf(patches_file, '# Patches themselves (1 line patches of a vertex)\r\n');
for i=1:n_views
for j=1:n_landmarks
% Write out that we're writing a ccnf patch expert of 11x11 support region
fwrite(patches_file, 5, 'int');
fwrite(patches_file, 11, 'int');
fwrite(patches_file, 11, 'int');
if(~visiIndex(i,j))
% Write out that there won't be any neurons for this
% landmark
fwrite(patches_file, 0, 'int');
fwrite(patches_file, 0, 'int');
else
num_neurons = size(patch_experts.patch_experts{i,j}.thetas, 1);
% Binary layout per landmark: CCNF patch marker (5), width, height,
% num_neurons; then per neuron: neuron marker (2), neuron_type,
% normalisation (norm of w), bias, alpha, and the weight matrix
% (rows, cols, type, data)
num_modalities = size(patch_experts.patch_experts{i,j}.thetas, 3);
fwrite(patches_file, num_neurons, 'int');
for n=1:num_neurons
for m=1:num_modalities
if(strcmp(patch_experts.types{m}, 'reg'))
type = 0;
elseif(strcmp(patch_experts.types{m}, 'grad'))
type = 1;
else
fprintf('Not supported patch type\n');
type = 0;
end
% normalise the w
w = patch_experts.patch_experts{i,j}.thetas(n, 2:end, m);
norm_w = norm(w);
w = w/norm(w);
bias = patch_experts.patch_experts{i,j}.thetas(n, 1, m);
alpha = patch_experts.patch_experts{i,j}.alphas((m-1)*num_neurons+n);
% also add patch confidence based on correlation scores
fwrite(patches_file, 2, 'int');
fwrite(patches_file, type, 'int');
fwrite(patches_file, norm_w, 'float64');
fwrite(patches_file, bias, 'float64');
fwrite(patches_file, alpha, 'float64');
% the actual weight matrix
writeMatrixBin(patches_file, reshape(w, 11, 11), 5);
end
end
% Write out the betas
for b=1:numel(patch_experts.patch_experts{i,j}.betas)
fwrite(patches_file, patch_experts.patch_experts{i,j}.betas(b), 'float64');
end
% finally write out the confidence
fwrite(patches_file, patch_experts.correlations(i,j), 'float64');
end
end
end
fclose(patches_file);

View File

@@ -0,0 +1,13 @@
clear;
load('trained/ccnf_patches_1_general.mat');
% now drop the first 17 points
visiIndex = visiIndex(:,18:end);
patch_experts.correlations = patch_experts.correlations(:,18:end);
patch_experts.rms_errors = patch_experts.rms_errors(:,18:end);
patch_experts.patch_experts = patch_experts.patch_experts(:,18:end);
Write_patch_experts_ccnf('trained/ccnf_patches_1.00_inner.txt',...
'trained/ccnf_patches_1.00_inner.mat', trainingScale, centers,...
visiIndex, patch_experts, normalisationOptions, [7,9,11,15]);

View File

@@ -0,0 +1,56 @@
function [ display_array ] = generateDisplayData( X )
%GENERATEDISPLAYDATA Arranges the rows of X (11x11 patches) into a single
% padded 2-D grid for display purposes
example_width = 11;
example_height = 11;
% Compute rows, cols
[m n] = size(X);
% Compute number of items to display
display_rows = floor(sqrt(m));
display_cols = ceil(m / display_rows);
% Between images padding
pad = 1;
% Setup blank display
display_array = double(zeros(pad + display_rows * (example_height + pad), ...
pad + display_cols * (example_width + pad)));
% Copy each example into a patch on the display array
curr_ex = 1;
for j = 1:display_rows
for i = 1:display_cols
if curr_ex > m,
break;
end
% Copy the patch
% if(isa(X, 'uint8'))
display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
reshape(X(curr_ex, :), example_height, example_width);
% else
% % Get the max value of the patch
% minVal = min(X(curr_ex, X(curr_ex,:)~=0)) - 10;
% if(numel(minVal) < 1)
% minVal = 0;
% end
% maxVal = double(max(X(curr_ex,:)-minVal))/255.0;
% if(numel(minVal) < 1 || maxVal == 0)
% maxVal = 1;
% end
% display_array(pad + (j - 1) * (example_height + pad) + (1:example_height), ...
% pad + (i - 1) * (example_width + pad) + (1:example_width)) = ...
% reshape((X(curr_ex, :)-minVal)/maxVal, example_height, example_width);
% end
curr_ex = curr_ex + 1;
end
if curr_ex > m,
break;
end
end
end

View File

@@ -0,0 +1,13 @@
Scripts for training continuous conditional neural field (CCNF, or alternatively Local Neural Field - LNF) patch experts.
This requires data preparation first (the prepared data is not included with the source code); to prepare the data see '../data_preparation/readme.txt'.
To train the patch experts run:
Script_Training_wild.m (using in-the-wild data)
Script_Training_multi_pie (using multi-pie data)
Script_Training_general (using combined data)
To prepare the inner face general patch expert run:
extract_inner.m
The trained patch experts are included in the './trained/' folder; the experts you train yourself might differ slightly depending on the version of Matlab used.

View File

@@ -0,0 +1,16 @@
% for easier readability write them row by row
function writeMatrix(fileID, M, type)
fprintf(fileID, '%d\r\n', size(M,1));
fprintf(fileID, '%d\r\n', size(M,2));
fprintf(fileID, '%d\r\n', type);
for i=1:size(M,1)
if(type == 4 || type == 0)
fprintf(fileID, '%d ', M(i,:));
else
fprintf(fileID, '%.9f ', M(i,:));
end
fprintf(fileID, '\r\n');
end
end

View File

@@ -0,0 +1,37 @@
% for easier readability write them row by row
function writeMatrixBin(fileID, M, type)
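% Illustrative usage (sketch, the file name is hypothetical): write a 3x3
% identity matrix as float64 (type 6):
%   fid = fopen('example.bin', 'w');
%   writeMatrixBin(fid, eye(3), 6);
%   fclose(fid);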
% 4 bytes each for the description
fwrite(fileID, size(M,1), 'uint');
fwrite(fileID, size(M,2), 'uint');
fwrite(fileID, type, 'uint');
% Convert the matrix to OpenCV format (row-major as opposed to
% column-major)
M = M';
% type 0 - uint8, 1 - int8, 2 - uint16, 3 - int16, 4 - int, 5 -
% float32, 6 - float64
% Write out the matrix itself
switch type
case 0
type = 'uint8';
case 1
type = 'int8';
case 2
type = 'uint16';
case 3
type = 'int16';
case 4
type = 'int';
case 5
type = 'float32';
case 6
type = 'float64';
otherwise
type = 'float32';
end
fwrite(fileID, M, type);
end

View File

@@ -0,0 +1,35 @@
function Collect_all_patches(trainingLoc, frontalView, profile_views, scaling, sigma, version, varargin)
% need some documentation here
if(sum(strcmp(varargin,'ratio_neg')))
ind = find(strcmp(varargin,'ratio_neg')) + 1;
ratio_neg = varargin{ind};
else
ratio_neg = 20;
end
if(sum(strcmp(varargin,'num_samples')))
ind = find(strcmp(varargin,'num_samples')) + 1;
num_samples = varargin{ind};
else
num_samples = 5e5;
end
% first do the frontal view
AppendTraining(trainingLoc, frontalView, scaling, sigma, ratio_neg, num_samples, 0, version, varargin{:});
fprintf('Frontal done\n');
% now do the profile views
for i=1:numel(profile_views)
AppendTraining(trainingLoc, profile_views(i), scaling, sigma, ratio_neg, num_samples, i, version, varargin{:});
fprintf('Profile %d done\n', i);
end
end
function AppendTraining(training_data_loc, view, scale, sigma, ratio_neg, num_samples, varargin)
Collect_patches_view(training_data_loc, view, scale, sigma, ratio_neg, num_samples, varargin{:});
end

View File

@@ -0,0 +1,71 @@
function Collect_patches_view(training_loc, view, scale, sigma, ratio_neg, num_samples, profile_id, version, varargin)
%% collecting the training samples
% parse the settings used for sample extraction
normalisation_options = Parse_settings( sigma, ratio_neg, num_samples, varargin{:});
if(sum(strcmp(varargin,'data_loc')))
ind = find(strcmp(varargin,'data_loc')) + 1;
data_loc = varargin{ind};
data_loc = sprintf(['%s\\' data_loc '%s_%s.mat'], training_loc, num2str(scale), num2str(view));
else
data_loc = sprintf('%s\\wild_%s_%s.mat', training_loc, num2str(scale), num2str(view));
end
load(data_loc);
examples = all_images;
landmark_loc = landmark_locations;
clear 'all_images'
numPoints = size(landmark_loc,2);
done = false(numPoints,1);
for j=1:numPoints
% can only do mirroring if there is no yaw
if((numPoints == 68 || numPoints == 29 )&& centres(2) == 0)
% Do not redo a mirror feature (just flip them)
if(numPoints == 68)
mirrorInds = [1,17;2,16;3,15;4,14;5,13;6,12;7,11;8,10;18,27;19,26;20,25;21,24;22,23;...
32,36;33,35;37,46;38,45;39,44;40,43;41,48;42,47;49,55;50,54;51,53;60,56;59,57;...
61,65;62,64;68,66];
else
mirrorInds = [1,2; 3,4; 5,7; 6,8; 9,10; 11,12; 13,15; 14,16; 17,18; 19,20; 23,24];
end
mirror_idx = j;
if(any(mirrorInds(:,1)==j))
mirror_idx = mirrorInds(mirrorInds(:,1)==j,2);
elseif(any(mirrorInds(:,2)==j))
mirror_idx = mirrorInds(mirrorInds(:,2)==j,1);
end
if(mirror_idx~=j & done(mirror_idx))
continue;
end
end
imgs_used = {};
if(visiIndex(j))
tic;
% instead of loading the patches compute them here:
num_samples = normalisation_options.numSamples;
[samples, labels, imgs_used_n] = ExtractTrainingSamples(examples, landmark_loc, actual_imgs_used, sigma, num_samples, j, normalisation_options);
imgs_used = union(imgs_used, imgs_used_n);
% add the bias term
samples = [ones(1,size(samples,1)); samples'];
if(centres(2) == 0)
save(sprintf('E:/menpo_data/%s_data%.2f_frontal_%d.mat', version, scale, j), 'samples', 'labels', '-v7.3');
else
save(sprintf('E:/menpo_data/%s_data%.2f_profile%d_%d.mat', version, scale, profile_id, j), 'samples', 'labels', '-v7.3');
end
fprintf('Landmark %d done\n', j);
clear samples
clear labels
done(j) = true;
end
end
end

View File

@@ -0,0 +1,133 @@
function [samples, labels, imgs_used] = ExtractTrainingSamples(examples, landmarkLoc, img_names, sigma, numSamples, landmark, normalisation_options)
%%
% for an area of interest of 19x19 and patch support region of 11x11, we
% would have 9x9=81 samples (9 is the single_input_size, 11 is
% patch_expert_support_size, 19x19 is normalisation_size, 9 would be the
% normalisation_side_size)
evaluation_size = normalisation_options.normalisationRegion;
patch_expert_support_size = normalisation_options.patchSize;
normalisation_side_size = (evaluation_size - 1)/2;
single_input_size = evaluation_size - patch_expert_support_size + 1;
% Determine the ratio of images to be sampled (most likely not all of them will be)
samples_per_img = (numSamples / (size(examples,1) * (1 + normalisation_options.rate_negative))) / (single_input_size(1)^2);
num_samples = int32(samples_per_img * (1 + normalisation_options.rate_negative) * size(examples,1) * (single_input_size(1)^2));
%% Initialise the samples and labels
samples = zeros(num_samples, patch_expert_support_size(1) * patch_expert_support_size(2));
labels = zeros(num_samples, 1);
%% Initialise the unnormed versions of the images
% This is done in order to assert our use of algorithms for calculating
% the responses, as for training we might use regular ml procedures,
% whereas for fitting normalised cross-correlation or just
% cross-correlation will be used, so keep some unnormed samples
% samples_unnormed = zeros(int32(num_samples/300), evaluation_size(1)^2);
img_size = [size(examples,2), size(examples,3)];
% Keep only images with differently shaped faces so as to extract more
% diverse training samples
to_keep = FindDistantLandmarks(landmarkLoc, landmark, round(samples_per_img*size(examples,1)));
inds_all = 1:size(examples,1);
samples_to_use = inds_all(to_keep);
% Keep track of how many samples have been computed already
samples_filled = 1;
samples_unnormed_filled = 1;
%% parse the image names for reporting purposes
imgs_used = img_names(samples_to_use);
for i=1:numel(imgs_used)
[~,name,ext] = fileparts(imgs_used{i});
imgs_used{i} = [name, ext];
end
for i=samples_to_use
% Do rate_negative negatives and a single positive
for p=1:normalisation_options.rate_negative+1
% create a gaussian
corrPoint = landmarkLoc(i,landmark,:);
% Ignore occluded points
if(corrPoint(1) == 0)
break;
end
startX = 1 - corrPoint(1);
startY = 1 - corrPoint(2);
patchWidth = img_size(2);
patchHeight = img_size(1);
[X, Y] = meshgrid(startX:patchWidth + startX-1, startY:patchHeight + startY-1);
response = exp(-0.5*(X.^2+Y.^2)/(sigma^2));
% Choose positive or negative sample
if(p==normalisation_options.rate_negative+1)
sample_centre = squeeze(corrPoint) + round(1*randn(2,1));
else
sample_centre = squeeze(corrPoint) + round(10*randn(2,1));
end
sample_centre = round(sample_centre);
sample_centre(sample_centre <= normalisation_side_size(1)) = normalisation_side_size(1) + 1;
sample_centre(sample_centre > img_size(1)-normalisation_side_size(1)) = img_size(1) - normalisation_side_size(1) - 1;
patches = squeeze(examples(i, sample_centre(2) - normalisation_side_size:sample_centre(2) + normalisation_side_size, sample_centre(1) - normalisation_side_size:sample_centre(1) + normalisation_side_size));
side = (single_input_size - 1)/2;
responses = response(sample_centre(2) - side(2):sample_centre(2) + side(2), sample_centre(1) - side(1):sample_centre(1) + side(1));
% if(samples_unnormed_filled <= size(samples_unnormed,1))
% % even if correct size is not initialised Matlab will
% % sort that out (would only happen once anyway)
% samples_unnormed(samples_unnormed_filled,:) = patches(:);
% samples_unnormed_filled = samples_unnormed_filled + 1;
% end
% if we want to normalise each patch individually, do it here
patch = im2col(patches, patch_expert_support_size, 'sliding')';
response = im2col(responses, [1,1], 'sliding');
labels(samples_filled:samples_filled+size(patch,1)-1,:) = response;
samples(samples_filled:samples_filled+size(patch,1)-1,:) = patch;
samples_filled = samples_filled + size(patch,1);
end
end
% Only keep the filled samples
samples = samples(1:samples_filled-1,:);
labels = labels(1:samples_filled-1,:);
if(normalisation_options.useNormalisedCrossCorr == 1)
mean_curr = mean(samples, 2);
patch_normed = samples - repmat(mean_curr,1, patch_expert_support_size(1)*patch_expert_support_size(2));
% Normalising the patches using the L2 norm
scaling = sqrt(sum(patch_normed.^2,2));
scaling(scaling == 0) = 1;
patch_normed = patch_normed ./ repmat(scaling, 1, patch_expert_support_size(1)*patch_expert_support_size(2));
samples = patch_normed;
clear 'patch_normed';
end
% if((samples_filled-1)/(single_input_size(1)*single_input_size(2)) < size(samples_unnormed,1))
% samples_unnormed = samples_unnormed(1:(samples_filled-1)/(single_input_size(1)*single_input_size(2)),:);
% end
end

View File

@@ -0,0 +1,41 @@
function [to_keep] = FindDistantLandmarks(landmarkLoc, landmark_num, num_to_keep)
% First align all of them
a = landmarkLoc(:,:,1);
b = landmarkLoc(:,:,2);
offset_x = mean(a,2);
offset_y = mean(b,2);
landmark_loc_off = cat(3, bsxfun(@plus, a, -offset_x), bsxfun(@plus, b, -offset_y));
fixed_x = landmark_loc_off(:,:,1);
fixed_y = landmark_loc_off(:,:,2);
% Extract the relevant landmarks
fixed_x_l = fixed_x(:,landmark_num);
fixed_y_l = fixed_y(:,landmark_num);
obs = cat(2, fixed_x_l, fixed_y_l);
% Discard landmarks that are very close to each other, so that we only
% keep more diverse images
D = squareform(pdist(obs));
to_keep = true(size(landmarkLoc,1),1);
for i = 1:(size(landmarkLoc,1) - num_to_keep)
diversity_score = mean(D,2);
a = min(diversity_score);
lowest = find(diversity_score == a);
lowest = lowest(1);
to_keep(lowest) = 0;
D(:,~to_keep) = 0;
D(~to_keep,:) = 200;
end
end

View File

@@ -0,0 +1,120 @@
function [ normalisation_options ] = Parse_settings( sigma, ratio_neg, num_samples, varargin)
%PARSE_SETTINGS Parses the varargin key-value options into the
% normalisation_options struct used for patch collection
% creating the parameters to use when training colour (intensity) patches
normalisation_options = struct;
% this is what is currently expected (although we could potentially have
% bigger or smaller patches, this should not be bigger than the patch
% available in examples and negExamples)
normalisation_options.patchSize = [11 11];
% The size of the region that is taken for training around an
% aligned or misaligned landmark
if(sum(strcmp(varargin,'normalisation_size')))
ind = find(strcmp(varargin,'normalisation_size')) + 1;
normalisation_options.normalisationRegion = [varargin{ind}, varargin{ind}];
else
normalisation_options.normalisationRegion = [21 21];
end
% This specifies the split of data ratios
normalisation_options.ccnf_ratio = 0.9; % proportion of data used for cross-validating CCNFs
% the rest is used for testing and provides the F1 and accuracy scores
if(any(strcmp(varargin, 'patch_types')))
ind = find(strcmp(varargin,'patch_types')) + 1;
normalisation_options.patch_type = varargin{ind};
else
normalisation_options.patch_type = {'reg'};
end
if(any(strcmp(varargin, 'sparsity_types')))
ind = find(strcmp(varargin,'sparsity_types')) + 1;
if(~isempty( varargin{ind}))
normalisation_options.sparsity = 1;
normalisation_options.sparsity_types = varargin{ind};
else
normalisation_options.sparsity = 0;
normalisation_options.sparsity_types = [];
end
else
normalisation_options.sparsity = 0;
normalisation_options.sparsity_types = [];
end
if(any(strcmp(varargin, 'lambda_a')))
ind = find(strcmp(varargin,'lambda_a')) + 1;
normalisation_options.lambda_a = varargin{ind};
end
if(any(strcmp(varargin, 'lambda_b')))
ind = find(strcmp(varargin,'lambda_b')) + 1;
normalisation_options.lambda_b = varargin{ind};
end
if(any(strcmp(varargin, 'lambda_th')))
ind = find(strcmp(varargin,'lambda_th')) + 1;
normalisation_options.lambda_th = varargin{ind};
end
if(any(strcmp(varargin, 'num_layers')))
ind = find(strcmp(varargin,'num_layers')) + 1;
normalisation_options.num_layers = varargin{ind};
end
if(any(strcmp(varargin, 'num_bins')))
ind = find(strcmp(varargin,'num_bins')) + 1;
normalisation_options.num_hog_bins = varargin{ind};
else
normalisation_options.num_hog_bins = 9;
end
normalisation_options.numSamples = num_samples;
normalisation_options.useZeroMeanPerPatch = 1;
normalisation_options.useNormalisedCrossCorr = 1;
normalisation_options.zscore = 0;
% Should invalid pixels be taken into account when normalising (yes in
% case of depth and no in case of colour)
normalisation_options.ignoreInvalidInMeanStd = 0; % we don't care about invalid pixels at this time (black is valid here) TODO background simulation?
normalisation_options.setIllegalToPost = 0;
if(sum(strcmp(varargin,'use_bu')))
ind = find(strcmp(varargin,'use_bu')) + 1;
normalisation_options.bu = varargin{ind};
else
normalisation_options.bu = 1;
end
if(sum(strcmp(varargin,'use_mpie')))
ind = find(strcmp(varargin,'use_mpie')) + 1;
normalisation_options.mpie = varargin{ind};
else
normalisation_options.mpie = 1;
end
if(sum(strcmp(varargin,'use_wild')))
ind = find(strcmp(varargin,'use_wild')) + 1;
normalisation_options.wild = varargin{ind};
else
normalisation_options.wild = 0;
end
normalisation_options.sigma = sigma;
normalisation_options.rate_negative = ratio_neg;
% the similarities need to be tested separately (1, 2, 3 and 4), all
% together, horizontal/vertical vs. diagonal, and with none at all
if(any(strcmp(varargin, 'similarity_types')))
ind = find(strcmp(varargin,'similarity_types')) + 1;
normalisation_options.similarity_types = varargin{ind};
else
normalisation_options.similarity_types = [];
end
end

View File

@@ -0,0 +1,40 @@
clear
% define the root name of database
root = '../data_preparation/prepared_data/';
% which scales we're doing
sigma = 1;
num_samples = 4.5e6; % Making sure all data is used
scales = [0.25,0.35,0.5,1.0];
frontalView = 1;
profileViewInds = [2,3,4];
version = 'menpo_train';
ratio_neg = 10;
norm = 1;
data_loc = 'menpo_train_';
rng(0);
for s=scales
Collect_all_patches(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19);
end
version = 'menpo_valid';
data_loc = 'menpo_valid_';
rng(0);
for s=scales
Collect_all_patches(root, frontalView, profileViewInds,...
s, sigma, version, 'ratio_neg', ratio_neg,...
'num_samples', num_samples, 'data_loc', data_loc,...
'normalisation_size', 19);
end

View File

@@ -0,0 +1 @@
Data collection for training CE-CLM on Menpo data.

View File

@@ -0,0 +1,11 @@
The code here prepares the training images and labels into a format that can be easily used to train SVR and CCNF regressors.
Just run "scripts/Prepare_data_wild_all.m" for the data needed to train patch experts for in-the-wild experiments (you have to have the relevant datasets, but they are all available online at http://ibug.doc.ic.ac.uk/resources/facial-point-annotations/).
Run "scripts/Prepare_data_Multi_PIE_all.m" (you have to have the Multi-PIE dataset and labels).
Run "scripts/Prepare_data_general_all.m" (you have to have both of the datasets, and you have to run "scripts/Prepare_data_wild_all.m" and "scripts/Prepare_data_Multi_PIE_all.m" first).
Run "scripts/Prepare_data_menpo_all.m" (you have to have the Menpo challenge training data - https://ibug.doc.ic.ac.uk/resources/2nd-facial-landmark-tracking-competition-menpo-ben/).
The PDM model used is trained on 2D landmark labels using Non-Rigid Structure from Motion (the code can be found at http://www.cl.cam.ac.uk/~tb346/res/ccnf/pdm_generation.zip).

View File

@@ -0,0 +1,9 @@
function [RotFull] = AddOrthRow(RotSmall)
% We can work out these values from the reduced (2x3) version of the rotation matrix Rx * Ry * Rz (if you plug in the values you can work them out, it is just slightly tedious)
RotFull = zeros(3,3);
RotFull(1:2, :) = RotSmall;
RotFull(3,1) = RotSmall(1, 2) * RotSmall(2, 3) - RotSmall(1, 3) * RotSmall(2, 2);
RotFull(3,2) = RotSmall(1, 3) * RotSmall(2, 1) - RotSmall(1, 1) * RotSmall(2, 3);
RotFull(3,3) = RotSmall(1, 1) * RotSmall(2, 2) - RotSmall(1, 2) * RotSmall(2, 1);
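% Equivalent formulation (sketch): the third row computed above is the cross
% product of the first two rows of a proper rotation matrix, so the
% following should be (numerically) zero:
%   RotFull(3,:) - cross(RotSmall(1,:), RotSmall(2,:))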

View File

@@ -0,0 +1,22 @@
function [ R, T ] = AlignShapesKabsch ( alignFrom, alignTo )
%ALIGNSHAPESKABSCH Finds the rigid rotation R and translation T that align
% alignFrom to alignTo using the Kabsch algorithm
dims = size(alignFrom, 2);
alignFromMean = alignFrom - repmat(mean(alignFrom), size(alignFrom,1),1);
alignToMean = alignTo - repmat(mean(alignTo), size(alignTo,1),1);
[U, ~, V] = svd( alignFromMean' * alignToMean);
% make sure no reflection is there
d = sign(det(V*U'));
corr = eye(dims);
corr(end,end) = d;
R = V*corr*U';
T = mean(alignTo) - (R * mean(alignFrom)')';
T = T';
end

View File

@@ -0,0 +1,30 @@
function [ A, T, error, alignedShape ] = AlignShapesWithScale( alignFrom, alignTo )
%ALIGNSHAPESWITHSCALE Finds the scaled rotation A and translation T that
% align alignFrom to alignTo, also returning the alignment error
numPoints = size(alignFrom,1);
meanFrom = mean(alignFrom);
meanTo = mean(alignTo);
alignFromMeanNormed = bsxfun(@minus, alignFrom, meanFrom);
alignToMeanNormed = bsxfun(@minus, alignTo, meanTo);
% scale now
sFrom = sqrt(sum(alignFromMeanNormed(:).^2)/numPoints);
sTo = sqrt(sum(alignToMeanNormed(:).^2)/numPoints);
s = sTo / sFrom;
alignFromMeanNormed = alignFromMeanNormed/sFrom;
alignToMeanNormed = alignToMeanNormed/sTo;
[R, t] = AlignShapesKabsch(alignFromMeanNormed, alignToMeanNormed);
A = s * R;
aligned = (A * alignFrom')';
T = mean(alignTo - aligned);
alignedShape = bsxfun(@plus, aligned, T);
error = mean(sum((alignedShape - alignTo).^2,2));
end

View File

@@ -0,0 +1,11 @@
function [Rot] = AxisAngle2Rot(axisAngle)
theta = norm(axisAngle, 2);
nx = axisAngle / theta;
nx = [ 0 -nx(3) nx(2);
nx(3) 0 -nx(1);
-nx(2) nx(1) 0 ];
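% Rodrigues' rotation formula: R = I + sin(theta)*K + (1 - cos(theta))*K^2,
% where K (nx above) is the skew-symmetric cross-product matrix of the unit axis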
Rot = eye(3) + sin(theta) * nx + (1-cos(theta))*nx^2;

View File

@@ -0,0 +1,11 @@
function [Rot] = Euler2Rot(euler)
rx = euler(1);
ry = euler(2);
rz = euler(3);
Rx = [1 0 0; 0 cos(rx) -sin(rx); 0 sin(rx) cos(rx)];
Ry = [cos(ry) 0 sin(ry); 0 1 0; -sin(ry) 0 cos(ry)];
Rz = [cos(rz) -sin(rz) 0; sin(rz) cos(rz) 0; 0 0 1];
Rot = Rx * Ry * Rz;

View File

@@ -0,0 +1,7 @@
function [shape3D] = GetShape3D(M, V, p)
shape3D = M + V * p;
shape3D = reshape(shape3D, numel(shape3D) / 3, 3);
end

View File

@@ -0,0 +1,20 @@
function [shape2D] = GetShapeOrtho(M, V, p, global_params)
% M - mean shape vector
% V - eigenvectors (principal components)
% p - parameters of the non-rigid shape
% global_params - scale, euler rotation (3) and translation (tx, ty)
% R - rotation matrix derived from the euler angles
% T - translation vector (tx, ty)
R = Euler2Rot(global_params(2:4));
T = [global_params(5:6); 0];
a = global_params(1);
shape3D = GetShape3D(M, V, p);
shape2D = bsxfun(@plus, a * R*shape3D', T);
shape2D = shape2D';
end

View File

@@ -0,0 +1,103 @@
function [normX, normY, meanShape, Transform] = ProcrustesAnalysis(x, y, options)
% Translate all elements to origin and scale to 1
normX = zeros(size(x));
normY = zeros(size(y));
for i = 1:size(x,1)
offsetX = mean(x(i,:));
offsetY = mean(y(i,:));
Transform.offsetX(i) = offsetX;
Transform.offsetY(i) = offsetY;
normX(i,:) = x(i,:) - offsetX;
normY(i,:) = y(i,:) - offsetY;
% Get the Frobenius norm, to scale the shapes to unit size
scale = norm([normX(i,:) normY(i,:)], 'fro');
Transform.scale(i) = scale;
normX(i,:) = normX(i,:)/scale;
normY(i,:) = normY(i,:)/scale;
end
% Rotate elements until all of them have the same orientation;
% the initial estimate of rotation is the first element;
% stop if the relative change is below the threshold (shouldn't take more than a couple of iterations)
change = 0.1;
meanShape = [ normX(1,:); normY(1,:) ]';
Transform.Rotation = zeros(size(x,1),1);
for i = 1:30
% align all of the shapes to the mean shape
% remember all orientations to get the mean one
orientations = zeros(size(normX,1),1);
for j = 1:size(x,1)
% do SVD of mean * X'
currentShape = [ normX(j,:); normY(j,:) ]';
[U, ~, V] = svd( meanShape' * currentShape);
rot = V*U';
if(asin(rot(2,1)) > 0)
orientations(j) = real(acos(rot(1,1)));
else
orientations(j) = real(-acos(rot(1,1)));
end
Transform.Rotation(j) = Transform.Rotation(j) + orientations(j);
currentShape = currentShape * rot;
normX(j,:) = currentShape(:,1)';
normY(j,:) = currentShape(:,2)';
end
% recalculate the mean shape;
oldMean = meanShape;
meanShape = [mean(normX); mean(normY)]';
% rotate the mean shape to mean rotation
meanOrientation = mean(orientations);
% Do this only the first time
if(i==1)
rotM = [ cos(-meanOrientation) -sin(-meanOrientation); sin(-meanOrientation) cos(-meanOrientation) ];
meanShape = meanShape * rotM;
end
% scale mean shape to unit
meanScale = norm(meanShape, 'fro');
meanShape = meanShape*(1/meanScale);
% Frobenius norm of the change in the mean shape
diff = norm(oldMean - meanShape, 'fro');
if(diff/norm(oldMean,'fro') < change)
break;
end
end
% transform to tangent space to preserve linearities
% get the scaling factors for each shape
if(options.TangentSpaceTransform)
scaling = [ normX normY ] * [ meanShape(:,1)' meanShape(:,2)']';
for i=1:size(x,1)
normX(i,:) = normX(i,:) * (1 / scaling(i));
normY(i,:) = normY(i,:) * (1 / scaling(i));
end
end
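
A minimal usage sketch for the 2D Procrustes alignment, with x and y given as numShapes-by-numPoints coordinate matrices (random perturbations of a base shape, purely to show the calling convention):

n_shapes = 50; n_pts = 68;
base_x = randn(1, n_pts); base_y = randn(1, n_pts);
x = repmat(base_x, n_shapes, 1) + 0.05 * randn(n_shapes, n_pts);
y = repmat(base_y, n_shapes, 1) + 0.05 * randn(n_shapes, n_pts);
options.TangentSpaceTransform = true;
[normX, normY, meanShape, Transform] = ProcrustesAnalysis(x, y, options);
% normX/normY hold the aligned shapes, meanShape is n_pts x 2, and Transform
% stores the per-shape offsets, scales and accumulated rotations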

View File

@@ -0,0 +1,139 @@
function [ normX, normY, normZ, meanShape, Transform ] = ProcrustesAnalysis3D( x, y, z, tangentSpace, meanShape )
%PROCRUSTESANALYSIS3D Generalised Procrustes analysis for 3D shapes
%   x, y and z are numShapes x numPoints coordinate matrices; translation,
%   scale and rotation are normalised, optionally against a provided mean shape
meanProvided = false;
if(nargin > 4)
meanProvided = true;
end
% Translate all elements to origin
normX = zeros(size(x));
normY = zeros(size(y));
normZ = zeros(size(z));
for i = 1:size(x,1)
offsetX = mean(x(i,:));
offsetY = mean(y(i,:));
offsetZ = mean(z(i,:));
Transform.offsetX(i) = offsetX;
Transform.offsetY(i) = offsetY;
Transform.offsetZ(i) = offsetZ;
normX(i,:) = x(i,:) - offsetX;
normY(i,:) = y(i,:) - offsetY;
normZ(i,:) = z(i,:) - offsetZ;
end
% Rotate elements until all of them have the same orientation;
% stop once the relative change of the mean shape falls below the
% threshold (this shouldn't take more than a couple of iterations)
change = 0.1;
if(~meanProvided)
meanShape = [ mean(normX); mean(normY); mean(normZ) ]';
end
% scale all the shapes to mean shape
% Get the Frobenius norm, to scale the shapes to mean size (still want to
% retain mm)
meanScale = norm(meanShape, 'fro');
for i = 1:size(x,1)
scale = norm([normX(i,:) normY(i,:) normZ(i,:)], 'fro')/meanScale;
normX(i,:) = normX(i,:)/scale;
normY(i,:) = normY(i,:)/scale;
normZ(i,:) = normZ(i,:)/scale;
end
Transform.RotationX = zeros(size(x,1),1);
Transform.RotationY = zeros(size(x,1),1);
Transform.RotationZ = zeros(size(x,1),1);
for i = 1:30
% align all of the shapes to the mean shape
% remember all orientations to get the mean one (in euler angle form, pitch, yaw roll)
orientationsX = zeros(size(normX,1),1);
orientationsY = zeros(size(normX,1),1);
orientationsZ = zeros(size(normX,1),1);
for j = 1:size(x,1)
currentShape = [normX(j,:); normY(j,:); normZ(j,:)]';
% we want to align the current shape to the mean one
[ R, T ] = AlignShapesKabsch(currentShape, meanShape);
eulers = Rot2Euler(R);
orientationsX(j) = eulers(1);
orientationsY(j) = eulers(2);
orientationsZ(j) = eulers(3);
Transform.RotationX(j) = eulers(1);
Transform.RotationY(j) = eulers(2);
Transform.RotationZ(j) = eulers(3);
currentShape = R * currentShape';
normX(j,:) = currentShape(1,:);
normY(j,:) = currentShape(2,:);
normZ(j,:) = currentShape(3,:);
end
% recalculate the mean shape
% if(~meanProvided)
oldMean = meanShape;
meanShape = [mean(normX); mean(normY); mean(normZ)]';
meanScale = norm(meanShape, 'fro');
% end
for j = 1:size(x,1)
scale = norm([normX(j,:) normY(j,:) normZ(j,:)], 'fro')/meanScale;
normX(j,:) = normX(j,:)/scale;
normY(j,:) = normY(j,:)/scale;
normZ(j,:) = normZ(j,:)/scale;
end
if(i==1 && ~meanProvided)
% rotate the mean shape to mean rotation
meanOrientationX = mean(orientationsX);
meanOrientationY = mean(orientationsY);
meanOrientationZ = mean(orientationsZ);
R = Euler2Rot([meanOrientationX, meanOrientationY, meanOrientationZ]);
meanShape = (R * meanShape')';
end
% Frobenius norm of the change in the mean shape
diff = norm(oldMean - meanShape, 'fro');
if(diff/norm(oldMean,'fro') < change)
break;
end
end
% transform to tangent space to preserve linearities
% get the scaling factors for each shape
if(tangentSpace)
[ normX, normY, normZ] = TangentSpaceTransform(normX, normY, normZ, meanShape);
end
end
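
The calling convention of the 3D variant is analogous (hypothetical random data below); passing a fifth argument aligns against a precomputed mean shape instead of estimating one:

n_shapes = 20; n_pts = 68;
base = randn(3, n_pts);
x = repmat(base(1,:), n_shapes, 1) + 0.01 * randn(n_shapes, n_pts);
y = repmat(base(2,:), n_shapes, 1) + 0.01 * randn(n_shapes, n_pts);
z = repmat(base(3,:), n_shapes, 1) + 0.01 * randn(n_shapes, n_pts);
[normX, normY, normZ, meanShape, Transform] = ProcrustesAnalysis3D(x, y, z, false);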

View File

@@ -0,0 +1,11 @@
function [ axisAngle ] = Rot2AxisAngle( Rot )
%ROT2AXISANGLE Convert a rotation matrix to an axis-angle vector
%   Returns the rotation axis scaled by the rotation angle (not defined for
%   the identity rotation, where sin(theta) in the denominator is zero)
theta = acos((trace(Rot) - 1) / 2);
vec = 1.0/(2*sin(theta));
vec = vec * [Rot(3,2) - Rot(2,3), Rot(1,3) - Rot(3,1), Rot(2,1) - Rot(1,2)];
axisAngle = vec * theta;
end

View File

@@ -0,0 +1,12 @@
function [euler] = Rot2Euler(R)
% Convert a rotation matrix to Euler angles [pitch, yaw, roll] (rotations about
% the x, y and z axes) via an intermediate quaternion; inverse of Euler2Rot
q0 = sqrt( 1 + R(1,1) + R(2,2) + R(3,3) ) / 2;
q1 = (R(3,2) - R(2,3)) / (4*q0) ;
q2 = (R(1,3) - R(3,1)) / (4*q0) ;
q3 = (R(2,1) - R(1,2)) / (4*q0) ;
yaw = asin(2*(q0*q2 + q1*q3));
pitch= atan2(2*(q0*q1-q2*q3), q0*q0-q1*q1-q2*q2+q3*q3);
roll = atan2(2*(q0*q3-q1*q2), q0*q0+q1*q1-q2*q2-q3*q3);
euler = [pitch, yaw, roll];

View File

@@ -0,0 +1,17 @@
function [ transformedX, transformedY, transformedZ ] = TangentSpaceTransform( x, y, z, meanShape )
%TANGENTSPACETRANSFORM Project shapes into the tangent space of the mean shape
%   Each shape is scaled by the inverse of its projection onto the mean shape
%   (then rescaled by the average projection), keeping the shape statistics
%   approximately linear
scaling = [ x y z] * [ meanShape(:,1)' meanShape(:,2)' meanShape(:,3)']';
for i=1:size(x,1)
x(i,:) = x(i,:) * (1 / scaling(i));
y(i,:) = y(i,:) * (1 / scaling(i));
z(i,:) = z(i,:) * (1 / scaling(i));
end
transformedX = x * mean(scaling);
transformedY = y * mean(scaling);
transformedZ = z * mean(scaling);
end

View File

@@ -0,0 +1,345 @@
function [ a, R, T, T3D, params, error, shapeOrtho ] = fit_PDM_ortho_proj_to_2D( M, E, V, shape2D, f, cx, cy)
%FIT_PDM_ORTHO_PROJ_TO_2D Fit a 3D point distribution model to 2D landmarks
%   under a scaled orthographic projection; returns the scale a, rotation R,
%   2D translation T, an approximate 3D translation T3D, the non-rigid
%   parameters, the RMS fitting error and the fitted 2D shape
params = zeros(size(E));
hidden = false;
% if some of the points are unavailable modify M, V, and shape2D (can
% later infer the actual shape from this)
if(sum(shape2D(:)==0) > 0)
hidden = true;
% which indices to remove
inds_to_rem = shape2D(:,1) == 0 | shape2D(:,2) == 0;
shape2D = shape2D(~inds_to_rem,:);
inds_to_rem = repmat(inds_to_rem, 3, 1);
M_old = M;
V_old = V;
M = M(~inds_to_rem);
V = V(~inds_to_rem,:);
end
num_points = numel(M) / 3;
m = reshape(M, num_points, 3)';
width_model = max(m(1,:)) - min(m(1,:));
height_model = max(m(2,:)) - min(m(2,:));
bounding_box = [min(shape2D(:,1)), min(shape2D(:,2)),...
max(shape2D(:,1)), max(shape2D(:,2))];
a = (((bounding_box(3) - bounding_box(1)) / width_model) + ((bounding_box(4) - bounding_box(2))/ height_model)) / 2;
tx = (bounding_box(3) + bounding_box(1))/2;
ty = (bounding_box(4) + bounding_box(2))/2;
% correct it so that the bounding box is just around the minimum
% and maximum point in the initialised face
tx = tx - a*(min(m(1,:)) + max(m(1,:)))/2;
ty = ty - a*(min(m(2,:)) + max(m(2,:)))/2;
R = eye(3);
T = [tx; ty];
currShape = getShapeOrtho(M, V, params, R, T, a);
currError = getRMSerror(currShape, shape2D);
reg_rigid = zeros(6,1);
regFactor = 20;
regularisations = [reg_rigid; regFactor ./ E]; % no regularisation on the rigid parameters; the non-rigid ones are penalised in proportion to the inverse eigenvalues
regularisations = diag(regularisations)*diag(regularisations);
red_in_a_row = 0;
for i=1:1000
shape3D = M + V * params;
shape3D = reshape(shape3D, numel(shape3D) / 3, 3);
% Now find the current residual error
currShape = a * R(1:2,:)*shape3D' + repmat(T, 1, numel(M)/3);
currShape = currShape';
error_res = shape2D - currShape;
eul = Rot2Euler(R);
p_global = [a; eul'; T];
% get the Jacobians
J = CalcJacobian(M, V, params, p_global);
% RLMS style update
p_delta = (J'*J + regularisations) \ (J'*error_res(:) - regularisations*[p_global;params]);
[params, p_global] = CalcReferenceUpdate(p_delta, params, p_global);
a = p_global(1);
R = Euler2Rot(p_global(2:4));
T = p_global(5:6);
shape3D = M + V * params;
shape3D = reshape(shape3D, numel(shape3D) / 3, 3);
currShape = a * R(1:2,:)*shape3D' + repmat(T, 1, numel(M)/3);
currShape = currShape';
error = getRMSerror(currShape, shape2D);
if(0.999 * currError < error)
red_in_a_row = red_in_a_row + 1;
if(red_in_a_row == 5)
break;
end
end
currError = error;
end
if(hidden)
shapeOrtho = getShapeOrtho(M_old, V_old, params, R, T, a);
else
shapeOrtho = currShape;
end
if(nargin == 7)
Zavg = f / a;
Xavg = (T(1) - cx) / a;
Yavg = (T(2) - cy) / a;
T3D = [Xavg;Yavg;Zavg];
else
T3D = [0;0;0];
end
end
function [shape2D] = getShapeOrtho(M, V, p, R, T, a)
% M - mean shape vector
% V - eigenvectors
% p - parameters of non-rigid shape
% R - rotation matrix
% T - translation vector (tx, ty)
shape3D = getShape3D(M, V, p);
shape2D = a * R(1:2,:)*shape3D' + repmat(T, 1, numel(M)/3);
shape2D = shape2D';
end
function [shape2D] = getShapeOrthoFull(M, V, p, R, T, a)
% M - mean shape vector
% V - eigenvectors
% p - parameters of non-rigid shape
% R - rotation matrix
% T - translation vector (tx, ty)
T = [T; 0];
shape3D = getShape3D(M, V, p);
shape2D = a * R*shape3D' + repmat(T, 1, numel(M)/3);
shape2D = shape2D';
end
function [shape3D] = getShape3D(M, V, params)
shape3D = M + V * params;
shape3D = reshape(shape3D, numel(shape3D) / 3, 3);
end
function [error] = getRMSerror(shape2Dv1, shape2Dv2)
error = sqrt(mean(reshape(shape2Dv1 - shape2Dv2, numel(shape2Dv1), 1).^2));
end
% This calculates the combined rigid with non-rigid Jacobian
function J = CalcJacobian(M, V, p, p_global)
n = size(M, 1)/3;
non_rigid_modes = size(V,2);
J = zeros(n*2, 6 + non_rigid_modes);
% now the layout is
% ---------- Rigid part -------------------|----Non rigid part--------|
% dx_1/ds, dx_1/dr1, ... dx_1/dtx, dx_1/dty dx_1/dp_1 ... dx_1/dp_m
% dx_2/ds, dx_2/dr1, ... dx_2/dtx, dx_2/dty dx_2/dp_1 ... dx_2/dp_m
% ...
% dx_n/ds, dx_n/dr1, ... dx_n/dtx, dx_n/dty dx_n/dp_1 ... dx_n/dp_m
% dy_1/ds, dy_1/dr1, ... dy_1/dtx, dy_1/dty dy_1/dp_1 ... dy_1/dp_m
% ...
% dy_n/ds, dy_n/dr1, ... dy_n/dtx, dy_n/dty dy_n/dp_1 ... dy_n/dp_m
% getting the rigid part
J(:,1:6) = CalcRigidJacobian(M, V, p, p_global);
% constructing the non-rigid part
R = Euler2Rot(p_global(2:4));
s = p_global(1);
% 'rotate' and 'scale' the principal components
% First reshape to 3D
V_X = V(1:n,:);
V_Y = V(n+1:2*n,:);
V_Z = V(2*n+1:end,:);
J_x_non_rigid = s*(R(1,1)*V_X + R(1,2)*V_Y + R(1,3)*V_Z);
J_y_non_rigid = s*(R(2,1)*V_X + R(2,2)*V_Y + R(2,3)*V_Z);
J(1:n, 7:end) = J_x_non_rigid;
J(n+1:end, 7:end) = J_y_non_rigid;
end
function J = CalcRigidJacobian(M, V, p, p_global)
n = size(M, 1)/3;
% Get the current 3D shape (not affected by the global transform, as this
% is how the Jacobian was derived; for the derivation please see
% ../derivations/orthoJacobian)
shape3D = GetShape3D(M, V, p);
% Get the rotation matrix corresponding to current global orientation
R = Euler2Rot(p_global(2:4));
s = p_global(1);
% Rigid Jacobian is laid out as follows
% dx_1/ds, dx_1/dr1, dx_1/dr2, dx_1/dr3, dx_1/dtx, dx_1/dty
% dx_2/ds, dx_2/dr1, dx_2/dr2, dx_2/dr3, dx_2/dtx, dx_2/dty
% ...
% dx_n/ds, dx_n/dr1, dx_n/dr2, dx_n/dr3, dx_n/dtx, dx_n/dty
% dy_1/ds, dy_1/dr1, dy_1/dr2, dy_1/dr3, dy_1/dtx, dy_1/dty
% ...
% dy_n/ds, dy_n/dr1, dy_n/dr2, dy_n/dr3, dy_n/dtx, dy_n/dty
J = zeros(n*2, 6);
% dx/ds = X * r11 + Y * r12 + Z * r13
% dx/dr1 = s*(r13 * Y - r12 * Z)
% dx/dr2 = -s*(r13 * X - r11 * Z)
% dx/dr3 = s*(r12 * X - r11 * Y)
% dx/dtx = 1
% dx/dty = 0
% dy/ds = X * r21 + Y * r22 + Z * r23
% dy/dr1 = s * (r23 * Y - r22 * Z)
% dy/dr2 = -s * (r23 * X - r21 * Z)
% dy/dr3 = s * (r22 * X - r21 * Y)
% dy/dtx = 0
% dy/dty = 1
% set the Jacobian for x's
% with respect to scaling factor
J(1:n,1) = shape3D * R(1,:)';
% with respect to angular rotation around x, y, and z axes
% Change of x with respect to change in axis angle rotation
dxdR = [ 0, R(1,3), -R(1,2);
-R(1,3), 0, R(1,1);
R(1,2), -R(1,1), 0];
J(1:n,2:4) = s*(dxdR * shape3D')';
% with respect to translation
J(1:n,5) = 1;
J(1:n,6) = 0;
% set the Jacobian for y's
% with respect to scaling factor
J(n+1:end,1) = shape3D * R(2,:)';
% with respect to angular rotation around x, y, and z axes
% Change of y with respect to change in axis angle rotation
dydR = [ 0, R(2,3), -R(2,2);
-R(2,3), 0, R(2,1);
R(2,2), -R(2,1), 0];
J(n+1:end,2:4) = s*(dydR * shape3D')';
% with respect to translation
J(n+1:end,5) = 0;
J(n+1:end,6) = 1;
end
% Update the rigid and non-rigid parameters given the RLMS parameter update
function [non_rigid, rigid] = CalcReferenceUpdate(params_delta, current_non_rigid, current_global)
rigid = zeros(6, 1);
% Scaling and translation parameters can simply be added to the current values
rigid(1) = current_global(1) + params_delta(1);
rigid(5) = current_global(5) + params_delta(5);
rigid(6) = current_global(6) + params_delta(6);
% for rotation however, we want to make sure that the rotation matrix
% approximation we have
% R' = [1, -wz, wy
% wz, 1, -wx
% -wy, wx, 1]
% is a legal rotation matrix, and then we combine it with current
% rotation (through matrix multiplication) to acquire the new rotation
R = Euler2Rot(current_global(2:4));
wx = params_delta(2);
wy = params_delta(3);
wz = params_delta(4);
R_delta = [1, -wz, wy;
wz, 1, -wx;
-wy, wx, 1];
% Make sure R_delta is orthonormal
R_delta = OrthonormaliseRotation(R_delta);
% Combine rotations
R_final = R * R_delta;
% Extract the Euler angles
euler = Rot2Euler(R_final);
rigid(2:4) = euler;
if(length(params_delta) > 6)
% non-rigid parameters can just be added together
non_rigid = params_delta(7:end) + current_non_rigid;
else
non_rigid = current_non_rigid;
end
end
function R_ortho = OrthonormaliseRotation(R)
% U * V' is basically what we want, as it's guaranteed to be
% orthonormal
[U, ~, V] = svd(R);
% We also want to make sure no reflection happened
% get the orthogonal matrix from the initial rotation matrix
X = U*V';
% This makes sure that the handedness is preserved and no reflection happened
% by making sure the determinant is 1 and not -1
W = eye(3);
W(3,3) = det(X);
R_ortho = U*W*V';
end
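
An end-to-end sanity sketch of the fitting routine on synthetic data (the PDM below is a random stand-in; in practice M, E and V come from a trained model shipped elsewhere in this commit):

% Project a random PDM instance with GetShapeOrtho, then fit it back
rng(3);
n_pts = 68; n_modes = 5;
M = randn(3 * n_pts, 1);                      % stand-in mean shape [X; Y; Z]
V = orth(randn(3 * n_pts, n_modes));          % stand-in orthonormal shape basis
E = ones(n_modes, 1);                         % stand-in eigenvalues
p_true = 0.2 * randn(n_modes, 1);
global_true = [2; 0.1; -0.2; 0.05; 320; 240]; % [scale; rx; ry; rz; tx; ty]
obs2D = GetShapeOrtho(M, V, p_true, global_true);
obs2D = obs2D(:, 1:2);                        % keep only the x, y coordinates
[a, R, T, T3D, p_est, err, fitted] = ...
    fit_PDM_ortho_proj_to_2D(M, E, V, obs2D, 500, 320, 240);
disp(err);                                    % RMS reprojection error of the fit
disp(Rot2Euler(R));                           % recovered orientation (pitch, yaw, roll)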

View File

@@ -0,0 +1,32 @@
function [ pts_new ] = iterate_piece_wise( pts_orig, new_num_points )
%ITERATE_PIECE_WISE Resample a piece-wise linear 2D curve to a new number of points
%   Linearly interpolates new_num_points points along the polyline defined by
%   pts_orig, so the resampled points still lie on the original line segments
num_orig = size(pts_orig,1);
% Divide the number of original segments by number of new segments
step_size = (num_orig - 1) / (new_num_points-1);
pts_new = zeros(new_num_points,2);
% Clamp the beginning and end, as they will be the same
pts_new(1,:) = pts_orig(1,:);
pts_new(end,:) = pts_orig(end,:);
for i=1:new_num_points-2
low_point = floor(1 + i * step_size);
high_point = ceil(1 + i * step_size);
coeff_1 = floor(1 + i * step_size) - i * step_size;
coeff_2 = 1 - coeff_1;
new_pt = coeff_1 * pts_orig(low_point,:) + coeff_2 * pts_orig(high_point,:);
pts_new(i+1,:) = new_pt;
end
end
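
A small hypothetical example of the resampling: densifying a five-point polyline to nine points that still lie on the original segments:

pts = [0 0; 1 0; 1 1; 2 1; 2 2];
pts_dense = iterate_piece_wise(pts, 9);
plot(pts(:,1), pts(:,2), 'o-', pts_dense(:,1), pts_dense(:,2), 'x');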

Some files were not shown because too many files have changed in this diff