surrogate.m
surrogate.m

% SURROGATE
% Generates random data resembling the input data.
%
% surrogateSet = surrogate(inputSet)
%
% where
%
% INPUTSET is a real (d x n)-matrix, containing n samples of
% a d-dimensional signal.
%
% Return values
% -------------
%
% SURROGATESET is a real (d x n)-matrix, containing data that resembles
% the INPUTSET, where the sense of resemblance depends on the chosen 
% algorithm. 
%
% Optional arguments
% ------------------
%
% ALGORITHM ('algorithm') is a string which specifies the algorithm to
% use for generating the surrogate data set. Must be one of
%
% ### preserve_correlations
%
% Preserves the auto-correlations in a row and the 
% cross-correlations between the rows. If INPUTSET is
% a wide-sense stationary stochastic process, then so is
% SURROGATESET. If INPUTSET is a gaussian process, then so
% is SURROGATESET. This algorithm is described in
% "Generating Surrogate Data for Time series with Several 
% Simultaneously Measured Variables", Dean Prichard, James Theiler, 
% Physical Review Letters, Volume 73, Number 7, 1994. 
%
% ### preserve_distribution_and_correlations
%
% This algorithm is described for the 1-dimensional case in
% "Improved Surrogate Data for Nonlinearity Tests",
% Thomas Schreiber, Andreas Schmitz, Physical Review Letters,
% Volume 77, Number 4, 1996.
%
% ### preserve_dynamics
%
% Preserves the underlying dynamical system. The surrogate
% data is generated according to the examples set by the
% input set. It is assumed that the input data already resides
% in a state space which represents the dynamical system well
% (i.e. has been delay-embedded properly).
%
% Optional arguments for correlation-preserving algorithms
% --------------------------------------------------------
%
% FREQUENCY_RANGE ('frequencyRange') is a pair [a, b] of real numbers, 
% which denotes the interval [a, b] of normalized frequencies whose 
% phases to randomize. The normalized frequency 1 corresponds to half 
% the sampling rate. It must hold that 0 <= a <= b <= 1. This allows
% for truncated randomization, as described in xxx.
% Default: [0, 1]
%
% Optional arguments for dynamics-preserving algorithms
% -----------------------------------------------------
%
% K ('k') is a positive integer which specifies the
% number of nearest neighbors to use for prediction.

function surrogateSet = surrogate(inputSet, varargin)

import([tim_package, '.*']);

concept_check(nargin, 'inputs', 1);
concept_check(nargout, 'outputs', 0 : 1);

% Optional input arguments
algorithm = 'preserve_correlations';
frequencyRange = [0, 1];
k = 1;
eval(process_options({...
    'algorithm', ...
    'frequencyRange', ...
    'k'}, varargin));

[d, n] = size(inputSet);
minFrequency = frequencyRange(1);
maxFrequency = frequencyRange(2);
kNearest = k;

if strcmp(algorithm, 'preserve_dynamics')
    % Create a kd-tree from the points in the input set.
    kdTree = pastelmatlab.PointKdTree(d);
    idSet = kdTree.insert(inputSet);
    kdTree.refine();

    % Hide the last point in the time-series.
    % This is because it can not be used for prediction.
    kdTree.hide(idSet(end));

    % Pick a neighborhood of k points from the trajectory.
    startSet = kdTree.search_nearest(...
        kdTree.as_points(randi([1, n])), ...
        'maxDistanceSet', Inf, ...
        'kNearest', kNearest);

    % Choose the starting point as a random convex combination
    % of the points in the neighborhood.
    factorSet = rand(kNearest, 1);
    startPoint = kdTree.as_points(startSet) * (factorSet / sum(factorSet));

    % Generate surrogate data using non-linear prediction.
    currentPoint = startPoint;
    for i = 1 : n
        surrogateSet(:, i) = currentPoint;

        % Pick a neighborhood of k points from the trajectory
        % around the current point.
        neighborSet = kdTree.search_nearest(...
            currentPoint, ...
            'maxDistanceSet', Inf, ...
            'kNearest', kNearest);

        % Evolve the neighborhood in time. This is always
        % defined since we hided the last point in time.
        neighborSet = neighborSet + 1;

        % Choose the predicted point as a convex combination
        % of the predicted neighborhood.
        %factorSet = rand(kNearest, 1);
        factorSet = ones(kNearest, 1);
        currentPoint = kdTree.as_points(neighborSet) * ...
            (factorSet / sum(factorSet));
    end

    clear kdTree;
end

if strcmp(algorithm, 'preserve_correlations')
    % Let
    % 
    %   X : [1, n] subset ZZ --> RR^d 
    %
    % be a stochastic process, such that
    %
    %   X(t) = [X_1(t), ... X_d(t)].
    %
    % Compute the discrete Fourier transform of X_i as 
    %
    %   F_i : [1, n] subset ZZ --> CC.
    %
    % Let R : [1, n] subset ZZ --> CC be a function such that
    % R(f) is a random unit complex number for all f in [1, n],
    % and R(mod(-f + 1, n) + 1) = -R(f). The surrogate data 
    % Y_i : [1, n] subset ZZ --> RR is then given by
    %
    %   Y_i = T^{-1}[abs(F_i) * R],
    %
    % where T^{-1} is the inverse discrete Fourier transform.

    nHalf = floor((n - 1) / 2);
    minIndex = 1 + round((nHalf - 1) * minFrequency);
    maxIndex = 1 + round((nHalf - 1) * maxFrequency);

    % Only the phases for the requested frequency-range
    % will be randomized.
    halfAngleSet = [...
        zeros(1, minIndex - 1), ...
        rand(1, maxIndex - minIndex + 1) * 2 * pi, ...
        zeros(1, nHalf - maxIndex)];

    % It can be proved that the result of the inverse discrete 
    % Fourier transform is real if and only if
    %
    %   |F(N - k)| = |F(k)|, and
    %   arg(F(N - k)) = -arg(F(k)), 
    %
    % for all k in [1, N - 1]. Since the discrete Fourier transform
    % is invertible, also the discrete Fourier transform of a 
    % real sequence has these properties.
    %
    % Since the surrogate data was requested to be real,
    % the phase-randomization must be done under this 
    % constraint. The value of arg(F(0)) does not matter;
    % it is the phase of the mean.

    % Enforce the real-condition on the angle-shifts.
    if mod(n, 2) == 1
        angleSet = [0, halfAngleSet, -fliplr(halfAngleSet)];
    else
        angleSet = [0, halfAngleSet, 0, -fliplr(halfAngleSet)];
    end

    surrogateSet = zeros(d, n);
    for i = 1 : d
        % The k:th component of the discrete Fourier transform of
        % inputSet(i, :) is given alpha_k addition in angle. This 
        % addition is independent of i, which guarantees that the 
        % cross-correlations are preserved.
        randomizedFftSet = fft(inputSet(i, :)) .* exp(j * angleSet);
        surrogateSet(i, :) = real(ifft(randomizedFftSet));
    end
end