Source code for hypertools.tools.procrustes

#!/usr/bin/env python
from __future__ import division
from builtins import zip
from builtins import range

import numpy as np

from .format_data import format_data as formatter


[docs]def procrustes(source, target, scaling=True, reflection=True, reduction=False,
               oblique=False, oblique_rcond=-1, format_data=True):
    """
    Function to project from one space to another using Procrustean
    transformation (shift + scaling + rotation + reflection).

    The implementation of this function was based on the ProcrusteanMapper in
    pyMVPA: https://github.com/PyMVPA/PyMVPA

    See also: http://en.wikipedia.org/wiki/Procrustes_transformation

    Parameters
    ----------
    source : Numpy array
        Array to be aligned to target's coordinate system.

    target: Numpy array
        Source is aligned to this target space

    scaling : bool
        Estimate a global scaling factor for the transformation
        (no longer rigid body)

    reflection : bool
        Allow for the data to be reflected (so it might not be
        a rotation. Effective only for non-oblique transformations.

    reduction : bool
        If true, it is allowed to map into lower-dimensional
        space. Forward transformation might be suboptimal then and
        reverse transformation might not recover all original
        variance.

    oblique : bool
        Either to allow non-orthogonal transformation -- might
        heavily overfit the data if there is less samples than
        dimensions. Use `oblique_rcond`.

    oblique_rcond : float
        Cutoff for 'small' singular values to regularize the
        inverse. See :class:`~numpy.linalg.lstsq` for more
        information.

    Returns
    ----------
    aligned_source : Numpy array
        The array source is aligned to target and returned

    """

    def fit(source, target):

        datas = (source, target)
        sn, sm = source.shape
        tn, tm = target.shape

        # Check the sizes
        if sn != tn:
            raise ValueError("Data for both spaces should have the same " \
                  "number of samples. Got %d in template and %d in target space" \
                  % (sn, tn))

        # Sums of squares
        ssqs = [np.sum(d**2, axis=0) for d in datas]

        # XXX check for being invariant?
        #     needs to be tuned up properly and not raise but handle
        for i in range(2):
            if np.all(ssqs[i] <= np.abs((np.finfo(datas[i].dtype).eps
                                       * sn )**2)):
                raise ValueError("For now do not handle invariant in time datasets")

        norms = [ np.sqrt(np.sum(ssq)) for ssq in ssqs ]
        normed = [ data/norm for (data, norm) in zip(datas, norms) ]

        # add new blank dimensions to template space if needed
        if sm < tm:
            normed[0] = np.hstack( (normed[0], np.zeros((sn, tm-sm))) )

        if sm > tm:
            if reduction:
                normed[1] = np.hstack( (normed[1], np.zeros((sn, sm-tm))) )
            else:
                raise ValueError("reduction=False, so mapping from " \
                      "higher dimensionality " \
                      "template space is not supported. template space had %d " \
                      "while target %d dimensions (features)" % (sm, tm))

        source, target = normed
        if oblique:
            # Just do silly linear system of equations ;) or naive
            # inverse problem
            if sn == sm and tm == 1:
                T = np.linalg.solve(source, target)
            else:
                T = np.linalg.lstsq(source, target, rcond=oblique_rcond)[0]
            ss = 1.0
        else:
            # Orthogonal transformation
            # figure out optimal rotation
            U, s, Vh = np.linalg.svd(np.dot(target.T, source),
                                     full_matrices=False)
            T = np.dot(Vh.T, U.T)

            if not reflection:
                # then we need to assure that it is only rotation
                # "recipe" from
                # http://en.wikipedia.org/wiki/Orthogonal_Procrustes_problem
                # for more and info and original references, see
                # http://dx.doi.org/10.1007%2FBF02289451
                nsv = len(s)
                s[:-1] = 1
                s[-1] = np.linalg.det(T)
                T = np.dot(U[:, :nsv] * s, Vh)

            # figure out scale and final translation
            # XXX with reflection False -- not sure if here or there or anywhere...
            ss = sum(s)

        # if we were to collect standardized distance
        # std_d = 1 - sD**2

        # select out only relevant dimensions
        if sm != tm:
            T = T[:sm, :tm]

        # Assign projection
        if scaling:
            scale = ss * norms[1] / norms[0]
            proj = scale * T
        else:
            proj = T
        return proj

    def transform(data, proj):
        if proj is None:
            raise RuntimeError("Mapper needs to be trained before use.")

        d = np.asmatrix(data)

        # Do projection
        res = (d * proj).A

        return res

    if format_data:
        source, target = formatter([source, target])

    # fit and transform
    proj = fit(source, target)
    return transform(source, proj)