Source code for hypertools.tools.reduce

#!/usr/bin/env python

import warnings
from sklearn.decomposition import PCA, FastICA, IncrementalPCA, KernelPCA, FactorAnalysis, TruncatedSVD, SparsePCA, MiniBatchSparsePCA, DictionaryLearning, MiniBatchDictionaryLearning
from sklearn.manifold import TSNE, MDS, SpectralEmbedding, LocallyLinearEmbedding, Isomap
from umap import UMAP
from .._shared.helpers import *
from .normalize import normalize as normalizer
from .align import align as aligner
from .format_data import format_data as formatter

# Registry of supported models: maps the name accepted by ``reduce`` to the
# class that implements it.  Each key is the class's own ``__name__``, so the
# mapping is derived from the classes rather than spelled out twice.
models = {
    cls.__name__: cls
    for cls in (
        # matrix decomposition models
        PCA,
        IncrementalPCA,
        SparsePCA,
        MiniBatchSparsePCA,
        KernelPCA,
        FastICA,
        FactorAnalysis,
        TruncatedSVD,
        DictionaryLearning,
        MiniBatchDictionaryLearning,
        # manifold learning models
        TSNE,
        Isomap,
        SpectralEmbedding,
        LocallyLinearEmbedding,
        MDS,
        UMAP,
    )
}

# main function
@memoize
def reduce(x, reduce='IncrementalPCA', ndims=None, normalize=None, align=None,
           model=None, model_params=None, internal=False, format_data=True):
    """
    Reduces dimensionality of an array, or list of arrays

    Parameters
    ----------
    x : Numpy array or list of arrays
        Data to reduce.  The arrays are stacked row-wise and the model is fit
        once on the stacked data.

    reduce : str, dict, model class or None
        Decomposition/manifold learning model to use.  Models supported: PCA,
        IncrementalPCA, SparsePCA, MiniBatchSparsePCA, KernelPCA, FastICA,
        FactorAnalysis, TruncatedSVD, DictionaryLearning, MiniBatchDictionaryLearning,
        TSNE, Isomap, SpectralEmbedding, LocallyLinearEmbedding, MDS and UMAP.
        Can be passed as a string, but for finer control of the model
        parameters, pass as a dictionary, e.g. reduce={'model' : 'PCA',
        'params' : {'whiten' : True}}. See scikit-learn specific model docs
        for details on parameters supported for each model.  A custom model
        class may also be passed; it must expose ``fit_transform`` and
        ``n_components`` attributes and accept its parameters as keyword
        arguments.  If None, x is returned untouched.

    ndims : int
        Number of dimensions to reduce to.  Takes precedence over an
        ``n_components`` entry in the model params (a warning is issued when
        the two differ).

    internal : bool
        If True, the reduced data is always returned as a list, even when it
        holds a single array (default: False).

    format_data : bool
        Whether or not to first call the format_data function (default: True).
        When False, x is iterated as a sequence of 2D arrays as-is.

    model : None
        Deprecated argument.  Please use reduce.

    model_params : None
        Deprecated argument.  Please use reduce.

    align : None
        Deprecated argument.  Please use new analyze function to perform
        combinations of transformations

    normalize : None
        Deprecated argument.  Please use new analyze function to perform
        combinations of transformations

    Returns
    ----------
    x_reduced : Numpy array or list of arrays
        The reduced data with ndims dimensionality is returned.  If the input
        is a list, a list is returned.  When no target dimensionality is given,
        or no array has more than ndims columns, the (formatted) input is
        returned without fitting a model.  When the data holds a single row, a
        one-element list containing a (1, ndims) array of zeros is returned.

    Raises
    ------
    ValueError
        If a dictionary is passed without both the "model" and "params" keys,
        or if the requested model is neither a supported name nor an object
        exposing ``fit_transform`` and ``n_components``.

    """

    # deprecation warning
    if (model is not None) or (model_params is not None):
        warnings.warn('Model and model params will be deprecated.  Please use the \
                      reduce keyword.  See API docs for more info: http://hypertools.readthedocs.io/en/latest/hypertools.tools.reduce.html#hypertools.tools.reduce')
        reduce = {
            'model': model,
            'params': model_params
        }

    # if model is None, just return data
    if reduce is None:
        return x

    # np.bytes_ is the same type np.string_ used to alias; the np.string_
    # spelling no longer exists in NumPy >= 2.0
    string_types = (str, np.bytes_)

    if isinstance(reduce, string_types):
        model_name = reduce
        model_params = {
            'n_components': ndims
        }

    elif isinstance(reduce, dict):
        try:
            model_name = reduce['model']
            model_params = reduce['params']
        except KeyError:
            raise ValueError('If passing a dictionary, pass the model as the value of the "model" key and a \
            dictionary of custom params as the value of the "params" key.')
        # work on a copy so the caller's dictionary is never mutated below; a
        # None value (e.g. the deprecated `model` argument given without
        # `model_params`) simply means "no custom params"
        model_params = {} if model_params is None else dict(model_params)

    else:
        # a custom model object was passed directly; it is validated below
        # and starts with no custom params
        model_name = reduce
        model_params = {}

    try:
        # if the model passed is a string, make sure it's one of the supported options
        if isinstance(model_name, string_types):
            model = models[model_name]
        # otherwise check any custom object for necessary methods
        else:
            model = model_name
            getattr(model, 'fit_transform')
            getattr(model, 'n_components')
    except (KeyError, AttributeError):
        raise ValueError('reduce must be one of the supported options or support n_components and fit_transform \
         methods. See http://hypertools.readthedocs.io/en/latest/hypertools.tools.reduce.html#hypertools.tools.reduce \
         for supported models')

    # reconcile n_components with ndims: ndims wins whenever it is given
    if 'n_components' not in model_params:
        model_params['n_components'] = ndims
    elif (ndims is not None) and (ndims != model_params['n_components']):
        warnings.warn('Unequal values passed to dims and n_components. Using ndims parameter.')
        model_params['n_components'] = ndims
    n_components = model_params['n_components']

    # convert to common format
    if format_data:
        x = formatter(x, ppca=True)

    # if ndims/n_components is not passed or all data is < ndims-dimensional, just return it
    if n_components is None or all(i.shape[1] <= n_components for i in x):
        return x

    n_rows = np.vstack(x).shape[0]
    if n_rows == 1:
        warnings.warn('Cannot reduce the dimensionality of a single row of'
                      ' data. Return zeros length of ndims')
        return [np.zeros((1, n_components))]

    elif n_rows < n_components:
        # NOTE: this only warns; n_components itself is left unchanged
        warnings.warn('The number of rows in your data is less than ndims.'
                      ' The data will be reduced to the number of rows.')

    # deprecation warnings
    if normalize is not None:
        warnings.warn('The normalize argument will be deprecated for this function.  Please use the \
                      analyze function to perform combinations of these transformations.  See API docs for more info: http://hypertools.readthedocs.io/en/latest/hypertools.analyze.html#hypertools.analyze')
        x = normalizer(x, normalize=normalize)

    if align is not None:
        warnings.warn('The align argument will be deprecated for this function.  Please use the \
                      analyze function to perform combinations of these transformations.  See API docs for more info: http://hypertools.readthedocs.io/en/latest/hypertools.analyze.html#hypertools.analyze')
        x = aligner(x, align=align)

    # initialize model
    model = model(**model_params)

    # reduce data
    x_reduced = reduce_list(x, model)

    # return a bare array only for a single result requested externally
    if internal or len(x_reduced) > 1:
        return x_reduced
    return x_reduced[0]


# sub functions
def reduce_list(x, model):
    """
    Fit ``model`` on all arrays at once and split the result back up.

    Parameters
    ----------
    x : list of arrays
        Arrays to reduce; they are stacked row-wise before fitting.

    model : object
        Model instance whose ``fit_transform`` is called on the stacked data.

    Returns
    ----------
    x_reduced : list of arrays
        One transformed array per input array, each holding the rows that
        came from the corresponding input.

    """
    row_counts = [len(arr) for arr in x]
    # row offsets at which one input array ends and the next begins
    boundaries = np.cumsum(row_counts)[:-1]
    transformed = model.fit_transform(np.vstack(x))
    return list(np.vsplit(transformed, boundaries))