Source code for biosppy.metrics

# -*- coding: utf-8 -*-
"""
biosppy.metrics
---------------

This module provides pairwise distance computation methods.

:copyright: (c) 2015-2018 by Instituto de Telecomunicacoes
:license: BSD 3-clause, see LICENSE for more details.
"""

# Imports
# compat
from __future__ import absolute_import, division, print_function
import six

# 3rd party
import numpy as np
import scipy.spatial.distance as ssd
from scipy import linalg


def pcosine(u, v):
    """Computes the Cosine distance (positive space) between 1-D arrays.

    The Cosine distance (positive space) between `u` and `v` is defined as

    .. math::

        d(u, v) = 1 - abs \\left( \\frac{u \\cdot v}{||u||_2 ||v||_2} \\right)

    where :math:`u \\cdot v` is the dot product of :math:`u` and :math:`v`.

    Parameters
    ----------
    u : array
        Input array.
    v : array
        Input array.

    Returns
    -------
    cosine : float
        Cosine distance between `u` and `v`, clipped to the interval
        [0, 1]. If either input has zero norm the ratio is 0/0 and the
        result is NaN.

    """

    # validate vectors like scipy does
    u = ssd._validate_vector(u)
    v = ssd._validate_vector(v)

    similarity = np.abs(np.dot(u, v) / (linalg.norm(u) * linalg.norm(v)))

    # Rounding can leave the ratio marginally above 1 for (anti-)parallel
    # vectors, which would produce a tiny negative distance; clip so the
    # result stays a valid distance. NaN passes through np.clip unchanged.
    dist = np.clip(1. - similarity, 0., 1.)

    return dist
def pdist(X, metric='euclidean', **kwargs):
    """Pairwise distances between observations in n-dimensional space.

    Thin wrapper around scipy.spatial.distance.pdist that additionally
    understands the metric name 'pcosine', which is mapped to this
    module's `pcosine` function. Any other metric (name or callable) is
    forwarded to SciPy unchanged.

    Parameters
    ----------
    X : array
        An m by n array of m original observations in an n-dimensional
        space.
    metric : str, function, optional
        The distance metric to use; the distance can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine',
        'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'pcosine',
        'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
        'sokalsneath', 'sqeuclidean', 'yule'.
    **kwargs : dict, optional
        Extra arguments passed through to scipy.spatial.distance.pdist.
        Possible entries:

        p : float
            The p-norm to apply (for Minkowski, weighted and unweighted).
        w : array
            The weight vector (for weighted Minkowski).
        V : array
            The variance vector (for standardized Euclidean).
        VI : array
            The inverse of the covariance matrix (for Mahalanobis).

    Returns
    -------
    Y : array
        Returns a condensed distance matrix Y. For each :math:`i` and
        :math:`j` (where :math:`i<j<m`), the metric
        ``dist(u=X[i], v=X[j])`` is computed and stored in entry ``ij``.

    """

    # only the string 'pcosine' is intercepted; everything else goes to
    # scipy exactly as given
    wants_pcosine = isinstance(metric, six.string_types) and metric == 'pcosine'
    chosen = pcosine if wants_pcosine else metric

    return ssd.pdist(X, chosen, **kwargs)
def cdist(XA, XB, metric='euclidean', **kwargs):
    """Computes distance between each pair of the two collections of inputs.

    Thin wrapper around scipy.spatial.distance.cdist that additionally
    understands the metric name 'pcosine', which is mapped to this
    module's `pcosine` function. Any other metric (name or callable) is
    forwarded to SciPy unchanged.

    Parameters
    ----------
    XA : array
        An :math:`m_A` by :math:`n` array of :math:`m_A` original
        observations in an :math:`n`-dimensional space.
    XB : array
        An :math:`m_B` by :math:`n` array of :math:`m_B` original
        observations in an :math:`n`-dimensional space.
    metric : str, function, optional
        The distance metric to use; the distance can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine',
        'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
        'mahalanobis', 'matching', 'minkowski', 'pcosine',
        'rogerstanimoto', 'russellrao', 'seuclidean', 'sokalmichener',
        'sokalsneath', 'sqeuclidean', 'yule'.
    **kwargs : dict, optional
        Extra arguments passed through to scipy.spatial.distance.cdist.
        Possible entries:

        p : float
            The p-norm to apply (for Minkowski, weighted and unweighted).
        w : array
            The weight vector (for weighted Minkowski).
        V : array
            The variance vector (for standardized Euclidean).
        VI : array
            The inverse of the covariance matrix (for Mahalanobis).

    Returns
    -------
    Y : array
        An :math:`m_A` by :math:`m_B` distance matrix is returned. For each
        :math:`i` and :math:`j`, the metric ``dist(u=XA[i], v=XB[j])``
        is computed and stored in the :math:`ij` th entry.

    """

    # only the string 'pcosine' is intercepted; everything else goes to
    # scipy exactly as given
    wants_pcosine = isinstance(metric, six.string_types) and metric == 'pcosine'
    chosen = pcosine if wants_pcosine else metric

    return ssd.cdist(XA, XB, chosen, **kwargs)
def squareform(X, force="no", checks=True):
    """Converts a vector-form distance vector to a square-form distance
    matrix, and vice-versa.

    Delegates directly to scipy.spatial.distance.squareform.

    Parameters
    ----------
    X : array
        Either a condensed or redundant distance matrix.
    force : str, optional
        As with MATLAB(TM), if force is equal to 'tovector' or 'tomatrix',
        the input will be treated as a distance matrix or distance vector
        respectively.
    checks : bool, optional
        If `checks` is set to False, no checks will be made for matrix
        symmetry nor zero diagonals. This is useful if it is known that
        ``X - X.T`` is small and ``diag(X)`` is close to zero. These
        values are ignored any way so they do not disrupt the squareform
        transformation.

    Returns
    -------
    Y : array
        If a condensed distance matrix is passed, a redundant one is
        returned, or if a redundant one is passed, a condensed distance
        matrix is returned.

    """

    converted = ssd.squareform(X, force=force, checks=checks)

    return converted