# -*- coding: utf-8 -*-
"""
biosppy.metrics
---------------
This module provides pairwise distance computation methods.
:copyright: (c) 2015-2018 by Instituto de Telecomunicacoes
:license: BSD 3-clause, see LICENSE for more details.
"""
# Imports
# compat
from __future__ import absolute_import, division, print_function
import six
# 3rd party
import numpy as np
import scipy.spatial.distance as ssd
from scipy import linalg
def pcosine(u, v):
    """Computes the Cosine distance (positive space) between 1-D arrays.

    The Cosine distance (positive space) between `u` and `v` is defined as

    .. math::

        d(u, v) = 1 - abs \\left( \\frac{u \\cdot v}{||u||_2 ||v||_2} \\right)

    where :math:`u \\cdot v` is the dot product of :math:`u` and :math:`v`.

    Parameters
    ----------
    u : array
        Input array.
    v : array
        Input array.

    Returns
    -------
    cosine : float
        Cosine distance between `u` and `v`, in the interval [0, 1].

    Notes
    -----
    The sign of the cosine is discarded, so parallel and anti-parallel
    vectors both have distance 0. If either input has zero norm the ratio
    is undefined and the result is NaN.

    """

    # validate vectors like scipy does (relies on a private SciPy helper)
    u = ssd._validate_vector(u)
    v = ssd._validate_vector(v)

    similarity = np.abs(np.dot(u, v) / (linalg.norm(u) * linalg.norm(v)))

    # Floating-point rounding can push the similarity marginally above 1
    # for (anti-)parallel vectors; clamp so the distance is never negative.
    # np.maximum (unlike the builtin max) propagates NaN unchanged.
    return np.maximum(1. - similarity, 0.)
def pdist(X, metric='euclidean', **kwargs):
    """Pairwise distances between observations in n-dimensional space.

    Thin wrapper around scipy.spatial.distance.pdist that additionally
    understands the metric name 'pcosine' (see `pcosine`).

    Parameters
    ----------
    X : array
        An m by n array of m original observations in an n-dimensional space.
    metric : str, function, optional
        The distance metric to use; the distance can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'pcosine', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
        Apart from 'pcosine', names are resolved by SciPy itself.
    **kwargs
        Extra keyword arguments forwarded unchanged to SciPy. Possible kwargs:
            p : float
                The p-norm to apply (for Minkowski, weighted and unweighted).
            w : array
                The weight vector (for weighted Minkowski).
            V : array
                The variance vector (for standardized Euclidean).
            VI : array
                The inverse of the covariance matrix (for Mahalanobis).

    Returns
    -------
    Y : array
        Returns a condensed distance matrix Y. For each :math:`i` and
        :math:`j` (where :math:`i<j<m`), the metric ``dist(u=X[i], v=X[j])``
        is computed and stored in entry ``ij``.

    """

    # Only the exact string 'pcosine' is replaced by the local callable;
    # any other string or callable is handed to SciPy as given.
    wants_pcosine = isinstance(metric, six.string_types) and metric == 'pcosine'
    resolved = pcosine if wants_pcosine else metric

    return ssd.pdist(X, resolved, **kwargs)
def cdist(XA, XB, metric='euclidean', **kwargs):
    """Computes distance between each pair of the two collections of inputs.

    Thin wrapper around scipy.spatial.distance.cdist that additionally
    understands the metric name 'pcosine' (see `pcosine`).

    Parameters
    ----------
    XA : array
        An :math:`m_A` by :math:`n` array of :math:`m_A` original observations
        in an :math:`n`-dimensional space.
    XB : array
        An :math:`m_B` by :math:`n` array of :math:`m_B` original observations
        in an :math:`n`-dimensional space.
    metric : str, function, optional
        The distance metric to use; the distance can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'pcosine', 'rogerstanimoto', 'russellrao',
        'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'yule'.
        Apart from 'pcosine', names are resolved by SciPy itself.
    **kwargs
        Extra keyword arguments forwarded unchanged to SciPy. Possible kwargs:
            p : float
                The p-norm to apply (for Minkowski, weighted and unweighted).
            w : array
                The weight vector (for weighted Minkowski).
            V : array
                The variance vector (for standardized Euclidean).
            VI : array
                The inverse of the covariance matrix (for Mahalanobis).

    Returns
    -------
    Y : array
        An :math:`m_A` by :math:`m_B` distance matrix is returned. For each
        :math:`i` and :math:`j`, the metric ``dist(u=XA[i], v=XB[j])``
        is computed and stored in the :math:`ij` th entry.

    """

    # Only the exact string 'pcosine' is replaced by the local callable;
    # any other string or callable is handed to SciPy as given.
    wants_pcosine = isinstance(metric, six.string_types) and metric == 'pcosine'
    resolved = pcosine if wants_pcosine else metric

    return ssd.cdist(XA, XB, resolved, **kwargs)