# -*- coding: utf-8 -*-
"""
biosppy.features.cepstral
-------------------------
This module provides methods to extract cepstral features.
:copyright: (c) 2015-2023 by Instituto de Telecomunicacoes
:license: BSD 3-clause, see LICENSE for more details.
"""
# Imports
# 3rd party
import numpy as np
from scipy import fft
# local
from .. import utils
from . import time
from ..signals import tools as st
[docs]def cepstral(signal=None, sampling_rate=1000.):
"""Compute quefrency metrics describing the signal.
Parameters
----------
signal : array
Input signal.
sampling_rate : int, float, optional
Sampling frequency (Hz).
Returns
-------
feats : ReturnTuple object
Time features computed over the signal mel-frequency cepstral coefficients.
Notes
-----
Check biosppy.features.time for the list of time features.
"""
# check inputs
if signal is None:
raise TypeError("Please specify an input signal.")
# ensure numpy
signal = np.array(signal)
# initialize output
feats = utils.ReturnTuple((), ())
# compute mel coefficients
mel_coeff = mfcc(signal, sampling_rate)["mfcc"]
# time features
time_feats = time.time(signal=mel_coeff, sampling_rate=sampling_rate)
for arg, name in zip(time_feats, time_feats.keys()):
feats = feats.append(arg, 'mfcc_' + name)
return feats
[docs]def freq_to_mel(hertz):
"""Converts mel-frequencies to hertz frequencies [Kool12]_.
Parameters
----------
hertz : array
Hertz frequencies.
Returns
-------
mel frequencies : array
Mel frequencies.
References
----------
.. [Kool12] Shashidhar G. Koolagudi, Deepika Rastogi, K. Sreenivasa Rao, Identification of Language using
Mel-Frequency Cepstral Coefficients (MFCC), Procedia Engineering, Volume 38, 2012, Pages 3391-3398, ISSN 1877-7058
"""
return 1125 * np.log(1 + hertz / 700)
[docs]def mel_to_freq(mel):
"""Converts mel-frequencies to hertz frequencies.
Parameters
----------
mel : array
Mel frequencies.
Returns
-------
hertz frequencies : array
Hertz frequencies.
References
----------
.. [Kool12] Shashidhar G. Koolagudi, Deepika Rastogi, K. Sreenivasa Rao, Identification of Language using
Mel-Frequency Cepstral Coefficients (MFCC), Procedia Engineering, Volume 38, 2012, Pages 3391-3398, ISSN 1877-7058
"""
return 700 * (np.exp(mel / 1125) - 1)
[docs]def mfcc(signal=None, sampling_rate=1000., window_size=100, num_filters=10):
"""Computes the mel-frequency cepstral coefficients.
Parameters
----------
signal : array
Input signal.
sampling_rate : int, float, optional
Sampling frequency (Hz).
window_size : int
DFT window size.
num_filters : int
Number of filters.
Returns
-------
mfcc : array
Signal mel-frequency cepstral coefficients.
References
----------
.. [Haytham16] Fayek, Haytham. "Speech Processing for Machine Learning: Filter banks,
Mel-Frequency Cepstral Coefficients (MFCCs) and What's In-Between."Blog post. 2016.
https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
.. [Brihijoshi] 'Vanilla STFT and MFCC' by brihijoshi,
accessed in october 2022:https://github.com/brihijoshi/vanilla-stft-mfcc/
.. [Tsfel] 'Time Series Feature Extraction Library' by fraunhoferportugal,
accessed in october 2022: https://github.com/fraunhoferportugal/tsfel/
.. [Ilyamich] 'MFCC implementation and tutorial' by ilyamich, accessed in october 2022:
https://www.kaggle.com/code/ilyamich/mfcc-implementation-and-tutorial
"""
# check inputs
if signal is None:
raise TypeError("Please specify an input signal.")
# ensure numpy
signal = np.array(signal)
# compute power spectrum
freqs, power = st.power_spectrum(signal, sampling_rate=sampling_rate, decibel=False)
# filter bank
low_f = 0
high_f = freqs[-1]
# convert to mel
low_f_mel = freq_to_mel(low_f)
high_f_mel = freq_to_mel(high_f)
# linearly spaced array between the two MEL frequencies
lin_mel = np.linspace(low_f_mel, high_f_mel, num=num_filters+2)
# convert the array to the frequency space
lin_hz = np.array([mel_to_freq(d) for d in lin_mel])
# normalize the array to the FFT size and choose the associated FFT values
filter_bins_hz = np.floor((window_size + 1) / sampling_rate * lin_hz).astype(int)
# filter bank
filter_banks = []
# iterate bins
for b in range(len(filter_bins_hz)-2):
_f = [0]*(filter_bins_hz[b])
_f += np.linspace(0, 1, filter_bins_hz[b + 1] - filter_bins_hz[b]).tolist()
_f += np.linspace(1, 0, filter_bins_hz[b + 2] - filter_bins_hz[b + 1]).tolist()
pad = len(freqs) - filter_bins_hz[b + 2]
if pad > 0:
_f += [0]*pad
else:
_f = _f[:len(freqs)]
filter_banks += [np.array(_f)]
filter_banks = np.array(filter_banks)
enorm = 2.0 / (lin_hz[2:num_filters+2] - lin_hz[:num_filters])
filter_banks *= enorm[:, np.newaxis]
signal_power = np.abs(power)**2*(1/len(power))
filter_banks = np.dot(filter_banks, signal_power.T)
filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
filter_banks = 20 * np.log10(filter_banks) # dB
mel_coeff = fft.dct(filter_banks)[1:] # Keep 2-13
mel_coeff -= (np.mean(mel_coeff, axis=0) + 1e-8) # norm
# sinusoidal liftering to the MFCCs to de-emphasize higher MFCCs
n = np.arange(len(mel_coeff))
cep_lifter = 22
lift = 1 + (cep_lifter / 2) * np.sin(np.pi * n / cep_lifter)
mel_coeff *= lift
# output
args = (mel_coeff,)
names = ("mfcc",)
return utils.ReturnTuple(args, names)