Source code for biosppy.features.cepstral

# -*- coding: utf-8 -*-
"""
biosppy.features.cepstral
-------------------------

This module provides methods to extract cepstral features.

:copyright: (c) 2015-2023 by Instituto de Telecomunicacoes
:license: BSD 3-clause, see LICENSE for more details.
"""

# Imports
# 3rd party
import numpy as np
from scipy import fft

# local
from .. import utils
from . import time
from ..signals import tools as st


def cepstral(signal=None, sampling_rate=1000.):
    """Compute quefrency metrics describing the signal.

    The mel-frequency cepstral coefficients of the signal are computed first
    (see ``mfcc``), and the time features are then extracted from that
    coefficient sequence. Every returned feature name carries the ``mfcc_``
    prefix.

    Parameters
    ----------
    signal : array
        Input signal.
    sampling_rate : int, float, optional
        Sampling frequency (Hz).

    Returns
    -------
    feats : ReturnTuple object
        Time features computed over the signal mel-frequency cepstral
        coefficients.

    Raises
    ------
    TypeError
        If no input signal is given.

    Notes
    -----
    Check biosppy.features.time for the list of time features.

    """

    # a signal is mandatory
    if signal is None:
        raise TypeError("Please specify an input signal.")

    # work on a numpy version of the input
    signal = np.array(signal)

    # mel-frequency cepstral coefficients of the signal
    coefficients = mfcc(signal, sampling_rate)["mfcc"]

    # time features of the coefficient sequence
    time_feats = time.time(signal=coefficients, sampling_rate=sampling_rate)

    # gather every time feature under an 'mfcc_'-prefixed name
    feats = utils.ReturnTuple((), ())
    for name, value in zip(time_feats.keys(), time_feats):
        feats = feats.append(value, 'mfcc_' + name)

    return feats
def freq_to_mel(hertz):
    """Converts hertz frequencies to mel-frequencies [Kool12]_.

    The conversion is ``mel = 1125 * ln(1 + hertz / 700)``.

    Parameters
    ----------
    hertz : int, float, array
        Hertz frequencies. Scalars, numpy arrays and plain sequences
        (lists, tuples) are accepted.

    Returns
    -------
    mel frequencies : float, array
        Mel frequencies, one per input frequency.

    References
    ----------
    .. [Kool12] Shashidhar G. Koolagudi, Deepika Rastogi, K. Sreenivasa Rao,
       Identification of Language using Mel-Frequency Cepstral Coefficients
       (MFCC), Procedia Engineering, Volume 38, 2012, Pages 3391-3398,
       ISSN 1877-7058

    """

    # coerce so that plain lists/tuples support the element-wise arithmetic
    hertz = np.asarray(hertz)

    return 1125 * np.log(1 + hertz / 700)
def mel_to_freq(mel):
    """Converts mel-frequencies to hertz frequencies.

    The conversion is ``hertz = 700 * (exp(mel / 1125) - 1)``.

    Parameters
    ----------
    mel : int, float, array
        Mel frequencies. Scalars, numpy arrays and plain sequences
        (lists, tuples) are accepted.

    Returns
    -------
    hertz frequencies : float, array
        Hertz frequencies, one per input mel frequency.

    References
    ----------
    .. [Kool12] Shashidhar G. Koolagudi, Deepika Rastogi, K. Sreenivasa Rao,
       Identification of Language using Mel-Frequency Cepstral Coefficients
       (MFCC), Procedia Engineering, Volume 38, 2012, Pages 3391-3398,
       ISSN 1877-7058

    """

    # coerce so that plain lists/tuples support the element-wise arithmetic
    mel = np.asarray(mel)

    return 700 * (np.exp(mel / 1125) - 1)
def mfcc(signal=None, sampling_rate=1000., window_size=100, num_filters=10):
    """Computes the mel-frequency cepstral coefficients.

    The power spectrum of the signal is weighted by a bank of ramp-up /
    ramp-down filters whose edges are evenly spaced on the mel scale. The
    filter outputs are converted to a logarithmic scale and passed through a
    DCT. The first DCT coefficient is discarded, the mean of the remaining
    coefficients is removed and a sinusoidal lifter is applied.

    Parameters
    ----------
    signal : array
        Input signal.
    sampling_rate : int, float, optional
        Sampling frequency (Hz).
    window_size : int
        DFT window size.
    num_filters : int
        Number of filters.

    Returns
    -------
    mfcc : array
        Signal mel-frequency cepstral coefficients.

    Raises
    ------
    TypeError
        If no input signal is given.

    References
    ----------
    .. [Haytham16] Fayek, Haytham. "Speech Processing for Machine Learning:
       Filter banks, Mel-Frequency Cepstral Coefficients (MFCCs) and What's
       In-Between." Blog post. 2016.
       https://haythamfayek.com/2016/04/21/speech-processing-for-machine-learning.html
    .. [Brihijoshi] 'Vanilla STFT and MFCC' by brihijoshi, accessed in
       october 2022: https://github.com/brihijoshi/vanilla-stft-mfcc/
    .. [Tsfel] 'Time Series Feature Extraction Library' by
       fraunhoferportugal, accessed in october 2022:
       https://github.com/fraunhoferportugal/tsfel/
    .. [Ilyamich] 'MFCC implementation and tutorial' by ilyamich, accessed in
       october 2022:
       https://www.kaggle.com/code/ilyamich/mfcc-implementation-and-tutorial

    """

    # a signal is mandatory
    if signal is None:
        raise TypeError("Please specify an input signal.")

    # work on a numpy version of the input
    signal = np.array(signal)

    # power spectrum (linear scale)
    freqs, power = st.power_spectrum(signal,
                                     sampling_rate=sampling_rate,
                                     decibel=False)

    # band limits of the filter bank, expressed in mel
    mel_low = freq_to_mel(0)
    mel_high = freq_to_mel(freqs[-1])

    # num_filters + 2 edges evenly spaced in mel, mapped back to hertz
    mel_edges = np.linspace(mel_low, mel_high, num=num_filters+2)
    hz_edges = np.array([mel_to_freq(m) for m in mel_edges])

    # spectrum bin index associated with each edge
    # NOTE(review): the edges are scaled by window_size, not by the length of
    # the computed spectrum -- confirm this is intended.
    scale = (window_size + 1) / sampling_rate
    bins = np.floor(scale * hz_edges).astype(int)

    n_freqs = len(freqs)
    rows = []

    # one filter per triple of consecutive edges (start, peak, stop)
    for start, peak, stop in zip(bins[:-2], bins[1:-1], bins[2:]):
        rising = np.linspace(0, 1, peak - start).tolist()
        falling = np.linspace(1, 0, stop - peak).tolist()
        weights = [0]*start + rising + falling

        # force every filter to the length of the spectrum
        missing = n_freqs - stop
        if missing > 0:
            weights += [0]*missing
        else:
            weights = weights[:n_freqs]

        rows.append(np.array(weights))

    bank = np.array(rows)

    # scale each filter by the inverse of its bandwidth in hertz
    enorm = 2.0 / (hz_edges[2:num_filters+2] - hz_edges[:num_filters])
    bank *= enorm[:, np.newaxis]

    # apply the filter bank to the squared, length-normalized spectrum
    signal_power = np.abs(power)**2 * (1/len(power))
    energies = np.dot(bank, signal_power.T)

    # avoid log(0), then convert to a logarithmic scale
    energies = np.where(energies == 0, np.finfo(float).eps, energies)
    energies = 20 * np.log10(energies)

    # cepstral coefficients: DCT of the log energies, first coefficient dropped
    mel_coeff = fft.dct(energies)[1:]

    # mean normalization
    mel_coeff -= (np.mean(mel_coeff, axis=0) + 1e-8)

    # sinusoidal liftering to the MFCCs to de-emphasize higher MFCCs
    cep_lifter = 22
    idx = np.arange(len(mel_coeff))
    lift = 1 + (cep_lifter / 2) * np.sin(np.pi * idx / cep_lifter)
    mel_coeff *= lift

    # output
    return utils.ReturnTuple((mel_coeff,), ("mfcc",))