# Source code for pycochleagram.subband

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import warnings
import numpy as np

from pycochleagram import utils


def reshape_signal_canonical(signal):
  """Convert the signal into a canonical shape for use with cochleagram.py
  functions.

  This first verifies that the signal contains only one data channel, which can
  be in a row, a column, or a flat array. Then it flattens the signal array.

  Args:
    signal (array): The sound signal (waveform) in the time domain. Should be
      either a flattened array with shape (n_samples,), a row vector with shape
      (1, n_samples), or a column vector with shape (n_samples, 1).

  Returns:
    array:
    **out_signal**: A 1D version of the signal. A 1D input is returned as-is
      (the same object); a row or column vector is returned as a flattened
      copy.

  Raises:
    ValueError: If the input `signal` is neither 1D nor a 2D array with a
      singleton dimension.
  """
  if signal.ndim == 1:  # already flat; hand it back untouched
    return signal
  if signal.ndim == 2 and 1 in signal.shape:  # row or column vector
    return signal.flatten()
  # The shape is wrapped in a 1-tuple on purpose: a bare tuple on the right of
  # `%` is unpacked as the argument list, which raised a TypeError here
  # instead of the documented ValueError.
  raise ValueError('signal must be a row or column vector; found shape: %s' % (signal.shape,))


def reshape_signal_batch(signal):
  """Bring the signal into a 2D batch layout for use with cochleagram.py
  functions, where the first dimension is the batch dimension.

  Args:
    signal (array): The sound signal (waveform) in the time domain. Accepted
      layouts are a flat array with shape (n_samples,), a row vector with
      shape (1, n_samples), a column vector with shape (n_samples, 1), or a
      2D matrix of the form [batch, waveform].

  Returns:
    array:
    **out_signal**: A 2D version of the signal. Flat arrays and column vectors
      are reshaped to (1, n_samples); row vectors and general 2D matrices are
      returned unchanged (the same object).

  Raises:
    ValueError: If the input `signal` is neither 1D nor 2D.
  """
  n_dims = signal.ndim
  if n_dims not in (1, 2):
    raise ValueError('signal should be flat array, row or column vector, or a 2D matrix with dimensions [batch, waveform]; found %s' % signal.ndim)

  if n_dims == 2:
    n_rows, n_cols = signal.shape
    # Row vectors and true [batch, waveform] matrices are already laid out
    # correctly; only a column vector needs to be turned on its side.
    if n_rows == 1 or n_cols != 1:
      return signal

  # flat array or column vector -> a single-row batch
  return signal.reshape((1, -1))


def generate_subband_envelopes_fast(signal, filters, padding_size=None, fft_mode='auto', debug_ret_all=False):
  """Compute the subband envelopes (i.e., the cochleagram) of a signal by
  applying the provided frequency-domain filters.

  Only the envelopes of the subband decomposition are returned. The steps are:
    1) flatten the signal with `reshape_signal_canonical`;
    2) optionally zero-pad it with `pad_signal`;
    3) transform it with `utils.rfft` when it is real-valued (only the first
       `nr` filter columns are applied and the remaining entries of the
       complex subband array stay zero), or with `utils.fft` otherwise;
    4) pass the filtered spectra through `utils.fhilbert` and take the
       magnitude with `np.abs`;
    5) drop the columns that correspond to the zero-padding.

  See utils.rfft, utils.fft, and utils.fhilbert for details on those helpers.

  Args:
    signal (array): The sound signal (waveform) in the time domain. Must be
      a shape accepted by `reshape_signal_canonical`, e.g. (n_samples,).
    filters (array): The filterbank, in frequency space, used to generate the
      cochleagram. This should be the full filter-set output of
      erbFilter.make_erb_cos_filters_nx, or similar.
    padding_size (int, optional): Number of zeros that `pad_signal` appends to
      the end of the signal before the decomposition. Padding is only applied
      when this is not None and greater than 1; the padded samples are removed
      from the result again.
    fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded as `mode` to the
      `utils` FFT helpers to select the FFT implementation.
    debug_ret_all (bool, optional): If exactly True, return a dict holding
      every local variable of this function (keyed by variable name) instead
      of only the envelopes.

  Returns:
    array:
    **subband_envelopes**: The subband envelopes (i.e., cochleagram) resulting
      from the subband decomposition, or the dict of locals described above
      when `debug_ret_all` is True.
  """
  # NOTE: the local variable names below are part of the `debug_ret_all`
  # output, so renaming or adding locals changes what callers receive.
  signal_flat = reshape_signal_canonical(signal)

  if padding_size is not None and padding_size > 1:
    signal_flat, padding = pad_signal(signal_flat, padding_size)

  if np.iscomplexobj(signal_flat):
    # complex input: a full FFT is required
    fft_sample = utils.fft(signal_flat, mode=fft_mode)
    subbands = filters * fft_sample
  else:
    # real input: the cheaper rfft only yields the first `nr` frequency bins
    fft_sample = utils.rfft(signal_flat, mode=fft_mode)
    nr = fft_sample.shape[0]
    # full-width complex buffer; bins past `nr` are left at zero ahead of the
    # hilbert step
    subbands = np.zeros(filters.shape, dtype=complex)
    subbands[:, :nr] = _real_freq_filter(fft_sample, filters)

  analytic_subbands = utils.fhilbert(subbands, mode=fft_mode)
  subband_envelopes = np.abs(analytic_subbands)

  if padding_size is not None and padding_size > 1:
    # `signal_flat` is the padded signal here, so this keeps exactly the
    # columns that belong to the unpadded input
    analytic_subbands = analytic_subbands[:, :signal_flat.shape[0] - padding]
    subband_envelopes = subband_envelopes[:, :signal_flat.shape[0] - padding]

  if debug_ret_all is True:
    # snapshot of all locals, keyed by name
    return dict(locals())
  return subband_envelopes


def generate_subbands(signal, filters, padding_size=None, fft_mode='auto', debug_ret_all=False):
  """Generate the subband decomposition of the signal by applying the provided
  filters.

  The signal is flattened with `reshape_signal_canonical`, optionally
  zero-padded with `pad_signal`, filtered in the frequency domain, and
  transformed back to the time domain. Real-valued signals of even length go
  through `utils.rfft`/`utils.irfft`; all other signals (complex, or real
  with odd length) go through `utils.fft`/`utils.ifft`, keeping the real part
  of the result.

  Args:
    signal (array): The sound signal (waveform) in the time domain.
    filters (array): The filterbank, in frequency space, used to generate the
      cochleagram. This should be the full filter-set output of
      erbFilter.make_erb_cos_filters_nx, or similar.
    padding_size (int, optional): Number of zeros that `pad_signal` appends to
      the end of the signal before the decomposition. Padding is only applied
      when this is not None and greater than 1; the padded samples are removed
      from the result again.
    fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded as `mode` to the
      `utils` FFT helpers to select the FFT implementation.
    debug_ret_all (bool, optional): If exactly True, return a dict holding
      every local variable of this function (keyed by variable name) instead
      of only the subbands.

  Returns:
    array:
    **subbands**: The subbands resulting from the subband decomposition, or
      the dict of locals described above when `debug_ret_all` is True.
  """
  # NOTE: the local variable names below are part of the `debug_ret_all`
  # output, so renaming or adding locals changes what callers receive.
  signal_flat = reshape_signal_canonical(signal)

  if padding_size is not None and padding_size > 1:
    signal_flat, padding = pad_signal(signal_flat, padding_size)

  is_signal_even = signal_flat.shape[0] % 2 == 0
  if np.isrealobj(signal_flat) and is_signal_even:
    # real, even-length input: speed up the computation with rfft/irfft
    fft_sample = utils.rfft(signal_flat, mode=fft_mode)
    subbands = _real_freq_filter(fft_sample, filters)
    subbands = utils.irfft(subbands, mode=fft_mode)  # operates row-wise
  else:
    # Complex or odd-length input takes the full FFT path. The original code
    # carried an odd-length warning branch nested under the even-length
    # condition, where it could never run; it is removed so the control flow
    # matches what actually executes.
    fft_sample = utils.fft(signal_flat, mode=fft_mode)
    subbands = filters * fft_sample
    subbands = np.real(utils.ifft(subbands, mode=fft_mode))  # operates row-wise

  if padding_size is not None and padding_size > 1:
    # `signal_flat` is the padded signal here, so this keeps exactly the
    # columns that belong to the unpadded input
    subbands = subbands[:, :signal_flat.shape[0] - padding]

  if debug_ret_all is True:
    # snapshot of all locals, keyed by name
    return dict(locals())
  return subbands


def generate_analytic_subbands(signal, filters, padding_size=None, fft_mode='auto'):
  """Compute the analytic subbands (i.e., hilbert transform) of a signal by
  applying the provided frequency-domain filters.

  The signal is flattened with `reshape_signal_canonical`, optionally
  zero-padded with `pad_signal`, transformed with `utils.fft`, multiplied by
  the filterbank, and handed to `utils.fhilbert`. For full cochleagram
  generation, see generate_subband_envelopes.

  Args:
    signal (array): The sound signal (waveform) in the time domain.
    filters (array): The filterbank, in frequency space, used to generate the
      cochleagram. This should be the full filter-set output of
      erbFilter.make_erb_cos_filters_nx, or similar.
    padding_size (int, optional): Number of zeros that `pad_signal` appends to
      the end of the signal before the decomposition. Padding is only applied
      when this is not None and greater than 1; the padded samples are removed
      from the result again.
    fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded as `mode` to the
      `utils` FFT helpers to select the FFT implementation.

  Returns:
    array:
    **analytic_subbands**: The output of `utils.fhilbert` for the filtered
      signal, with the columns belonging to the zero-padding removed.
  """
  flat = reshape_signal_canonical(signal)

  should_pad = padding_size is not None and padding_size > 1
  if should_pad:
    flat, n_padded = pad_signal(flat, padding_size)

  spectrum = utils.fft(flat, mode=fft_mode)
  analytic = utils.fhilbert(filters * spectrum, mode=fft_mode)

  if should_pad:
    # `flat` is the padded signal, so this recovers the unpadded length
    n_keep = flat.shape[0] - n_padded
    analytic = analytic[:, :n_keep]

  return analytic


def generate_subband_envelopes(signal, filters, padding_size=None, debug_ret_all=False, fft_mode='auto'):
  """Generate the subband envelopes (i.e., the cochleagram) of the signal by
  applying the provided filters.

  The envelopes are the magnitude (`np.abs`) of the analytic subbands returned
  by `generate_analytic_subbands`, which also handles the optional
  zero-padding.

  Args:
    signal (array): The sound signal (waveform) in the time domain.
    filters (array): The filterbank, in frequency space, used to generate the
      cochleagram. This should be the full filter-set output of
      erbFilter.make_erb_cos_filters_nx, or similar.
    padding_size (int, optional): Forwarded to `generate_analytic_subbands`;
      controls the zero-padding applied before the decomposition.
    debug_ret_all (bool, optional): If exactly True, return a dict holding
      every local variable of this function (keyed by variable name) instead
      of only the envelopes.
    fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded to
      `generate_analytic_subbands` to select the FFT implementation. It is the
      last parameter so that existing positional calls keep working.

  Returns:
    array:
    **subband_envelopes**: The subband envelopes (i.e., cochleagram) resulting
      from the subband decomposition, or the dict of locals described above
      when `debug_ret_all` is True.
  """
  # `fft_mode` was documented but never accepted or forwarded, so the FFT
  # backend could not be chosen from this entry point.
  analytic_subbands = generate_analytic_subbands(
      signal, filters, padding_size=padding_size, fft_mode=fft_mode)
  subband_envelopes = np.abs(analytic_subbands)

  if debug_ret_all is True:
    # snapshot of all locals, keyed by name
    return dict(locals())
  return subband_envelopes


def collapse_subbands(subbands, filters, fft_mode='auto'):
  """Collapse the subbands into a waveform by (re)applying the filterbank.

  The subbands are transformed with `utils.fft`, multiplied by the filters,
  transformed back with `utils.ifft`, and the real parts are summed over the
  first axis.

  Args:
    subbands (array): The subband decomposition (i.e., cochleagram) to collapse.
    filters (array): The filterbank, in frequency space, used to generate the
      cochleagram. This should be the full filter-set output of
      erbFilter.make_erb_cos_filters_nx, or similar, that was used to create
      `subbands`.
    fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded as `mode` to the
      `utils` FFT helpers to select the FFT implementation.

  Returns:
    array:
    **signal**: The signal resulting from collapsing the subbands.
  """
  spectra = utils.fft(subbands, mode=fft_mode)
  refiltered = utils.ifft(filters * spectra, mode=fft_mode)
  # only the real part is kept before summing across the first axis
  return np.real(refiltered).sum(axis=0)


def pad_signal(signal, padding_size, axis=0):
  """Pad the signal by appending zeros to the end of the given axis.

  The padded signal is `padding_size` elements longer than the input along
  `axis`; `padding_size` is a count of zeros, not a multiplicative factor.

  Args:
    signal (array): The signal to be zero-padded.
    padding_size (int): Number of zeros to append along `axis`. If this is
      None or less than 1, the signal is returned unchanged.
    axis (int): Specifies the axis to pad; defaults to 0.

  Returns:
    tuple:
      **pad_signal** (*array*): The zero-padded signal (or the input itself
        when no padding was applied).
      **padding_size** (*int*): The length of the zero-padding added to the
        array; 0 when no padding was applied.
  """
  if padding_size is not None and padding_size >= 1:
    pad_shape = list(signal.shape)
    pad_shape[axis] = padding_size
    # `axis` must be forwarded: without it numpy always joins along axis 0,
    # which fails (or pads the wrong axis) for any other requested axis.
    padded = np.concatenate((signal, np.zeros(pad_shape)), axis=axis)
  else:
    padding_size = 0
    padded = signal
  return (padded, padding_size)


def _real_freq_filter(rfft_signal, filters):
  """Helper function to apply a full filterbank to a rfft signal
  """
  nr = rfft_signal.shape[0]
  subbands = filters[:, :nr] * rfft_signal
  return subbands