# Source code for pycochleagram.demo

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
from random import choice
from time import sleep, time
import numpy as np
from scipy.signal import welch, decimate

from pycochleagram import cochleagram as cgram
from pycochleagram import erbfilter as erb
from pycochleagram import utils


# Pick the matplotlib backend before pyplot is imported: when no display is
# detected, switch to the 'Agg' backend; otherwise keep matplotlib's default.
if not utils.check_if_display_exists():
  import matplotlib
  matplotlib.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow, show


### Cochleagram Generation from Waveform ###
def _plot_signal_overview(signal, sr, waveform_pos, spectrum_pos):
  """Plot the waveform of `signal` and its Welch power spectral density.

  Args:
    signal (array): Signal containing waveform data.
    sr (int): Sampling rate of `signal`, passed to `scipy.signal.welch`.
    waveform_pos (int): `plt.subplot` position code for the waveform panel.
    spectrum_pos (int): `plt.subplot` position code for the PSD panel.

  Returns:
    None
  """
  plt.subplot(waveform_pos)
  plt.title('Signal waveform')
  plt.plot(signal)
  plt.ylabel('amplitude')
  plt.xlabel('time')

  plt.subplot(spectrum_pos)
  plt.title('Signal Frequency Content')
  f, Pxx_den = welch(signal.flatten(), sr, nperseg=1024)
  plt.semilogy(f, Pxx_den)
  plt.xlabel('frequency [Hz]')
  plt.ylabel('PSD [V**2/Hz]')


def _plot_cochleagram_panel(position, title, coch_img):
  """Draw one cochleagram returned by `demo_human_cochleagram_helper`.

  Args:
    position (int): `plt.subplot` position code for the panel.
    title (str): Title of the panel.
    coch_img (array): Output of `demo_human_cochleagram_helper`.

  Returns:
    None
  """
  plt.subplot(position)
  plt.title(title)
  plt.ylabel('filter #')
  plt.xlabel('time')
  # the helper returns a vertically flipped array; flip it back before display
  utils.cochshow(np.flipud(coch_img), interact=False)
  plt.gca().invert_yaxis()


def demo_human_cochleagram(signal=None, sr=None, n=None):
  """Demo to generate the human cochleagrams, displaying various nonlinearity
  and downsampling options.

  Two figures are built: the first compares nonlinearities (none, 'db',
  'power'), the second compares downsampling methods (predefined polyphase
  resampling vs. a custom `scipy.signal.decimate` callable). Both are
  displayed by the single `plt.show()` call at the end.

  Args:
    signal (array, optional): Signal containing waveform data. If None, the
      tone from `make_harmonic_stack()` (40 harmonics, f0=100 by default) is
      used, and `sr` and `n` are taken from its parameters.
    sr (int, optional): Sampling rate of the input signal. Required when
      `signal` is provided.
    n (int, optional): Number of filters to use in the filterbank. Required
      when `signal` is provided.

  Returns:
    None

  Raises:
    AssertionError: If `signal` is provided without `sr` or `n`.
  """
  # get a signal if one isn't provided
  if signal is None:
    signal, signal_params = make_harmonic_stack()
    sr = signal_params['sr']
    n = signal_params['n']
  else:
    assert sr is not None, '`sr` must be provided along with `signal`'
    assert n is not None, '`n` must be provided along with `signal`'

  ### Demo Cochleagram Generation with Predefined Nonlinearities ###
  # no nonlinearity
  coch = demo_human_cochleagram_helper(signal, sr, n, nonlinearity=None)
  # convert to decibel
  coch_log = demo_human_cochleagram_helper(signal, sr, n, nonlinearity='db')
  # 3/10 power compression
  coch_pow = demo_human_cochleagram_helper(signal, sr, n, nonlinearity='power')

  _plot_signal_overview(signal, sr, 321, 323)
  _plot_cochleagram_panel(322, 'Cochleagram with no nonlinearity', coch)
  _plot_cochleagram_panel(324, 'Cochleagram with nonlinearity: "log"', coch_log)
  _plot_cochleagram_panel(326, 'Cochleagram with nonlinearity: "power"', coch_pow)
  plt.tight_layout()

  ### Demo Cochleagram Generation with Downsampling ###
  plt.figure()
  # predefined polyphase resampling with upsample factor = 10000, downsample factor = `sr`
  cochd_poly = demo_human_cochleagram_helper(signal, sr, n, downsample=10000)

  # custom downsampling function to use decimate with a downsampling factor of 2
  def custom_downsample_fx(x):
    return decimate(x, 2, axis=1, ftype='fir', zero_phase=True)

  cochd_decimate = demo_human_cochleagram_helper(signal, sr, n, downsample=custom_downsample_fx)

  _plot_signal_overview(signal, sr, 221, 223)
  _plot_cochleagram_panel(222, 'Cochleagram with 2x default\n(polyphase) downsampling', cochd_poly)
  _plot_cochleagram_panel(224, 'Cochleagram with 2x custom\n(decimate) downsampling', cochd_decimate)
  plt.tight_layout()
  plt.show()


def demo_human_cochleagram_helper(signal, sr, n, sample_factor=2, downsample=None, nonlinearity=None):
  """Generate a cochleagram with `cochleagram.human_cochleagram` and flip it
  vertically.

  This is a thin wrapper: all arguments are forwarded to
  `cochleagram.human_cochleagram` (always with `strict=False`), and the result
  is passed through `np.flipud`. Unlike the other demo functions, `signal` is
  not replaced by a synthesized tone when it is None.

  Args:
    signal (array): Time-domain signal to generate the cochleagram from.
    sr (int): Sampling rate associated with `signal`.
    n (int): Number of filters to use.
    sample_factor (int): Determines the density (or "overcompleteness") of the
      filterbank. Original MATLAB code supported 1, 2, 4.
    downsample ({None, int, callable}, optional): Determines downsampling
      method to apply. If None, no downsampling will be applied. If this is an
      int, it will be interpreted as the upsampling factor in polyphase
      resampling (with `sr` as the downsampling factor). A custom downsampling
      function can be provided as a callable. The callable will be called on
      the subband envelopes.
    nonlinearity ({None, 'db', 'power', callable}, optional): Determines
      nonlinearity method to apply. None applies no nonlinearity. 'db' will
      convert output to decibels (truncated at -60). 'power' will apply 3/10
      power compression.

  Returns:
    array:
      **cochleagram**: The cochleagram of the input signal, reversed along
        its first axis (i.e., in image coordinates).
  """
  human_coch = cgram.human_cochleagram(signal, sr, n=n, sample_factor=sample_factor,
      downsample=downsample, nonlinearity=nonlinearity, strict=False)
  # the cochleagram is upside down (i.e., in image coordinates); np.flipud
  # reverses axis 0 -- NOTE(review): for batched (3D) output this presumably
  # reverses the batch order rather than the filter axis; confirm.
  return np.flipud(human_coch)


### Waveform Generation from Cochleagram (Inversion) ###
def demo_invert_cochleagram(signal=None, sr=None, n=None, playback=False):
  """Demo that will generate a cochleagram from a signal, then invert this
  cochleagram to produce a waveform signal.

  Args:
    signal (array, optional): Signal containing waveform data. If None, the
      tone from `make_harmonic_stack()` is used, and `sr`, `n`, `low_lim` and
      `hi_lim` are taken from its parameters.
    sr (int, optional): Sampling rate of the input signal. Required when
      `signal` is provided.
    n (int, optional): Number of filters to use in the filterbank. Required
      when `signal` is provided.
    playback (bool, optional): Determines if audio signals will be played
      (using pyaudio). If False, only plots will be created. If True, the
      original signal and inverted cochleagram signal will be played. NOTE:
      Be careful with the volume when using playback, things can get
      *very loud*.

  Returns:
    None

  Raises:
    AssertionError: If `signal` is provided without `sr` or `n`.
  """
  # get a signal if one isn't provided
  if signal is None:
    signal, signal_params = make_harmonic_stack()
    sr = signal_params['sr']
    n = signal_params['n']
    low_lim = signal_params['low_lim']
    hi_lim = signal_params['hi_lim']
  else:
    assert sr is not None, '`sr` must be provided along with `signal`'
    assert n is not None, '`n` must be provided along with `signal`'
    low_lim = 50  # this is the default for cochleagram.human_cochleagram
    hi_lim = 20000  # this is the default for cochleagram.human_cochleagram

  # generate a cochleagram from the signal
  sample_factor = 2  # this is the default for cochleagram.human_cochleagram
  coch = demo_human_cochleagram_helper(signal, sr, n, sample_factor=sample_factor)
  print('Generated cochleagram with shape: ', coch.shape)

  # demo_human_cochleagram_helper returns a vertically flipped array; undo
  # that flip before inverting the cochleagram to get a signal
  coch = np.flipud(coch)
  inv_coch_sig, inv_coch = cgram.invert_cochleagram(coch, sr, n, low_lim, hi_lim, sample_factor, n_iter=10, strict=False)

  print('Generated inverted cochleagram')
  print('Original signal shape: %s, Inverted cochleagram signal shape: %s' % (signal.shape, inv_coch_sig.shape))

  # both cochleagrams are drawn the same way: unflipped data, inverted y axis
  panels = ((211, 'Cochleagram of original signal', coch),
            (212, 'Cochleagram of inverted signal', inv_coch))
  for position, title, image in panels:
    plt.subplot(position)
    plt.title(title)
    utils.cochshow(image, interact=False)
    plt.ylabel('filter #')
    plt.xlabel('time')
    plt.gca().invert_yaxis()
  plt.tight_layout()
  plt.show()

  if playback:
    print('playing original signal...')
    utils.play_array(signal, pyaudio_params={'rate': sr}, ignore_warning=True)
    sleep(1)  # short pause between the two sounds
    print('playing inverted cochleagram signal...')
    utils.play_array(inv_coch_sig, pyaudio_params={'rate': sr}, ignore_warning=True)


def demo_playback(signal=None, sr=None, ignore_warning=False, output_device_index=1):
  """Demo audio playback with pyaudio.

  Args:
    signal (array, optional): Signal containing waveform data. If None, the
      tone from `make_harmonic_stack()` is used, and `sr` is taken from its
      parameters.
    sr (int, optional): Sampling rate of the input signal. Required when
      `signal` is provided.
    ignore_warning (bool, optional): Forwarded unchanged to
      `utils.play_array`. NOTE: Be careful with the volume when using
      playback, things can get *very loud*.
    output_device_index (int, optional): Value stored under
      'output_device_index' in the pyaudio parameters passed to
      `utils.play_array`. Defaults to 1, the value this demo has always used.

  Returns:
    None

  Raises:
    AssertionError: If `signal` is provided without `sr`.
  """
  # get a signal if one isn't provided
  if signal is None:
    signal, signal_params = make_harmonic_stack()
    sr = signal_params['sr']
  else:
    assert sr is not None, '`sr` must be provided along with `signal`'

  # audio playback
  pyaudio_params = {
      'channels': utils.get_channels(signal),
      'rate': sr,
      'output': True,
      'output_device_index': output_device_index,
  }
  print(signal.shape)
  utils.play_array(signal, pyaudio_params=pyaudio_params, ignore_warning=ignore_warning)


def make_harmonic_stack(f0=100, n_harm=40, dur=0.25001, sr=20000, low_lim=50, hi_lim=20000, n=None):
  """Synthesize a tone created with a stack of harmonics.

  The tone is the sum of `n_harm` zero-phase, unit-amplitude sinusoids at
  integer multiples of `f0`, sampled every `1 / sr` on a time axis that starts
  at 0 and stops before `dur + 1 / sr`.

  Args:
    f0 (int, optional): Fundamental frequency.
    n_harm (int, optional): Number of harmonics to include.
    dur (float, optional): Duration, in the same time unit as `1 / sr` (i.e.,
      seconds when `sr` is in Hz). Note, the default value was chosen to
      create a signal length that is compatible with the predefined
      downsampling method.
    sr (int, optional): Sampling rate.
    low_lim (int, optional): Lower limit for filterbank.
    hi_lim (int, optional): Upper limit for filterbank.
    n (int, optional): Number of filters in filterbank. If None, it is
      derived from the ERB distance between `low_lim` and `hi_lim`.

  Returns:
    tuple:
      **signal** (array): Synthesized tone.
      **signal_params** (dict): A dictionary containing all of the parameters
        used to synthesize the tone.
  """
  # choose a number of filters from the ERB span when none is given
  # (the origin of this formula is not documented)
  if n is None:
    n = int(np.floor(erb.freq2erb(hi_lim) - erb.freq2erb(low_lim)) - 1)

  # synthesize tone from harmonic stack
  sample_period = 1 / sr
  t = np.arange(0, dur + sample_period, sample_period)
  signal = np.zeros_like(t)
  for harmonic in range(1, n_harm + 1):
    signal += np.sin(2 * np.pi * f0 * harmonic * t)  # zero-phase

  # store all the params in a dictionary
  signal_params = {
      'f0': f0,
      'n_harm': n_harm,
      'dur': dur,
      'sr': sr,
      'low_lim': low_lim,
      'hi_lim': hi_lim,
      'n': n,
  }

  return signal, signal_params


def main(ignore_playback_warning=False, mode='rand_sound'):
  """Run all demo functions.

  Args:
    ignore_playback_warning (bool, optional): To use audio playback, you must
      acknowledge that things can get *very loud* by setting
      `ignore_playback_warning` to True.
    mode ({'rand_sound', 'batch', 'naive_batch', other}): Set the mode for the
      demo (case-insensitive). If this is 'rand_sound', a .wav file from the
      demo_stim/ directory will be chosen at random and used for the demos.
      If this is 'batch' or 'naive_batch', cochleagram generation is timed on
      demo_stim/wavs_speech_n10_2s_16k.npy (all signals in one call, or one
      call per signal, respectively) and the function returns without running
      the demos. If this is anything else, a harmonic stack of 40 harmonics
      and an f0=100Hz will be generated and used.

  Returns:
    None

  Raises:
    IndexError: If mode is 'rand_sound' and demo_stim/ contains no .wav file.
  """
  mode = mode.lower()
  # demo stimuli live next to this file, independent of the working directory
  demo_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'demo_stim')

  if mode == 'rand_sound':
    wav_paths = [os.path.join(demo_path, f) for f in os.listdir(demo_path) if f.endswith('.wav')]
    if not wav_paths:
      raise IndexError('No .wav files found in demo directory: %s' % demo_path)
    rfn = choice(wav_paths)
    print(os.listdir(demo_path))
    print('Running demo with sound file: %s ' % rfn)
    demo_stim, demo_sr = utils.wav_to_array(rfn)
    demo_n = 38  # default filter for low_lim=50 hi_lim=20000
  elif mode in ('batch', 'naive_batch'):
    # resolve the stimulus file against demo_path rather than the working directory
    demo_stim = np.load(os.path.join(demo_path, 'wavs_speech_n10_2s_16k.npy'))
    demo_sr = 16000
    demo_n = 38  # default filter for low_lim=50 hi_lim=20000
    n_signals = demo_stim.shape[0]
    start_time = time()
    if mode == 'batch':
      label = 'Improved Batch'
      demo_human_cochleagram_helper(demo_stim, demo_sr, demo_n, downsample=200, nonlinearity='power')
    else:
      label = 'Naive Batch'
      for temp_signal in demo_stim:
        demo_human_cochleagram_helper(temp_signal, demo_sr, demo_n, downsample=200, nonlinearity='power')
    total_time = time() - start_time
    # average over the actual number of signals instead of a hard-coded 10
    print('%s --> %s, %ss per coch' % (label, total_time, total_time / n_signals))
    return
  else:
    # the demo functions synthesize a harmonic stack when given no signal
    demo_stim, demo_sr, demo_n = None, None, None

  print('\n### DEMO: COCHLEAGRAM GENERATION ###')
  print('====================================')
  demo_human_cochleagram(demo_stim, demo_sr, demo_n)

  print('\n### DEMO: AUDIO PLAYBACK ###')
  print('============================')
  demo_playback(demo_stim, demo_sr, ignore_warning=ignore_playback_warning)

  print('\n### DEMO: COCHLEAGRAM INVERSION ###\n')
  print('===================================')
  demo_invert_cochleagram(demo_stim, demo_sr, demo_n, playback=ignore_playback_warning)


if __name__ == '__main__':
  # command-line entry point: parse the flags, then run every demo
  arg_parser = argparse.ArgumentParser()
  arg_parser.add_argument(
      '-p', '--playback', action='store_true',
      help='If True, will playback audio signals as part of demo. NOTE: This can get LOUD.')
  arg_parser.add_argument(
      '-m', '--mode', default='rand_sound',
      help=('Determines what type of signals to use for the demo. Can be "harm_stack" for a synthesized tone, or "rand_sound" '
            'for a random soundfile in the demo_stim/ directory. Defaults to "rand_sound".'))
  cli_args = arg_parser.parse_args()

  playback_enabled = cli_args.playback
  if not playback_enabled:
    print('\nNOTE: Audio playback has been disabled for this demo. Enable with the flag -p. NOTE: This can get LOUD.\n')
  main(playback_enabled, mode=cli_args.mode)