This notebook presents the different approaches used to find a good way to remove melodic information from an audio signal.
Summarizing, the constructions exemplified below are:
# Print
from __future__ import print_function
# We'll need numpy for some mathematical operations
import numpy as np
# matplotlib for displaying the output
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')
%matplotlib inline
# and IPython.display for audio output
import IPython.display
# Librosa for audio
import librosa
# And the display module for visualization
import librosa.display
# Functions of time
import time
# Open annotation file
import csv
# Melodia vamp plugin
import vamp
# Scientific python
import scipy
# Get signal processing functions
import scipy.signal as signal
# Make melodic mask
import src.core as utils
# Parameters to calculate the spectrogram
sr = 44100  # Sample rate (Hz)
win_length = 2048  # FFT window size (samples)
fb = sr / win_length  # Width of one FFT bin (Hz)
fs = win_length / sr  # Duration of one analysis frame (seconds)
# Integer division: librosa expects hop_length to be an int; plain `/`
# produces 512.0 under Python 3 true division and breaks frame indexing.
hop_length = win_length // 4  # Hop size (samples, 75% window overlap)
# You can choose another example here to compute and listen to the results
audio_path = 'audios/short.wav'
# Load the audio signal into `audio`; librosa resamples to the requested
# sr=44100 if the file's native rate differs
audio, sr = librosa.load(audio_path, sr=sr)
# Calculate the complex spectrogram.
# NOTE(review): this uses librosa's default n_fft/hop_length, not the
# win_length/hop_length computed above -- confirm this is intended.
D = librosa.stft(audio, window=signal.cosine)
# Extract the melodic line (f0 contour) via the Melodia vamp plugin
# wrapped by src.core
melody = utils.calculateMelodicLineMELODIA(audio)
# Generate a melodic mask based on Melodia contours
# (kind=1: presumably fundamental frequencies only, per the title below
# -- confirm against src.core)
specMelodic = utils.generateMelodicMask(D, melody, kind=1)
# Show the mask that keeps only the fundamental frequencies
plt.figure(figsize=(13, 4))
librosa.display.specshow(specMelodic, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask with only fundamental frequencies')
plt.show()
# Apply the mask to the spectrogram, resynthesize, and write the result
melodic_signal = librosa.core.istft(specMelodic.astype(float) * D)
librosa.output.write_wav('example_1_melody.wav', melodic_signal, sr, norm=False)
IPython.display.Audio(data=melodic_signal, rate=sr)
del melodic_signal
# Build a melodic mask from the Melodia contours, this time keeping the
# fundamentals together with 50 harmonics (kind=2)
specMelodic = utils.generateMelodicMask(D, melody, kind=2, n_harm=50)
# Visualize the resulting mask
plt.figure(figsize=(13, 4))
librosa.display.specshow(specMelodic, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask with fundamental frequencies and harmonics')
plt.show()
# Resynthesize the masked spectrogram and save it to disk
melodic_signal = librosa.core.istft(specMelodic.astype(float) * D)
librosa.output.write_wav('example_2_melody.wav', melodic_signal, sr, norm=False)
IPython.display.Audio(data=melodic_signal, rate=sr)
del melodic_signal
# Build a melodic mask from the Melodia contours with harmonics and
# dilation (kind=3); the number of harmonics is configurable via n_harm
specMelodic = utils.generateMelodicMask(D, melody, kind=3, n_harm=50)
# Visualize the resulting mask
plt.figure(figsize=(13, 4))
librosa.display.specshow(specMelodic, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask with harmonics and dilated')
plt.show()
# Resynthesize the masked spectrogram and save it to disk
melodic_signal = librosa.core.istft(specMelodic.astype(float) * D)
librosa.output.write_wav('example_3_melody.wav', melodic_signal, sr, norm=False)
IPython.display.Audio(data=melodic_signal, rate=sr)
del melodic_signal
# Build a harmonic melodic mask, then dilate it at note onsets using the
# spectral novelty function
base_mask = utils.generateMelodicMask(D, melody, kind=3, n_harm=50)
specMelodic = utils.spectralNoveltyFunction(D, base_mask)
# Visualize the onset-dilated mask
plt.figure(figsize=(13, 4))
librosa.display.specshow(specMelodic, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask dilated in start notes')
plt.show()
# Resynthesize the masked spectrogram and save it to disk
melodic_signal = librosa.core.istft(specMelodic.astype(float) * D)
librosa.output.write_wav('example_4_melody.wav', melodic_signal, sr, norm=False)
IPython.display.Audio(data=melodic_signal, rate=sr)
del melodic_signal
# Generate a melodic mask based on Melodia contours, dilated at note
# onsets via the spectral novelty function
specMelodic = utils.generateMelodicMask(D, melody, kind=3, n_harm=50)
specDilated = utils.spectralNoveltyFunction(D, specMelodic)
# Getting masks from librosa (harmonic/percussive separation)
mask_H, mask_P = librosa.decompose.hpss(D, mask=True)
# Keep only the percussive part of the dilated region, but never drop
# bins that belong to the plain melodic mask
specPercuss = np.maximum(np.multiply(specDilated, mask_P), specMelodic)
plt.figure(figsize=(13,4))
librosa.display.specshow(specPercuss, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask dilated in start notes percussive')
plt.show()
# Save the melodic audio signal.
# BUG FIX: resynthesize from the mask displayed above (specPercuss);
# the original used specMelodic, so the saved audio did not match the plot.
y_m = librosa.core.istft(specPercuss.astype(float)*D)
librosa.output.write_wav('example_5_melody.wav', y_m, sr, norm=False)
IPython.display.Audio(data=y_m, rate=sr)
del y_m
# Generate a melodic mask based on Melodia contours
# You also can define the number of harmonics
specMelodic = utils.generateMelodicMask(D, melody, kind=3, n_harm=50)
# Dilate the mask by morphological hit-or-miss; only the final (max)
# mask is needed here, so release the intermediates immediately
specHit, specHitDilated, specMax = utils.hitMissDilateMask(specMelodic)
del specHit, specHitDilated
plt.figure(figsize=(13,4))
librosa.display.specshow(specMax, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask dilated openning spectrum')
plt.show()
# Save the melodic audio signal.
# BUG FIX: resynthesize from the mask displayed above (specMax);
# the original used specMelodic, so the saved audio did not match the plot.
y_m = librosa.core.istft(specMax.astype(float)*D)
librosa.output.write_wav('example_6_melody.wav', y_m, sr, norm=False)
IPython.display.Audio(data=y_m, rate=sr)
del y_m
# Generate a melodic mask based on Melodia contours
# You also can define the number of harmonics
specMelodic = utils.generateMelodicMask(D, melody, kind=3, n_harm=50)
# Dilate the mask by morphological hit-or-miss; only the final (max)
# mask is needed here, so release the intermediates immediately
specHit, specHitDilated, specMax = utils.hitMissDilateMask(specMelodic)
del specHit, specHitDilated
# Getting masks from librosa (harmonic/percussive separation)
mask_H, mask_P = librosa.decompose.hpss(D, mask=True)
# Keep only the percussive part of the dilated region, but never drop
# bins that belong to the plain melodic mask
specPercuss = np.maximum(np.multiply(specMax, mask_P), specMelodic)
plt.figure(figsize=(13,4))
librosa.display.specshow(specPercuss, y_axis='log', x_axis='time', sr=sr)
plt.colorbar(format='%+2.0f dB')
plt.title('Melodic mask dilated openning spectrum percussive')
plt.show()
# Save the melodic audio signal.
# BUG FIX: resynthesize from the mask displayed above (specPercuss);
# the original used specMelodic, so the saved audio did not match the plot.
y_m = librosa.core.istft(specPercuss.astype(float)*D)
librosa.output.write_wav('example_7_melody.wav', y_m, sr, norm=False)
IPython.display.Audio(data=y_m, rate=sr)
del y_m