Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,7 @@ carts/
experiments/


.obsidian/
.obsidian/

*.ipynb
/ray/
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@

# Every target here is a command rather than a file to build, so each one is
# declared phony; otherwise a file or directory with the same name would make
# `make` consider the target up to date and skip it.
# (install_train_deps is declared but has no rule in this file yet.)
.PHONY: install_data_deps install_train_deps render_multivoice_notebook


# Install only the `vis` and `data` poetry dependency groups.
install_data_deps:
	poetry install --only vis --only data


# Convert the jupytext script into a notebook, then execute it and export the
# result as markdown with the code cells hidden (--no-input).
render_multivoice_notebook:
	jupytext --to ipynb notebooks/dataloaders/multi_voice2voice.py
	jupyter nbconvert --to markdown --execute notebooks/dataloaders/multi_voice2voice.ipynb --no-input --output-dir='./docs/experiment_logs/dataset_visualisations'
4 changes: 2 additions & 2 deletions dataloader_burn_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
logging.getLogger("ray.data._internal.execution.streaming_executor").setLevel(logging.ERROR)

# logger.setLevel(logging.ERROR)
from s4_dx7.lib.data.audio_data_module import AudioDataModule
from s4_dx7.lightning.data.single_voice_to_voice import SingleVoice2VoiceDataModule
import ray
ray.init(log_to_driver=False)
bit_rate=8
sr=8000
sample_size=8
data_module = AudioDataModule(bit_rate=bit_rate, sr=sr, limit=6400)
data_module = SingleVoice2VoiceDataModule(bit_rate=bit_rate, sr=sr, limit=6400)
# Iterate over each batch (assuming a single batch here for simplicity)
for i in tqdm(count()):
c=count()
Expand Down
412 changes: 412 additions & 0 deletions docs/experiment_logs/dataset_visualisations/multi_voice2voice.md

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
104 changes: 104 additions & 0 deletions docs/experiment_logs/s4-dx7-vc-fir-03.md

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions notebooks/data_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import duckdb
from matplotlib import pyplot as plt
import torch
from s4_dx7.lib.data.audio_data_module import AudioDataModule
from s4_dx7.lightning.data.single_voice_to_voice import SingleVoice2VoiceDataModule
from s4_dx7.lib.render import render_batch, to_midi
from s4_dx7.lib.s4.generate import load_experiment
from s4_dx7.lib.visualistaion.audio import create_melspec_figure, waveform_segment_figure, render_piano_roll, render_voice
Expand Down Expand Up @@ -116,7 +116,7 @@ def wrapper(*args, **kwargs):
load_data=False,
experiment_root=f"{model_path}/s4-dx7-vc-fir"
)
source_patch, target_patch = AudioDataModule.get_voices()
source_patch, target_patch = SingleVoice2VoiceDataModule.get_voices()
# %%
import base64
from mido import MidiFile, MidiTrack, Message
Expand Down Expand Up @@ -156,7 +156,7 @@ def normalize_signal(signal, bit_rate):
def greedy_decode(signal):
return signal.argmax(-1)
def corrupt_signal(signal):
return AudioDataModule.clean_signal(signal, 7, 8)
return SingleVoice2VoiceDataModule.clean_signal(signal, 7, 8)
return torch.clamp(signal + int(signal.float().mean()), 0, 2**(8-1))
# return torch.clamp(signal, 0, 2**8-1)
# Function to create Mel Spectrogram plot
Expand All @@ -174,8 +174,8 @@ def s4_dx7_vc_fir_00(*args, **kwargs):
source_signal = render_voice(ar['notes'], source_patch, config.dataset)
broken_source_signal = corrupt_signal(source_signal)[..., :-1]
broken_target_signal = corrupt_signal(target_signal)[..., 1:]
source_signal = AudioDataModule.clean_signal(source_signal, 8, 8)[..., :-1]
target_signal = AudioDataModule.clean_signal(target_signal, 8, 8)[..., 1:]
source_signal = SingleVoice2VoiceDataModule.clean_signal(source_signal, 8, 8)[..., :-1]
target_signal = SingleVoice2VoiceDataModule.clean_signal(target_signal, 8, 8)[..., 1:]

with torch.no_grad():
generated_signal_logits = s4_dx7_vc_fir_00((source_signal, target_signal.unsqueeze(-1)))[0]
Expand Down
119 changes: 119 additions & 0 deletions notebooks/dataloaders/multi_voice2voice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#%% [markdown]
# This page is a render of the notebook found at `notebooks/dataloaders/multi_voice2voice.py` in the [source repo](https://github.com/Nintorac/s4_dx7)
#
# > [!warning]
# > **The audio can be quite loud**: Turn down your volume
#
# It shows several random examples of data used in the training of [[s4-dx7-vc-fir-03]]
# %%
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio, display
import numpy as np
import pandas as pd
import scipy.signal as s
from s4_dx7.lightning.data import MultiVoice2VoiceDataModule
from s4_dx7.lib.visualistaion.audio import waveform_segment_figure
from torchaudio.functional import mu_law_decoding
from s4_dx7.notebook.mel_spec_audio import plot_melspectrogram_and_play_button
## TODO: figure out how to remove error output from the rendered document
#%%
#%%
# Parameters for the dataset preview rendered by this notebook.
bit_rate = 8        # forwarded to the data module and to mu_law_decoding
sr = 40000          # sample rate handed to the data module and the plot helpers
baud = 8000         # forwarded to the data module as patch_baud_rate
sample_size = 100   # row limit for the data module, also passed to get_train_dataloader

# Samples per encoded bit. Floor division keeps this in exact integer
# arithmetic rather than truncating the float product 1/baud*sr, which could
# land just below the intended integer for other sr/baud combinations.
dt = sr // baud
# Length of the patch-encoding prefix in samples: dt samples per bit, 8 bits
# per byte, 155 bytes (presumably the size of one patch - TODO confirm).
encoding_samples = dt * 8 * 155
# The same prefix length expressed in seconds.
encoding_duration = encoding_samples / sr

data_module = MultiVoice2VoiceDataModule(
    bit_rate=bit_rate,
    limit=sample_size,
    sr=sr,
    patch_baud_rate=baud,
)
# The loader is requested with `sample_size`, the same value used as the row
# limit (presumably so that one batch holds every row - TODO confirm).
loader = data_module.get_train_dataloader(sample_size)

#%%
# Preview cell: for samples 0-4 of a batch, mu-law decode the x/y signals,
# render a mel-spectrogram + play button for the encoding, x and y, show
# zoomed waveform segments, and plot an STFT of the start of x.
# NOTE(review): loop nesting is not visible here; presumably `for i` is nested
# inside `for batch` and the trailing `break` stops after the first batch -
# confirm against the repository file.
for batch in loader:
# NOTE(review): batch_size is not read anywhere in the visible code.
batch_size = batch['x'].shape[0]

for i in range(5):
# Select the i-th sample from both 'x' and 'y' signals in the batch
x_signal = batch['x'][i].cpu()
# y has its last axis squeezed away before decoding; x does not.
y_signal = batch['y'][i].squeeze(-1).cpu()
# raise ValueError(y_signal.min(), y_signal.max())
# NOTE(review): torchaudio documents the second argument of mu_law_decoding as
# `quantization_channels`; confirm that passing bit_rate (8) rather than
# 2**bit_rate matches how the data module encodes the signals.
y_signal = mu_law_decoding(y_signal, bit_rate)
x_signal = mu_law_decoding(x_signal, bit_rate)

# print(f"source voice - {batch['source_voice_id'][i]}")
# print(f"target voice - {batch['target_voice_id'][i]}")
# print(f"phrase - {batch['phrase_id'][i]}")
# Plot and display encoding
print(f"Playing and displaying encoding")
plot_melspectrogram_and_play_button(batch['encoding'][i].cpu(), sr)

# Plot and display x
print(f"Playing and displaying x[{i}]")
plot_melspectrogram_and_play_button(x_signal, sr)

# plot and display y
print(f"playing and displaying y[{i}]")
plot_melspectrogram_and_play_button(y_signal, sr)

# Source waveform at four zoom levels; each pair is passed as (start, end)
# and is computed in seconds (sample counts divided by sr).
wave_fig = waveform_segment_figure(
x_signal,
sr,
(
(0, (dt*16*8)/sr), # first 16 bytes of encoding
(encoding_duration, encoding_duration+1), # 1s of audio
(encoding_duration+1, encoding_duration+1.1), # 0.1s of audio
(encoding_duration+1, encoding_duration+1.01), # 0.01s of audio
),
title="source waveforms - first 16 bytes of encoding, 1s of audio, 0.1s of audio, 0.01s of audio"
)
display(wave_fig)
plt.close("all") # suppress inlined plots https://stackoverflow.com/questions/49545003/how-to-suppress-matplotlib-inline-for-a-single-cell-in-jupyter-notebooks-lab
# Target waveform: the same three audio windows, without the encoding window.
wave_fig = waveform_segment_figure(
y_signal,
sr,
(
(encoding_duration, encoding_duration+1), # 1s of audio starting at encoding_duration
(encoding_duration+1, encoding_duration+1.1), # 0.1s of audio
(encoding_duration+1, encoding_duration+1.01), # 0.01s of audio
),
title="target waveforms - 1s of audio, 0.1s of audio, 0.01s of audio"
)
display(wave_fig)
plt.close("all") # suppress inlined plots https://stackoverflow.com/questions/49545003/how-to-suppress-matplotlib-inline-for-a-single-cell-in-jupyter-notebooks-lab

# NOTE(review): `t` is not read anywhere in the visible code.
t = np.arange(0, x_signal.shape[0] / sr, 1.0 / sr)
# nperseg=4 with noverlap=3 gives a hop of one sample, so each STFT frame
# advances by a single sample of x_signal.
f, t_s, Zxx = s.stft(x_signal, nfft=128, fs=sr, nperseg=4, noverlap=3, padded=True)

# Plot only the first encoding_samples//8 frames, i.e. the start of the encoding prefix.
plt.pcolormesh(t_s[:encoding_samples//8], f, np.abs(Zxx[...,:encoding_samples//8]))
plt.title(f"{encoding_samples//8} (~19 bytes) samples of the patch encoding as a spectrogram")
plt.show()

plt.close("all") # suppress inlined plots https://stackoverflow.com/questions/49545003/how-to-suppress-matplotlib-inline-for-a-single-cell-in-jupyter-notebooks-lab
break

# %%
# Exhaust the loader without using the batches, then print whatever
# loader.stats() reports for that full pass.
for i in loader:
pass
# %%
print(loader.stats())
# %%
# Fetch the 'train' dataset through the data module's private accessor.
# presumably a Ray Data dataset, judging by the method names used below - TODO confirm
pipeline = data_module._dataset('train')
# %%
# Bare expression: the notebook displays the attribute listing.
dir(pipeline)
# %%
# Bare expression: the notebook displays one batch.
pipeline.take_batch()
# %%
print(pipeline.stats())
# %%
# Exhaust the dataset in batches of 14, then print its stats again.
for i in pipeline.iter_torch_batches(batch_size=14):
pass
# %%
print(pipeline.stats())
# %%
70 changes: 70 additions & 0 deletions notebooks/dataloaders/multi_voice2voice_perf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# %%
from datetime import datetime
from time import sleep
import torch
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio, display
import numpy as np
import pandas as pd
import scipy.signal as s
from s4_dx7.lightning.data import MultiVoice2VoiceDataModule
from s4_dx7.lib.visualistaion.audio import waveform_segment_figure
from torchaudio.functional import mu_law_decoding
from s4_dx7.lightning.data.multi_voice_to_voice import PipelineConfiguration
from s4_dx7.notebook.mel_spec_audio import plot_melspectrogram_and_play_button
from tqdm import tqdm_notebook
## TODO: figure out how to remove error output from the rendered document
import ray
#%%
# Start Ray, passing /app/ray as its temp directory.
ray.init(_temp_dir='/app/ray')
#%%
# Three-step no-op loop; presumably a check that the notebook progress bar renders - TODO confirm
for i in tqdm_notebook(range(3)): pass
#%%
# Parameters for the dataloader throughput experiment.
bit_rate = 8          # forwarded to the data module
sr = 40000            # sample rate handed to the data module
baud = 8000           # forwarded to the data module as patch_baud_rate
sample_size = 10000   # row limit for the data module
batch_size = 2        # batch size used when iterating the pipeline

# Samples per encoded bit. Floor division keeps this in exact integer
# arithmetic rather than truncating the float product 1/baud*sr, which could
# land just below the intended integer for other sr/baud combinations.
dt = sr // baud
# Length of the patch-encoding prefix in samples: dt samples per bit, 8 bits
# per byte, 155 bytes (presumably the size of one patch - TODO confirm).
encoding_samples = dt * 8 * 155
# The same prefix length expressed in seconds.
encoding_duration = encoding_samples / sr

# Pipeline tuning knobs under test; their exact semantics are defined by
# PipelineConfiguration, which is not visible from this script.
pipe_config = PipelineConfiguration(
    f_batch_size=None,
    partitions=5000,
    f_concurrency=3,
    override_read_blocks=1,
    f_num_cpus=1,
    loader_prefetch=None,
)


data_module = MultiVoice2VoiceDataModule(
    bit_rate=bit_rate,
    limit=sample_size,
    sr=sr,
    patch_baud_rate=baud,
    pipeline_config=pipe_config,
)
# %%
# Fetch the 'train' dataset through the data module's private accessor.
pipeline = data_module._dataset('train')
# %%
# %%
# Timing cell: record a wall-clock timestamp per batch and periodically plot
# the gaps between them.
# NOTE(review): loop nesting is not visible here; confirm which statements
# belong to the loop body and to the `if` against the repository file.
times = [datetime.now().timestamp()]
for i, x in tqdm_notebook(enumerate(pipeline.iter_torch_batches(batch_size=batch_size))):
sleep(0.5) # stand-in for model.forward().backward()
times.append(datetime.now().timestamp())

# Number of entries in the batch's 'rowid' field.
print(len(x['rowid']))
# True whenever i is a multiple of 1000, including i == 0.
if not i % 1000:
# np.gradient of the timestamps approximates the time between consecutive
# batches, plotted against the sample index.
plt.scatter((range(len(times))),np.gradient(np.array(times)))
plt.show()
plt.close('all')
pass
# %%
print(pipeline.stats())
# %%
# %%
Loading