Skip to content
Snippets Groups Projects
Commit 9a2c78bd authored by Chin-Yun Yu's avatar Chin-Yun Yu
Browse files

draft: autoregressive predict

parent f23e7827
No related branches found
No related tags found
No related merge requests found
......@@ -65,47 +65,51 @@ class HDemucs(IdentitySeparationModel):
[
mixed_sound_array,
mixed_diff.unsqueeze(0),
mixed_mean.unsqueeze(0),
],
dim=0,
)
dialog_idx = self.instruments_idx["dialog"]
sep_l, sep_r, sep_diff = self.hdemucs(mixed_sound_array).squeeze()
dialog = (sep_l[dialog_idx] + sep_r[dialog_idx]) / 2
mixed_mean[0] -= dialog
separated = self.hdemucs(mixed_mean.unsqueeze(0)).squeeze()
separated_music_arrays = {}
output_sample_rates = {}
sep_l, sep_r, sep_diff, sep_mean = self.hdemucs(mixed_sound_array).squeeze()
# sep_l, sep_r, sep_diff, sep_mean = self.hdemucs(mixed_sound_array).squeeze()
dialog_idx = self.instruments_idx["dialog"]
dialog = (sep_l[dialog_idx] + sep_r[dialog_idx] + sep_mean[dialog_idx]) / 3
separated_music_arrays["dialog"] = dialog.unsqueeze(1).repeat(1, 2).cpu().numpy()
# dialog = (sep_l[dialog_idx] + sep_r[dialog_idx] + sep_mean[dialog_idx]) / 3
separated_music_arrays["dialog"] = (
dialog.unsqueeze(1).repeat(1, 2).cpu().numpy()
)
output_sample_rates["dialog"] = sample_rate
mixed_mean[0] = mixed_mean[0] - dialog
effect_idx = self.instruments_idx["effect"]
music_idx = self.instruments_idx["music"]
effect_diff = mixed_diff[0] - sep_diff[music_idx]
effect_l = effect_diff + sep_mean[effect_idx]
effect_r = sep_mean[effect_idx] - effect_diff
effect_l = 0.5 * (effect_l + sep_l[effect_idx])
effect_r = 0.5 * (effect_r + sep_r[effect_idx])
# effect_diff = mixed_diff[0] - sep_diff[music_idx]
effect_l = sep_diff[effect_idx] + separated[effect_idx]
effect_r = separated[effect_idx] - sep_diff[effect_idx]
# effect_l = 0.5 * (effect_l + sep_l[effect_idx])
# effect_r = 0.5 * (effect_r + sep_r[effect_idx])
separated_music_arrays["effect"] = (
torch.stack([effect_l, effect_r], 1).cpu().numpy()
)
output_sample_rates["effect"] = sample_rate
music_diff = mixed_diff[0] - sep_diff[effect_idx]
music_l = music_diff + sep_mean[music_idx]
music_r = sep_mean[music_idx] - music_diff
music_l = 0.5 * (music_l + sep_l[music_idx])
music_r = 0.5 * (music_r + sep_r[music_idx])
# music_diff = mixed_diff[0] - sep_diff[effect_idx]
music_l = sep_diff[music_idx] + separated[music_idx]
music_r = separated[music_idx] - sep_diff[music_idx]
# music_l = 0.5 * (music_l + sep_l[music_idx])
# music_r = 0.5 * (music_r + sep_r[music_idx])
separated_music_arrays["music"] = (
torch.stack([music_l, music_r], 1).cpu().numpy()
)
output_sample_rates["music"] = sample_rate
# separated = (
# self.hdemucs(mixed_sound_array).squeeze().permute(1, 2, 0).cpu().numpy()
# )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment