Commit 12a73d9c in yoyololicon/music-demixing-challenge-starter-kit
Authored 3 years ago by StefanUhlich
Parent: 4cd3be89

    Add X-UMX baseline
Showing 4 changed files with 180 additions and 2 deletions:

    apt.txt             +2    −0
    predict.py          +5    −2
    requirements.txt    +3    −0
    test_xumx.py        +170  −0
apt.txt (+2 −0)

build-essential
git
ffmpeg
libsndfile1
sox
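Locally, the same packages can be installed with apt (a standard invocation, not part of this commit):

    sudo apt-get update
    sudo apt-get install -y build-essential git ffmpeg libsndfile1 sox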
predict.py (+5 −2)

from test import CopyPredictor, ScaledMixturePredictor
from test_umx import UMXPredictor
from test_xumx import XUMXPredictor

# Predictor which does nothing
copy_predictor = CopyPredictor()

# Predictor which uses 1/4*mixture as separations
scaledmixture_predictor = ScaledMixturePredictor()

# UMX needs `models` folder to be present in your submission, check test_umx.py to learn more
umx_predictor = UMXPredictor()

# X-UMX needs `models` folder to be present in your submission, check test_xumx.py to learn more
xumx_predictor = XUMXPredictor()

"""
PARTICIPANT_TODO: The implementation you want to submit as your submission
...
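For orientation, a minimal sketch of how the PARTICIPANT_TODO block is typically completed: assign one of the predictors defined above and run it. The `submission` name and the `run()` call are assumptions borrowed from the `__main__` convention in test_xumx.py, not code from this commit:

    # Hypothetical: choose the predictor to evaluate and run it.
    submission = xumx_predictor
    submission.run()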
requirements.txt (+3 −0)

@@ -6,3 +6,6 @@ boto3
 openunmix
 musdb
 SoundFile
+scipy
+norbert
+git+https://github.com/asteroid-team/asteroid.git
\ No newline at end of file
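Locally, the same dependencies can be pulled with pip (standard usage, not specific to this commit); the last entry installs asteroid straight from GitHub:

    pip install -r requirements.txt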
test_xumx.py (new file 100644, +170 −0)
#!/usr/bin/env python
#
# This file uses CrossNet-UMX (X-UMX) for music demixing.
# It is one of the official baselines for the Music Demixing challenge.
#
# NOTE: X-UMX needs the model to be submitted along with your code.
#
# Making submission using X-UMX:
# 1. Change the model in `predict.py` to XUMXPredictor.
# 2. Download the pre-trained model from Zenodo into the folder `./models`
# #> mkdir models
# #> wget -P models https://zenodo.org/record/4740378/files/pretrained_xumx_musdb18HQ.pth
# 3. Submit your code using git-lfs
# #> git lfs install
# #> git lfs track "*.pth"
# #> git add .gitattributes
# #> git add models
#
from asteroid.models import XUMX
from asteroid.complex_nn import torch_complex_from_magphase
import norbert
import numpy as np
import scipy.signal  # the istft helper below uses scipy.signal.istft
import soundfile as sf
import torch

from evaluator.music_demixing import MusicDemixingPredictor
# Inverse STFT - taken from
# https://github.com/asteroid-team/asteroid/blob/master/egs/musdb18/X-UMX/eval.py
def istft(X, rate=44100, n_fft=4096, n_hopsize=1024):
    t, audio = scipy.signal.istft(
        X / (n_fft / 2), rate, nperseg=n_fft, noverlap=n_fft - n_hopsize, boundary=True
    )
    return audio
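
# Illustrative round trip (hypothetical, not from the upstream script): an STFT
# produced by scipy.signal.stft with matching nperseg/noverlap inverts through
# istft() once the n_fft/2 factor that istft() divides out is applied:
#
#   _, _, X = scipy.signal.stft(x, 44100, nperseg=4096, noverlap=4096 - 1024)
#   x_hat = istft(X * (4096 / 2))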

# Separation function - taken from
# https://github.com/asteroid-team/asteroid/blob/master/egs/musdb18/X-UMX/eval.py
def separate(
    audio,
    x_umx_target,
    instruments,
    niter=1,
    softmask=False,
    alpha=1.0,
    residual_model=False,
    device="cpu",
):
    """
    Perform the separation on an audio input.

    Parameters
    ----------
    audio: np.ndarray [shape=(nb_timesteps, nb_channels)]
        mixture audio, as returned by soundfile.read

    x_umx_target: asteroid.models.XUMX
        X-UMX model used for separating

    instruments: list
        The list of instruments, e.g., ["bass", "drums", "vocals"]

    niter: int
        Number of EM steps for refining initial estimates in a
        post-processing stage, defaults to 1.

    softmask: boolean
        if activated, then the initial estimates for the sources will
        be obtained through a ratio mask of the mixture STFT, and not
        by using the default behavior of reconstructing waveforms
        by using the mixture phase, defaults to False

    alpha: float
        changes the exponent to use for building ratio masks, defaults to 1.0

    residual_model: boolean
        computes a residual target, for custom separation scenarios
        when not all targets are available, defaults to False

    device: str
        set torch device. Defaults to `cpu`.

    Returns
    -------
    estimates: `dict` [`str`, `np.ndarray`]
        dictionary with all estimates obtained by the separation model.
    """
    # convert numpy audio to torch
    audio_torch = torch.tensor(audio.T[None, ...]).float().to(device)

    source_names = []
    V = []

    masked_tf_rep, _ = x_umx_target(audio_torch)
    # shape: (Sources, frames, batch, channels, fbin)

    for j, target in enumerate(instruments):
        Vj = masked_tf_rep[j, Ellipsis].cpu().detach().numpy()
        if softmask:
            # only exponentiate the model if we use softmask
            Vj = Vj**alpha
        # output is nb_frames, nb_samples, nb_channels, nb_bins
        V.append(Vj[:, 0, Ellipsis])  # remove sample dim
        source_names += [target]

    V = np.transpose(np.array(V), (1, 3, 2, 0))

    # convert to complex numpy type
    tmp = x_umx_target.encoder(audio_torch)
    X = torch_complex_from_magphase(tmp[0].permute(1, 2, 3, 0), tmp[1])
    X = X.detach().cpu().numpy()
    X = X[0].transpose(2, 1, 0)

    if residual_model or len(instruments) == 1:
        V = norbert.residual_model(V, X, alpha if softmask else 1)
        source_names += ["residual"] if len(instruments) > 1 else ["accompaniment"]

    Y = norbert.wiener(V, X.astype(np.complex128), niter, use_softmask=softmask)

    estimates = {}
    for j, name in enumerate(source_names):
        audio_hat = istft(
            Y[..., j].T,
            rate=x_umx_target.sample_rate,
            n_fft=x_umx_target.in_chan,
            n_hopsize=x_umx_target.n_hop,
        )
        estimates[name] = audio_hat.T
    return estimates
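
# Illustrative standalone use of separate() (hypothetical; the file name is a
# placeholder). `model.sources` lists the target instruments, as used in
# XUMXPredictor.prediction below:
#
#   model = XUMX.from_pretrained("./models/pretrained_xumx_musdb18HQ.pth")
#   audio, rate = sf.read("mixture.wav")  # (nb_timesteps, nb_channels)
#   estimates = separate(audio, model, model.sources)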

class XUMXPredictor(MusicDemixingPredictor):
    def prediction_setup(self):
        # Load your model here.
        self.separator = XUMX.from_pretrained("./models/pretrained_xumx_musdb18HQ.pth")

    def prediction(
        self,
        mixture_file_path,
        bass_file_path,
        drums_file_path,
        other_file_path,
        vocals_file_path,
    ):
        # Step 1: Load mixture
        x, rate = sf.read(mixture_file_path)
        # mixture is stereo with sample rate of 44.1kHz

        # Step 2: Perform separation
        estimates = separate(x, self.separator, self.separator.sources)

        # Step 3: Store results
        target_file_map = {
            "vocals": vocals_file_path,
            "drums": drums_file_path,
            "bass": bass_file_path,
            "other": other_file_path,
        }
        for target, path in target_file_map.items():
            sf.write(path, estimates[target], rate)

        print(x.shape, estimates["bass"].shape)


if __name__ == "__main__":
    submission = XUMXPredictor()
    submission.run()
    print("Successfully generated predictions!")