cottom/uvr-api
Public
31
runs
Run cottom/uvr-api with an API
Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.
Input schema
The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.
| Field | Type | Default value | Description |
|---|---|---|---|
| audio | string | | Audio file to separate (upload) |
| audio_url | string | | URL to audio file (alternative to upload) |
| model | string (enum) | demucs:hdemucs_mmi | Model to use for separation |
| output_mode | string (enum) | vocals_instrumental | Output mode: vocals+instrumental or all stems |
| output_format | string (enum) | mp3 | Output audio format |
| vr_aggressiveness | number | 0.05 (Max: 1) | [VR Network] Aggressiveness (0.0-1.0). Higher = more vocal removal |
| vr_window_size | string (enum) | 512 | [VR Network] Window size. Larger = better quality but slower |
| vr_batch_size | string (enum) | 4 | [VR Network] Batch size for inference |
| vr_high_end_process | boolean | False | [VR Network] Enable high-end frequency processing |
| mdx_segment_size | string (enum) | 256 | [MDX] Segment size. Larger = better quality but more memory |
| mdx_overlap | number | 0.75 (Max: 0.99) | [MDX] Overlap between segments (0.0-0.99) |
| mdx_denoise | boolean | False | [MDX] Enable denoising |
| mdxc_segment_size | string (enum) | 256 | [MDXC] Segment size. Larger = better quality but more memory |
| mdxc_overlap | string (enum) | 8 | [MDXC] Overlap between segments |
| mdxc_batch_size | string (enum) | 1 | [MDXC] Batch size for inference |
{
  "title": "Input",
  "type": "object",
  "properties": {
    "audio": {
      "title": "Audio",
      "description": "Audio file to separate (upload)",
      "type": "string",
      "format": "uri",
      "x-order": 0
    },
    "audio_url": {
      "title": "Audio Url",
      "description": "URL to audio file (alternative to upload)",
      "type": "string",
      "x-order": 1
    },
    "model": {
      "title": "model",
      "description": "Model to use for separation",
      "type": "string",
      "enum": [
        "demucs:hdemucs_mmi",
        "vr_network:1_HP-UVR",
        "mdx:UVR-MDX-NET-Inst_1",
        "mdxc:MDX23C-8KFFT-InstVoc_HQ"
      ],
      "default": "demucs:hdemucs_mmi",
      "x-order": 2
    },
    "output_mode": {
      "title": "output_mode",
      "description": "Output mode: vocals+instrumental or all stems",
      "type": "string",
      "enum": ["vocals_instrumental", "all"],
      "default": "vocals_instrumental",
      "x-order": 3
    },
    "output_format": {
      "title": "output_format",
      "description": "Output audio format",
      "type": "string",
      "enum": ["mp3", "wav", "flac"],
      "default": "mp3",
      "x-order": 4
    },
    "vr_aggressiveness": {
      "title": "Vr Aggressiveness",
      "description": "[VR Network] Aggressiveness (0.0-1.0). Higher = more vocal removal",
      "type": "number",
      "default": 0.05,
      "minimum": 0,
      "maximum": 1,
      "x-order": 5
    },
    "vr_window_size": {
      "title": "vr_window_size",
      "description": "[VR Network] Window size. Larger = better quality but slower",
      "type": "string",
      "enum": ["320", "512", "1024"],
      "default": "512",
      "x-order": 6
    },
    "vr_batch_size": {
      "title": "vr_batch_size",
      "description": "[VR Network] Batch size for inference",
      "type": "string",
      "enum": ["1", "2", "4", "8"],
      "default": "4",
      "x-order": 7
    },
    "vr_high_end_process": {
      "title": "Vr High End Process",
      "description": "[VR Network] Enable high-end frequency processing",
      "type": "boolean",
      "default": false,
      "x-order": 8
    },
    "mdx_segment_size": {
      "title": "mdx_segment_size",
      "description": "[MDX] Segment size. Larger = better quality but more memory",
      "type": "string",
      "enum": ["64", "128", "256", "512"],
      "default": "256",
      "x-order": 9
    },
    "mdx_overlap": {
      "title": "Mdx Overlap",
      "description": "[MDX] Overlap between segments (0.0-0.99)",
      "type": "number",
      "default": 0.75,
      "minimum": 0,
      "maximum": 0.99,
      "x-order": 10
    },
    "mdx_denoise": {
      "title": "Mdx Denoise",
      "description": "[MDX] Enable denoising",
      "type": "boolean",
      "default": false,
      "x-order": 11
    },
    "mdxc_segment_size": {
      "title": "mdxc_segment_size",
      "description": "[MDXC] Segment size. Larger = better quality but more memory",
      "type": "string",
      "enum": ["64", "128", "256", "512"],
      "default": "256",
      "x-order": 12
    },
    "mdxc_overlap": {
      "title": "mdxc_overlap",
      "description": "[MDXC] Overlap between segments",
      "type": "string",
      "enum": ["2", "4", "8", "16", "32"],
      "default": "8",
      "x-order": 13
    },
    "mdxc_batch_size": {
      "title": "mdxc_batch_size",
      "description": "[MDXC] Batch size for inference",
      "type": "string",
      "enum": ["1", "2", "4", "8"],
      "default": "1",
      "x-order": 14
    }
  }
}
Output schema
The shape of the response you’ll get when you run this model with an API.
Schema
{
  "title": "Output",
  "type": "object",
  "additionalProperties": {
    "format": "uri",
    "type": "string"
  }
}