cottom/uvr-api | API reference

Public

31 runs

Run cottom/uvr-api with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
audio	string		Audio file to separate (upload)
audio_url	string		URL to audio file (alternative to upload)
model	string	demucs:hdemucs_mmi	Model to use for separation
output_mode	string	vocals_instrumental	Output mode: vocals+instrumental or all stems
output_format	string	mp3	Output audio format
vr_aggressiveness	number	0.05 (Min: 0, Max: 1)	[VR Network] Aggressiveness (0.0-1.0). Higher = more vocal removal
vr_window_size	string	512	[VR Network] Window size. Larger = better quality but slower
vr_batch_size	string	4	[VR Network] Batch size for inference
vr_high_end_process	boolean	false	[VR Network] Enable high-end frequency processing
mdx_segment_size	string	256	[MDX] Segment size. Larger = better quality but more memory
mdx_overlap	number	0.75 (Min: 0, Max: 0.99)	[MDX] Overlap between segments (0.0-0.99)
mdx_denoise	boolean	false	[MDX] Enable denoising
mdxc_segment_size	string	256	[MDXC] Segment size. Larger = better quality but more memory
mdxc_overlap	string	8	[MDXC] Overlap between segments
mdxc_batch_size	string	1	[MDXC] Batch size for inference

{
  "type": "object",
  "title": "Input",
  "properties": {
    "audio": {
      "type": "string",
      "title": "Audio",
      "format": "uri",
      "x-order": 0,
      "description": "Audio file to separate (upload)"
    },
    "model": {
      "enum": [
        "demucs:hdemucs_mmi",
        "vr_network:1_HP-UVR",
        "mdx:UVR-MDX-NET-Inst_1",
        "mdxc:MDX23C-8KFFT-InstVoc_HQ"
      ],
      "type": "string",
      "title": "model",
      "description": "Model to use for separation",
      "default": "demucs:hdemucs_mmi",
      "x-order": 2
    },
    "audio_url": {
      "type": "string",
      "title": "Audio Url",
      "x-order": 1,
      "description": "URL to audio file (alternative to upload)"
    },
    "mdx_denoise": {
      "type": "boolean",
      "title": "Mdx Denoise",
      "default": false,
      "x-order": 11,
      "description": "[MDX] Enable denoising"
    },
    "mdx_overlap": {
      "type": "number",
      "title": "Mdx Overlap",
      "default": 0.75,
      "maximum": 0.99,
      "minimum": 0,
      "x-order": 10,
      "description": "[MDX] Overlap between segments (0.0-0.99)"
    },
    "output_mode": {
      "enum": [
        "vocals_instrumental",
        "all"
      ],
      "type": "string",
      "title": "output_mode",
      "description": "Output mode: vocals+instrumental or all stems",
      "default": "vocals_instrumental",
      "x-order": 3
    },
    "mdxc_overlap": {
      "enum": [
        "2",
        "4",
        "8",
        "16",
        "32"
      ],
      "type": "string",
      "title": "mdxc_overlap",
      "description": "[MDXC] Overlap between segments",
      "default": "8",
      "x-order": 13
    },
    "output_format": {
      "enum": [
        "mp3",
        "wav",
        "flac"
      ],
      "type": "string",
      "title": "output_format",
      "description": "Output audio format",
      "default": "mp3",
      "x-order": 4
    },
    "vr_batch_size": {
      "enum": [
        "1",
        "2",
        "4",
        "8"
      ],
      "type": "string",
      "title": "vr_batch_size",
      "description": "[VR Network] Batch size for inference",
      "default": "4",
      "x-order": 7
    },
    "vr_window_size": {
      "enum": [
        "320",
        "512",
        "1024"
      ],
      "type": "string",
      "title": "vr_window_size",
      "description": "[VR Network] Window size. Larger = better quality but slower",
      "default": "512",
      "x-order": 6
    },
    "mdxc_batch_size": {
      "enum": [
        "1",
        "2",
        "4",
        "8"
      ],
      "type": "string",
      "title": "mdxc_batch_size",
      "description": "[MDXC] Batch size for inference",
      "default": "1",
      "x-order": 14
    },
    "mdx_segment_size": {
      "enum": [
        "64",
        "128",
        "256",
        "512"
      ],
      "type": "string",
      "title": "mdx_segment_size",
      "description": "[MDX] Segment size. Larger = better quality but more memory",
      "default": "256",
      "x-order": 9
    },
    "mdxc_segment_size": {
      "enum": [
        "64",
        "128",
        "256",
        "512"
      ],
      "type": "string",
      "title": "mdxc_segment_size",
      "description": "[MDXC] Segment size. Larger = better quality but more memory",
      "default": "256",
      "x-order": 12
    },
    "vr_aggressiveness": {
      "type": "number",
      "title": "Vr Aggressiveness",
      "default": 0.05,
      "maximum": 1,
      "minimum": 0,
      "x-order": 5,
      "description": "[VR Network] Aggressiveness (0.0-1.0). Higher = more vocal removal"
    },
    "vr_high_end_process": {
      "type": "boolean",
      "title": "Vr High End Process",
      "default": false,
      "x-order": 8,
      "description": "[VR Network] Enable high-end frequency processing"
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "object",
  "title": "Output",
  "additionalProperties": {
    "type": "string",
    "format": "uri"
  }
}