owkai/voice-cloning-yt | API reference

owkai / voice-cloning-yt

Public
3 runs

Run owkai/voice-cloning-yt with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
song_input	string		Your YouTube audio link, e.g. https://www.youtube.com/watch?v=3KFvoDDs0XM
rvc_model	string (enum)	Squidward Options: Squidward, MrKrabs, Plankton, Drake, Vader, Trump, Biden, Obama, Guitar, Voilin, CUSTOM, SamA	RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.
custom_rvc_model_download_url	string		URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value.
pitch_change	string (enum)	no-change Options: no-change, male-to-female, female-to-male	Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.
index_rate	number	0.5 Max: 1	Control how much of the AI's accent to leave in the vocals.
filter_radius	integer	3 Max: 7	If >=3: apply median filtering to the harvested pitch results.
rms_mix_rate	number	0.25 Max: 1	Control how much to use the original vocal's loudness (0) or a fixed loudness (1).
pitch_detection_algorithm	string (enum)	rmvpe Options: rmvpe, mangio-crepe	Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).
crepe_hop_length	integer	128	When `pitch_detection_algorithm` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy.
protect	number	0.33 Max: 0.5	Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable.
main_vocals_volume_change	number	0	Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels.
backup_vocals_volume_change	number	0	Control volume of backup AI vocals.
instrumental_volume_change	number	0	Control volume of the background music/instrumentals.
pitch_change_all	number	0	Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly.
reverb_size	number	0.15 Max: 1	The larger the room, the longer the reverb time.
reverb_wetness	number	0.2 Max: 1	Level of AI vocals with reverb.
reverb_dryness	number	0.8 Max: 1	Level of AI vocals without reverb.
reverb_damping	number	0.7 Max: 1	Absorption of high frequencies in the reverb.
output_format	string (enum)	mp3 Options: mp3, wav	wav for best quality and large file size, mp3 for decent quality and small file size.

{
  "type": "object",
  "title": "Input",
  "properties": {
    "protect": {
      "type": "number",
      "title": "Protect",
      "default": 0.33,
      "maximum": 0.5,
      "minimum": 0,
      "x-order": 9,
      "description": "Control how much of the original vocals' breath and voiceless consonants to leave in the AI vocals. Set 0.5 to disable."
    },
    "rvc_model": {
      "enum": [
        "Squidward",
        "MrKrabs",
        "Plankton",
        "Drake",
        "Vader",
        "Trump",
        "Biden",
        "Obama",
        "Guitar",
        "Voilin",
        "CUSTOM",
        "SamA"
      ],
      "type": "string",
      "title": "rvc_model",
      "description": "RVC model for a specific voice. If using a custom model, this should match the name of the downloaded model. If a 'custom_rvc_model_download_url' is provided, this will be automatically set to the name of the downloaded model.",
      "default": "Squidward",
      "x-order": 1
    },
    "index_rate": {
      "type": "number",
      "title": "Index Rate",
      "default": 0.5,
      "maximum": 1,
      "minimum": 0,
      "x-order": 4,
      "description": "Control how much of the AI's accent to leave in the vocals."
    },
    "song_input": {
      "type": "string",
      "title": "Song Input",
      "x-order": 0,
      "description": "Your YouTube audio link, e.g. https://www.youtube.com/watch?v=3KFvoDDs0XM"
    },
    "reverb_size": {
      "type": "number",
      "title": "Reverb Size",
      "default": 0.15,
      "maximum": 1,
      "minimum": 0,
      "x-order": 14,
      "description": "The larger the room, the longer the reverb time."
    },
    "pitch_change": {
      "enum": [
        "no-change",
        "male-to-female",
        "female-to-male"
      ],
      "type": "string",
      "title": "pitch_change",
      "description": "Adjust pitch of AI vocals. Options: `no-change`, `male-to-female`, `female-to-male`.",
      "default": "no-change",
      "x-order": 3
    },
    "rms_mix_rate": {
      "type": "number",
      "title": "Rms Mix Rate",
      "default": 0.25,
      "maximum": 1,
      "minimum": 0,
      "x-order": 6,
      "description": "Control how much to use the original vocal's loudness (0) or a fixed loudness (1)."
    },
    "filter_radius": {
      "type": "integer",
      "title": "Filter Radius",
      "default": 3,
      "maximum": 7,
      "minimum": 0,
      "x-order": 5,
      "description": "If >=3: apply median filtering to the harvested pitch results."
    },
    "output_format": {
      "enum": [
        "mp3",
        "wav"
      ],
      "type": "string",
      "title": "output_format",
      "description": "wav for best quality and large file size, mp3 for decent quality and small file size.",
      "default": "mp3",
      "x-order": 18
    },
    "reverb_damping": {
      "type": "number",
      "title": "Reverb Damping",
      "default": 0.7,
      "maximum": 1,
      "minimum": 0,
      "x-order": 17,
      "description": "Absorption of high frequencies in the reverb."
    },
    "reverb_dryness": {
      "type": "number",
      "title": "Reverb Dryness",
      "default": 0.8,
      "maximum": 1,
      "minimum": 0,
      "x-order": 16,
      "description": "Level of AI vocals without reverb."
    },
    "reverb_wetness": {
      "type": "number",
      "title": "Reverb Wetness",
      "default": 0.2,
      "maximum": 1,
      "minimum": 0,
      "x-order": 15,
      "description": "Level of AI vocals with reverb."
    },
    "crepe_hop_length": {
      "type": "integer",
      "title": "Crepe Hop Length",
      "default": 128,
      "x-order": 8,
      "description": "When `pitch_detection_algorithm` is set to `mangio-crepe`, this controls how often it checks for pitch changes in milliseconds. Lower values lead to longer conversions and higher risk of voice cracks, but better pitch accuracy."
    },
    "pitch_change_all": {
      "type": "number",
      "title": "Pitch Change All",
      "default": 0,
      "x-order": 13,
      "description": "Change pitch/key of background music, backup vocals and AI vocals in semitones. Reduces sound quality slightly."
    },
    "main_vocals_volume_change": {
      "type": "number",
      "title": "Main Vocals Volume Change",
      "default": 0,
      "x-order": 10,
      "description": "Control volume of main AI vocals. Use -3 to decrease the volume by 3 decibels, or 3 to increase the volume by 3 decibels."
    },
    "pitch_detection_algorithm": {
      "enum": [
        "rmvpe",
        "mangio-crepe"
      ],
      "type": "string",
      "title": "pitch_detection_algorithm",
      "description": "Best option is rmvpe (clarity in vocals), then mangio-crepe (smoother vocals).",
      "default": "rmvpe",
      "x-order": 7
    },
    "instrumental_volume_change": {
      "type": "number",
      "title": "Instrumental Volume Change",
      "default": 0,
      "x-order": 12,
      "description": "Control volume of the background music/instrumentals."
    },
    "backup_vocals_volume_change": {
      "type": "number",
      "title": "Backup Vocals Volume Change",
      "default": 0,
      "x-order": 11,
      "description": "Control volume of backup AI vocals."
    },
    "custom_rvc_model_download_url": {
      "type": "string",
      "title": "Custom Rvc Model Download Url",
      "x-order": 2,
      "description": "URL to download a custom RVC model. If provided, the model will be downloaded (if it doesn't already exist) and used for prediction, regardless of the 'rvc_model' value."
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}