tmappdev/cosy_voice_cloner

Public

54 runs

Run tmappdev/cosy_voice_cloner with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
ref_audio	string		Reference audio file (3-10 seconds)
prompt_text	string		Text of the reference audio (optional)
prompt_language	string	粤语	Language of reference audio
text	string		Text to synthesize
text_language	string	粤语	Language of the text to synthesize
how_to_cut	string	按标点符号切	How to split text
top_k	integer	15 Min: 1 Max: 100	GPT top_k parameter
top_p	number	1 Min: 0 Max: 1	GPT top_p parameter
temperature	number	1 Min: 0 Max: 1	GPT temperature parameter
ref_free	boolean	False	Enable reference-free mode
speed	number	1 Min: 0.6 Max: 1.65	Speech speed adjustment
reference_files	array		Optional additional reference files to blend

{
  "type": "object",
  "title": "Input",
  "required": [
    "ref_audio",
    "text"
  ],
  "properties": {
    "text": {
      "type": "string",
      "title": "Text",
      "x-order": 3,
      "description": "Text to synthesize"
    },
    "speed": {
      "type": "number",
      "title": "Speed",
      "default": 1,
      "maximum": 1.65,
      "minimum": 0.6,
      "x-order": 10,
      "description": "Speech speed adjustment"
    },
    "top_k": {
      "type": "integer",
      "title": "Top K",
      "default": 15,
      "maximum": 100,
      "minimum": 1,
      "x-order": 6,
      "description": "GPT top_k parameter"
    },
    "top_p": {
      "type": "number",
      "title": "Top P",
      "default": 1,
      "maximum": 1,
      "minimum": 0,
      "x-order": 7,
      "description": "GPT top_p parameter"
    },
    "ref_free": {
      "type": "boolean",
      "title": "Ref Free",
      "default": false,
      "x-order": 9,
      "description": "Enable reference-free mode"
    },
    "ref_audio": {
      "type": "string",
      "title": "Ref Audio",
      "format": "uri",
      "x-order": 0,
      "description": "Reference audio file (3-10 seconds)"
    },
    "how_to_cut": {
      "enum": [
        "\u4e0d\u5207",
        "\u51d1\u56db\u53e5\u4e00\u5207",
        "\u51d150\u5b57\u4e00\u5207",
        "\u6309\u4e2d\u6587\u53e5\u53f7\u3002\u5207",
        "\u6309\u82f1\u6587\u53e5\u53f7.\u5207",
        "\u6309\u6807\u70b9\u7b26\u53f7\u5207"
      ],
      "type": "string",
      "title": "how_to_cut",
      "description": "How to split text",
      "default": "\u6309\u6807\u70b9\u7b26\u53f7\u5207",
      "x-order": 5
    },
    "prompt_text": {
      "type": "string",
      "title": "Prompt Text",
      "default": "",
      "x-order": 1,
      "description": "Text of the reference audio (optional)"
    },
    "temperature": {
      "type": "number",
      "title": "Temperature",
      "default": 1,
      "maximum": 1,
      "minimum": 0,
      "x-order": 8,
      "description": "GPT temperature parameter"
    },
    "text_language": {
      "enum": [
        "\u4e2d\u6587",
        "\u82f1\u6587",
        "\u65e5\u6587",
        "\u7ca4\u8bed",
        "\u97e9\u6587",
        "\u4e2d\u82f1\u6df7\u5408",
        "\u65e5\u82f1\u6df7\u5408",
        "\u7ca4\u82f1\u6df7\u5408",
        "\u97e9\u82f1\u6df7\u5408",
        "\u591a\u8bed\u79cd\u6df7\u5408",
        "\u591a\u8bed\u79cd\u6df7\u5408(\u7ca4\u8bed)"
      ],
      "type": "string",
      "title": "text_language",
      "description": "Language of the text to synthesize",
      "default": "\u7ca4\u8bed",
      "x-order": 4
    },
    "prompt_language": {
      "enum": [
        "\u4e2d\u6587",
        "\u82f1\u6587",
        "\u65e5\u6587",
        "\u7ca4\u8bed",
        "\u97e9\u6587",
        "\u4e2d\u82f1\u6df7\u5408",
        "\u65e5\u82f1\u6df7\u5408",
        "\u7ca4\u82f1\u6df7\u5408",
        "\u97e9\u82f1\u6df7\u5408",
        "\u591a\u8bed\u79cd\u6df7\u5408",
        "\u591a\u8bed\u79cd\u6df7\u5408(\u7ca4\u8bed)"
      ],
      "type": "string",
      "title": "prompt_language",
      "description": "Language of reference audio",
      "default": "\u7ca4\u8bed",
      "x-order": 2
    },
    "reference_files": {
      "type": "array",
      "items": {
        "type": "string",
        "format": "uri"
      },
      "title": "Reference Files",
      "x-order": 11,
      "description": "Optional additional reference files to blend"
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}