aodianyun/lemas-tts | API reference

Public

762 runs

Run aodianyun/lemas-tts with an API

Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.

Input schema

The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.

Field	Type	Default value	Description
ref_text	string		参考文本，用于提取语音特征（如果为空，将使用参考音频自动识别）
ref_audio	string		参考音频文件（WAV格式），用于语音克隆。如果 no_ref_audio=True，此参数将被忽略
gen_text	string		要生成的文本内容
project	string	multilingual_prosody	模型类型
nfe_step	integer	64 Min: 16 Max: 128	NFE 步数，控制生成质量（越大质量越好但速度越慢）
use_ema	boolean	True	是否使用 EMA（指数移动平均）模型
separate_langs	boolean	True	是否分离语言标记
frontend	string	phone	前端类型
speed	number	1 Min: 0.5 Max: 1.5	语速（0.5-1.5）
cfg_strength	number	5 Min: 0 Max: 10	CFG 强度（0-10），控制与参考音频的相似度
use_acc_grl	boolean	True	是否使用加速 GRL
ref_ratio	number	1 Min: 0 Max: 1	参考比例（0-1），控制参考音频的影响程度
no_ref_audio	boolean	False	是否不使用参考音频（仅使用文本）
sway_sampling_coef	number	3 Min: 2 Max: 5	采样系数（2-5）
use_prosody_encoder	boolean	False	是否使用韵律编码器（仅对 prosody 模型有效）
seed	integer	-1	随机种子（-1 表示随机）
use_denoise	boolean	False	是否对参考音频进行降噪处理

{
  "type": "object",
  "title": "Input",
  "required": [
    "ref_audio",
    "gen_text"
  ],
  "properties": {
    "seed": {
      "type": "integer",
      "title": "Seed",
      "default": -1,
      "x-order": 15,
      "description": "\u968f\u673a\u79cd\u5b50\uff08-1 \u8868\u793a\u968f\u673a\uff09"
    },
    "speed": {
      "type": "number",
      "title": "Speed",
      "default": 1,
      "maximum": 1.5,
      "minimum": 0.5,
      "x-order": 8,
      "description": "\u8bed\u901f\uff080.5-1.5\uff09"
    },
    "project": {
      "enum": [
        "multilingual_grl",
        "multilingual_prosody"
      ],
      "type": "string",
      "title": "project",
      "description": "\u6a21\u578b\u7c7b\u578b",
      "default": "multilingual_prosody",
      "x-order": 3
    },
    "use_ema": {
      "type": "boolean",
      "title": "Use Ema",
      "default": true,
      "x-order": 5,
      "description": "\u662f\u5426\u4f7f\u7528 EMA\uff08\u6307\u6570\u79fb\u52a8\u5e73\u5747\uff09\u6a21\u578b"
    },
    "frontend": {
      "enum": [
        "phone"
      ],
      "type": "string",
      "title": "frontend",
      "description": "\u524d\u7aef\u7c7b\u578b",
      "default": "phone",
      "x-order": 7
    },
    "gen_text": {
      "type": "string",
      "title": "Gen Text",
      "x-order": 2,
      "description": "\u8981\u751f\u6210\u7684\u6587\u672c\u5185\u5bb9"
    },
    "nfe_step": {
      "type": "integer",
      "title": "Nfe Step",
      "default": 64,
      "maximum": 128,
      "minimum": 16,
      "x-order": 4,
      "description": "NFE \u6b65\u6570\uff0c\u63a7\u5236\u751f\u6210\u8d28\u91cf\uff08\u8d8a\u5927\u8d28\u91cf\u8d8a\u597d\u4f46\u901f\u5ea6\u8d8a\u6162\uff09"
    },
    "ref_text": {
      "type": "string",
      "title": "Ref Text",
      "default": "",
      "x-order": 0,
      "description": "\u53c2\u8003\u6587\u672c\uff0c\u7528\u4e8e\u63d0\u53d6\u8bed\u97f3\u7279\u5f81\uff08\u5982\u679c\u4e3a\u7a7a\uff0c\u5c06\u4f7f\u7528\u53c2\u8003\u97f3\u9891\u81ea\u52a8\u8bc6\u522b\uff09"
    },
    "ref_audio": {
      "type": "string",
      "title": "Ref Audio",
      "format": "uri",
      "x-order": 1,
      "description": "\u53c2\u8003\u97f3\u9891\u6587\u4ef6\uff08WAV\u683c\u5f0f\uff09\uff0c\u7528\u4e8e\u8bed\u97f3\u514b\u9686\u3002\u5982\u679c no_ref_audio=True\uff0c\u6b64\u53c2\u6570\u5c06\u88ab\u5ffd\u7565"
    },
    "ref_ratio": {
      "type": "number",
      "title": "Ref Ratio",
      "default": 1,
      "maximum": 1,
      "minimum": 0,
      "x-order": 11,
      "description": "\u53c2\u8003\u6bd4\u4f8b\uff080-1\uff09\uff0c\u63a7\u5236\u53c2\u8003\u97f3\u9891\u7684\u5f71\u54cd\u7a0b\u5ea6"
    },
    "use_acc_grl": {
      "type": "boolean",
      "title": "Use Acc Grl",
      "default": true,
      "x-order": 10,
      "description": "\u662f\u5426\u4f7f\u7528\u52a0\u901f GRL"
    },
    "use_denoise": {
      "type": "boolean",
      "title": "Use Denoise",
      "default": false,
      "x-order": 16,
      "description": "\u662f\u5426\u5bf9\u53c2\u8003\u97f3\u9891\u8fdb\u884c\u964d\u566a\u5904\u7406"
    },
    "cfg_strength": {
      "type": "number",
      "title": "Cfg Strength",
      "default": 5,
      "maximum": 10,
      "minimum": 0,
      "x-order": 9,
      "description": "CFG \u5f3a\u5ea6\uff080-10\uff09\uff0c\u63a7\u5236\u4e0e\u53c2\u8003\u97f3\u9891\u7684\u76f8\u4f3c\u5ea6"
    },
    "no_ref_audio": {
      "type": "boolean",
      "title": "No Ref Audio",
      "default": false,
      "x-order": 12,
      "description": "\u662f\u5426\u4e0d\u4f7f\u7528\u53c2\u8003\u97f3\u9891\uff08\u4ec5\u4f7f\u7528\u6587\u672c\uff09"
    },
    "separate_langs": {
      "type": "boolean",
      "title": "Separate Langs",
      "default": true,
      "x-order": 6,
      "description": "\u662f\u5426\u5206\u79bb\u8bed\u8a00\u6807\u8bb0"
    },
    "sway_sampling_coef": {
      "type": "number",
      "title": "Sway Sampling Coef",
      "default": 3,
      "maximum": 5,
      "minimum": 2,
      "x-order": 13,
      "description": "\u91c7\u6837\u7cfb\u6570\uff082-5\uff09"
    },
    "use_prosody_encoder": {
      "type": "boolean",
      "title": "Use Prosody Encoder",
      "default": false,
      "x-order": 14,
      "description": "\u662f\u5426\u4f7f\u7528\u97f5\u5f8b\u7f16\u7801\u5668\uff08\u4ec5\u5bf9 prosody \u6a21\u578b\u6709\u6548\uff09"
    }
  }
}

Output schema

The shape of the response you’ll get when you run this model with an API.

Schema

{
  "type": "string",
  "title": "Output",
  "format": "uri"
}