aodianyun/lemas-tts
Run aodianyun/lemas-tts with an API
Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.
Input schema
The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.
| Field | Type | Default value | Description |
|---|---|---|---|
| ref_text | string | | 参考文本,用于提取语音特征(如果为空,将使用参考音频自动识别) |
| ref_audio | string | | 参考音频文件(WAV格式),用于语音克隆。如果 no_ref_audio=True,此参数将被忽略 |
| gen_text | string | | 要生成的文本内容 |
| project | string (one of: multilingual_grl, multilingual_prosody) | multilingual_prosody | 模型类型 |
| nfe_step | integer | 64 (Min: 16, Max: 128) | NFE 步数,控制生成质量(越大质量越好但速度越慢) |
| use_ema | boolean | True | 是否使用 EMA(指数移动平均)模型 |
| separate_langs | boolean | True | 是否分离语言标记 |
| frontend | string (one of: phone) | phone | 前端类型 |
| speed | number | 1 (Min: 0.5, Max: 1.5) | 语速(0.5-1.5) |
| cfg_strength | number | 5 (Min: 0, Max: 10) | CFG 强度(0-10),控制与参考音频的相似度 |
| use_acc_grl | boolean | True | 是否使用加速 GRL |
| ref_ratio | number | 1 (Min: 0, Max: 1) | 参考比例(0-1),控制参考音频的影响程度 |
| no_ref_audio | boolean | False | 是否不使用参考音频(仅使用文本) |
| sway_sampling_coef | number | 3 (Min: 2, Max: 5) | 采样系数(2-5) |
| use_prosody_encoder | boolean | False | 是否使用韵律编码器(仅对 prosody 模型有效) |
| seed | integer | -1 | 随机种子(-1 表示随机) |
| use_denoise | boolean | False | 是否对参考音频进行降噪处理 |
{
"type": "object",
"title": "Input",
"required": [
"ref_audio",
"gen_text"
],
"properties": {
"seed": {
"type": "integer",
"title": "Seed",
"default": -1,
"x-order": 15,
"description": "\u968f\u673a\u79cd\u5b50\uff08-1 \u8868\u793a\u968f\u673a\uff09"
},
"speed": {
"type": "number",
"title": "Speed",
"default": 1,
"maximum": 1.5,
"minimum": 0.5,
"x-order": 8,
"description": "\u8bed\u901f\uff080.5-1.5\uff09"
},
"project": {
"enum": [
"multilingual_grl",
"multilingual_prosody"
],
"type": "string",
"title": "project",
"description": "\u6a21\u578b\u7c7b\u578b",
"default": "multilingual_prosody",
"x-order": 3
},
"use_ema": {
"type": "boolean",
"title": "Use Ema",
"default": true,
"x-order": 5,
"description": "\u662f\u5426\u4f7f\u7528 EMA\uff08\u6307\u6570\u79fb\u52a8\u5e73\u5747\uff09\u6a21\u578b"
},
"frontend": {
"enum": [
"phone"
],
"type": "string",
"title": "frontend",
"description": "\u524d\u7aef\u7c7b\u578b",
"default": "phone",
"x-order": 7
},
"gen_text": {
"type": "string",
"title": "Gen Text",
"x-order": 2,
"description": "\u8981\u751f\u6210\u7684\u6587\u672c\u5185\u5bb9"
},
"nfe_step": {
"type": "integer",
"title": "Nfe Step",
"default": 64,
"maximum": 128,
"minimum": 16,
"x-order": 4,
"description": "NFE \u6b65\u6570\uff0c\u63a7\u5236\u751f\u6210\u8d28\u91cf\uff08\u8d8a\u5927\u8d28\u91cf\u8d8a\u597d\u4f46\u901f\u5ea6\u8d8a\u6162\uff09"
},
"ref_text": {
"type": "string",
"title": "Ref Text",
"default": "",
"x-order": 0,
"description": "\u53c2\u8003\u6587\u672c\uff0c\u7528\u4e8e\u63d0\u53d6\u8bed\u97f3\u7279\u5f81\uff08\u5982\u679c\u4e3a\u7a7a\uff0c\u5c06\u4f7f\u7528\u53c2\u8003\u97f3\u9891\u81ea\u52a8\u8bc6\u522b\uff09"
},
"ref_audio": {
"type": "string",
"title": "Ref Audio",
"format": "uri",
"x-order": 1,
"description": "\u53c2\u8003\u97f3\u9891\u6587\u4ef6\uff08WAV\u683c\u5f0f\uff09\uff0c\u7528\u4e8e\u8bed\u97f3\u514b\u9686\u3002\u5982\u679c no_ref_audio=True\uff0c\u6b64\u53c2\u6570\u5c06\u88ab\u5ffd\u7565"
},
"ref_ratio": {
"type": "number",
"title": "Ref Ratio",
"default": 1,
"maximum": 1,
"minimum": 0,
"x-order": 11,
"description": "\u53c2\u8003\u6bd4\u4f8b\uff080-1\uff09\uff0c\u63a7\u5236\u53c2\u8003\u97f3\u9891\u7684\u5f71\u54cd\u7a0b\u5ea6"
},
"use_acc_grl": {
"type": "boolean",
"title": "Use Acc Grl",
"default": true,
"x-order": 10,
"description": "\u662f\u5426\u4f7f\u7528\u52a0\u901f GRL"
},
"use_denoise": {
"type": "boolean",
"title": "Use Denoise",
"default": false,
"x-order": 16,
"description": "\u662f\u5426\u5bf9\u53c2\u8003\u97f3\u9891\u8fdb\u884c\u964d\u566a\u5904\u7406"
},
"cfg_strength": {
"type": "number",
"title": "Cfg Strength",
"default": 5,
"maximum": 10,
"minimum": 0,
"x-order": 9,
"description": "CFG \u5f3a\u5ea6\uff080-10\uff09\uff0c\u63a7\u5236\u4e0e\u53c2\u8003\u97f3\u9891\u7684\u76f8\u4f3c\u5ea6"
},
"no_ref_audio": {
"type": "boolean",
"title": "No Ref Audio",
"default": false,
"x-order": 12,
"description": "\u662f\u5426\u4e0d\u4f7f\u7528\u53c2\u8003\u97f3\u9891\uff08\u4ec5\u4f7f\u7528\u6587\u672c\uff09"
},
"separate_langs": {
"type": "boolean",
"title": "Separate Langs",
"default": true,
"x-order": 6,
"description": "\u662f\u5426\u5206\u79bb\u8bed\u8a00\u6807\u8bb0"
},
"sway_sampling_coef": {
"type": "number",
"title": "Sway Sampling Coef",
"default": 3,
"maximum": 5,
"minimum": 2,
"x-order": 13,
"description": "\u91c7\u6837\u7cfb\u6570\uff082-5\uff09"
},
"use_prosody_encoder": {
"type": "boolean",
"title": "Use Prosody Encoder",
"default": false,
"x-order": 14,
"description": "\u662f\u5426\u4f7f\u7528\u97f5\u5f8b\u7f16\u7801\u5668\uff08\u4ec5\u5bf9 prosody \u6a21\u578b\u6709\u6548\uff09"
}
}
}
Output schema
The shape of the response you’ll get when you run this model with an API.
{
"type": "string",
"title": "Output",
"format": "uri"
}