aodianyun/f5-tts-thai
Run aodianyun/f5-tts-thai with an API
Use one of our client libraries to get started quickly. Clicking on a library will take you to the Playground tab where you can tweak different inputs, see the results, and copy the corresponding code to use in your own project.
Input schema
The fields you can use to run this model with an API. If you don't give a value for a field, its default value will be used.
| Field | Type | Default value | Description |
|---|---|---|---|
| audio |
string
|
|
参考音频文件(WAV/MP3)。当 no_ref_audio=True 时此参数将被忽略。
|
| text |
string
|
|
要生成的文本内容
|
| ref_text |
string
|
|
参考文本(可选;为空则自动识别参考音频)
|
| remove_silence |
boolean
|
True
|
是否移除生成音频中的静音段
|
| speed |
number
|
1
Min: 0.3 Max: 2 |
语速(0.3-2.0)
|
| nfe_step |
integer
|
32
Min: 4 Max: 64 |
NFE 步数(越大质量越好但越慢)
|
| cross_fade_duration |
number
|
0.15
Min: 0 Max: 1 |
分段拼接淡入淡出(秒)
|
| cfg_strength |
number
|
2
Min: 0 Max: 10 |
CFG 强度(控制与参考音频相似度)
|
| sway_sampling_coef |
number
|
-1
Min: -5 Max: 5 |
采样系数(建议 -1 或 2-5)
|
| max_chars |
integer
|
250
Min: 50 Max: 800 |
长文本分段最大字节数(越小越稳但越慢)
|
| seed |
integer
|
-1
|
随机种子(-1 表示随机)
|
| no_ref_audio |
boolean
|
False
|
是否不使用参考音频(纯文本生成)
|
{
"type": "object",
"title": "Input",
"required": [
"audio",
"text"
],
"properties": {
"seed": {
"type": "integer",
"title": "Seed",
"default": -1,
"x-order": 10,
"description": "\u968f\u673a\u79cd\u5b50\uff08-1 \u8868\u793a\u968f\u673a\uff09"
},
"text": {
"type": "string",
"title": "Text",
"x-order": 1,
"description": "\u8981\u751f\u6210\u7684\u6587\u672c\u5185\u5bb9"
},
"audio": {
"type": "string",
"title": "Audio",
"format": "uri",
"x-order": 0,
"description": "\u53c2\u8003\u97f3\u9891\u6587\u4ef6\uff08WAV/MP3\uff09\u3002\u5f53 no_ref_audio=True \u65f6\u6b64\u53c2\u6570\u5c06\u88ab\u5ffd\u7565\u3002"
},
"speed": {
"type": "number",
"title": "Speed",
"default": 1,
"maximum": 2,
"minimum": 0.3,
"x-order": 4,
"description": "\u8bed\u901f\uff080.3-2.0\uff09"
},
"nfe_step": {
"type": "integer",
"title": "Nfe Step",
"default": 32,
"maximum": 64,
"minimum": 4,
"x-order": 5,
"description": "NFE \u6b65\u6570\uff08\u8d8a\u5927\u8d28\u91cf\u8d8a\u597d\u4f46\u8d8a\u6162\uff09"
},
"ref_text": {
"type": "string",
"title": "Ref Text",
"default": "",
"x-order": 2,
"description": "\u53c2\u8003\u6587\u672c\uff08\u53ef\u9009\uff1b\u4e3a\u7a7a\u5219\u81ea\u52a8\u8bc6\u522b\u53c2\u8003\u97f3\u9891\uff09"
},
"max_chars": {
"type": "integer",
"title": "Max Chars",
"default": 250,
"maximum": 800,
"minimum": 50,
"x-order": 9,
"description": "\u957f\u6587\u672c\u5206\u6bb5\u6700\u5927\u5b57\u8282\u6570\uff08\u8d8a\u5c0f\u8d8a\u7a33\u4f46\u8d8a\u6162\uff09"
},
"cfg_strength": {
"type": "number",
"title": "Cfg Strength",
"default": 2,
"maximum": 10,
"minimum": 0,
"x-order": 7,
"description": "CFG \u5f3a\u5ea6\uff08\u63a7\u5236\u4e0e\u53c2\u8003\u97f3\u9891\u76f8\u4f3c\u5ea6\uff09"
},
"no_ref_audio": {
"type": "boolean",
"title": "No Ref Audio",
"default": false,
"x-order": 11,
"description": "\u662f\u5426\u4e0d\u4f7f\u7528\u53c2\u8003\u97f3\u9891\uff08\u7eaf\u6587\u672c\u751f\u6210\uff09"
},
"remove_silence": {
"type": "boolean",
"title": "Remove Silence",
"default": true,
"x-order": 3,
"description": "\u662f\u5426\u79fb\u9664\u751f\u6210\u97f3\u9891\u4e2d\u7684\u9759\u97f3\u6bb5"
},
"sway_sampling_coef": {
"type": "number",
"title": "Sway Sampling Coef",
"default": -1,
"maximum": 5,
"minimum": -5,
"x-order": 8,
"description": "\u91c7\u6837\u7cfb\u6570\uff08\u5efa\u8bae -1 \u6216 2-5\uff09"
},
"cross_fade_duration": {
"type": "number",
"title": "Cross Fade Duration",
"default": 0.15,
"maximum": 1,
"minimum": 0,
"x-order": 6,
"description": "\u5206\u6bb5\u62fc\u63a5\u6de1\u5165\u6de1\u51fa\uff08\u79d2\uff09"
}
}
}
Output schema
The shape of the response you’ll get when you run this model with an API.
{
"type": "string",
"title": "Output",
"format": "uri"
}