typetext
{
"cfg_weight": 0.5,
"exaggeration": 0.5,
"prompt": "We're excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\n\nWhether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\n\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.",
"seed": 0,
"temperature": 0.8
}
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_ebV**********************************
This is your API token. Keep it to yourself.
import Replicate from "replicate";
import fs from "node:fs";
// Build the API client, authenticating with the token taken from the
// REPLICATE_API_TOKEN environment variable.
const apiToken = process.env.REPLICATE_API_TOKEN;
const replicate = new Replicate({ auth: apiToken });
Run resemble-ai/chatterbox using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Input parameters sent to resemble-ai/chatterbox; see the model's schema for
// the meaning and allowed range of each field.
const input = {
  cfg_weight: 0.5,
  exaggeration: 0.5,
  prompt: "We're excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\n\nWhether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\n\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.",
  seed: 0,
  temperature: 0.8,
};

// Run the model and wait for its output.
const output = await replicate.run("resemble-ai/chatterbox", { input });

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// Use the promise-based writeFile and await it: the callback-style
// fs.writeFile throws when called without a callback, and awaiting makes
// write failures surface here. The sample prediction's output is a .wav
// file, so the file is saved with that extension.
await fs.promises.writeFile("output.wav", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_ebV**********************************
This is your API token. Keep it to yourself.
import replicate
Run resemble-ai/chatterbox using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the model; `input` carries the parameters described in the model's schema.
output = replicate.run(
    "resemble-ai/chatterbox",
    input={
        "cfg_weight": 0.5,
        "exaggeration": 0.5,
        "prompt": "We're excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\n\nWhether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\n\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.",
        "seed": 0,
        "temperature": 0.8,
    },
)

# To access the file URL:
print(output.url())
#=> "http://example.com"

# To write the file to disk:
# The sample prediction's output is a .wav file, so save it with that
# extension; "wb" because the payload is binary.
with open("output.wav", "wb") as file:
    file.write(output.read())
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_ebV**********************************
This is your API token. Keep it to yourself.
Run resemble-ai/chatterbox using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# POST a prediction request for resemble-ai/chatterbox, authenticating with
# the bearer token from $REPLICATE_API_TOKEN. The payload uses ANSI-C quoting
# ($'...') so apostrophes can be written as \' and \\n reaches the server as
# the JSON escape \n.
# NOTE(review): "Prefer: wait" presumably asks the API to respond once the
# prediction has finished — confirm against the HTTP API reference.
curl --silent --request POST \
  --header "Authorization: Bearer $REPLICATE_API_TOKEN" \
  --header "Content-Type: application/json" \
  --header "Prefer: wait" \
  --data $'{
"input": {
"cfg_weight": 0.5,
"exaggeration": 0.5,
"prompt": "We\'re excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\\n\\nWhether you\'re working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It\'s also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\\n\\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.",
"seed": 0,
"temperature": 0.8
}
}' \
  https://api.replicate.com/v1/models/resemble-ai/chatterbox/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
{
"id": "g7r2drc7psrmc0cqckasg1z4ec",
"model": "resemble-ai/chatterbox",
"version": "hidden",
"input": {
"cfg_weight": 0.5,
"exaggeration": 0.5,
"prompt": "We're excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\n\nWhether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\n\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.",
"seed": 0,
"temperature": 0.8
},
"logs": "Using seed: 35127\nPrompt: We're excited to introduce Chatterbox, our first production-grade open source TTS model. Licensed under MIT, Chatterbox has been benchmarked against leading closed-source systems like ElevenLabs, and is consistently preferred in side-by-side evaluations.\nWhether you're working on memes, videos, games, or AI agents, Chatterbox brings your content to life. It's also the first open source TTS model to support emotion exaggeration control, a powerful feature that makes your voices stand out. Try it now on our Hugging Face Gradio app.\nIf you like the model but need to scale or finetune it for higher accuracy, check out our competitively priced TTS service (link). It delivers reliable performance with ultra-low latency of sub 200ms—ideal for production use in agents, applications, or interactive media.\n/root/.pyenv/versions/3.11.10/lib/python3.11/site-packages/cog/server/scope.py:22: ExperimentalFeatureWarning: current_scope is an experimental internal function. It may change or be removed without warning.\nwarnings.warn(\n/root/.pyenv/versions/3.11.10/lib/python3.11/contextlib.py:105: FutureWarning: `torch.backends.cuda.sdp_kernel()` is deprecated. In the future, this context manager will be removed. 
Please see `torch.nn.attention.sdpa_kernel()` for the new context manager, with updated signature.\nself.gen = func(*args, **kwds)\nSampling: 0%| | 0/1000 [00:00<?, ?it/s]\nSampling: 1%| | 6/1000 [00:00<00:18, 55.18it/s]\nSampling: 2%|▏ | 15/1000 [00:00<00:13, 74.76it/s]\nSampling: 2%|▏ | 24/1000 [00:00<00:12, 81.19it/s]\nSampling: 3%|▎ | 33/1000 [00:00<00:11, 83.63it/s]\nSampling: 4%|▍ | 42/1000 [00:00<00:11, 85.73it/s]\nSampling: 5%|▌ | 51/1000 [00:00<00:10, 87.05it/s]\nSampling: 6%|▌ | 61/1000 [00:00<00:10, 88.09it/s]\nSampling: 7%|▋ | 70/1000 [00:00<00:10, 88.63it/s]\nSampling: 8%|▊ | 79/1000 [00:00<00:10, 88.90it/s]\nSampling: 9%|▉ | 88/1000 [00:01<00:10, 89.23it/s]\nSampling: 10%|▉ | 98/1000 [00:01<00:10, 89.51it/s]\nSampling: 11%|█ | 107/1000 [00:01<00:10, 88.86it/s]\nSampling: 12%|█▏ | 116/1000 [00:01<00:09, 88.84it/s]\nSampling: 12%|█▎ | 125/1000 [00:01<00:10, 85.84it/s]\nSampling: 13%|█▎ | 134/1000 [00:01<00:09, 86.92it/s]\nSampling: 14%|█▍ | 143/1000 [00:01<00:09, 87.78it/s]\nSampling: 15%|█▌ | 153/1000 [00:01<00:09, 88.63it/s]\nSampling: 16%|█▋ | 163/1000 [00:01<00:09, 89.36it/s]\nSampling: 17%|█▋ | 172/1000 [00:01<00:09, 89.25it/s]\nSampling: 18%|█▊ | 181/1000 [00:02<00:09, 89.10it/s]\nSampling: 19%|█▉ | 190/1000 [00:02<00:09, 88.12it/s]\nSampling: 20%|█▉ | 199/1000 [00:02<00:09, 88.61it/s]\nSampling: 21%|██ | 208/1000 [00:02<00:08, 88.88it/s]\nSampling: 22%|██▏ | 217/1000 [00:02<00:08, 88.95it/s]\nSampling: 23%|██▎ | 226/1000 [00:02<00:09, 85.92it/s]\nSampling: 24%|██▎ | 235/1000 [00:02<00:08, 85.71it/s]\nSampling: 24%|██▍ | 245/1000 [00:02<00:08, 87.17it/s]\nSampling: 26%|██▌ | 255/1000 [00:02<00:08, 88.27it/s]\nSampling: 26%|██▋ | 265/1000 [00:03<00:08, 89.05it/s]\nSampling: 27%|██▋ | 274/1000 [00:03<00:08, 89.05it/s]\nSampling: 28%|██▊ | 284/1000 [00:03<00:07, 89.58it/s]\nSampling: 29%|██▉ | 294/1000 [00:03<00:07, 89.95it/s]\nSampling: 30%|███ | 303/1000 [00:03<00:07, 89.50it/s]\nSampling: 31%|███ | 312/1000 [00:03<00:07, 89.21it/s]\nSampling: 
32%|███▏ | 322/1000 [00:03<00:07, 88.06it/s]\nSampling: 33%|███▎ | 331/1000 [00:03<00:08, 82.01it/s]\nSampling: 34%|███▍ | 340/1000 [00:03<00:08, 76.62it/s]\nSampling: 35%|███▍ | 349/1000 [00:04<00:08, 79.28it/s]\nSampling: 36%|███▌ | 358/1000 [00:04<00:07, 82.15it/s]\nSampling: 37%|███▋ | 368/1000 [00:04<00:07, 84.76it/s]\nSampling: 38%|███▊ | 377/1000 [00:04<00:07, 85.59it/s]\nSampling: 39%|███▊ | 386/1000 [00:04<00:07, 86.24it/s]\nSampling: 40%|███▉ | 395/1000 [00:04<00:06, 87.28it/s]\nSampling: 40%|████ | 404/1000 [00:04<00:06, 87.98it/s]\nSampling: 41%|████▏ | 414/1000 [00:04<00:06, 88.65it/s]\nSampling: 42%|████▏ | 423/1000 [00:04<00:06, 88.67it/s]\nSampling: 43%|████▎ | 432/1000 [00:04<00:06, 87.79it/s]\nSampling: 44%|████▍ | 441/1000 [00:05<00:06, 81.04it/s]\nSampling: 45%|████▌ | 450/1000 [00:05<00:06, 83.47it/s]\nSampling: 46%|████▌ | 459/1000 [00:05<00:06, 84.75it/s]\nSampling: 47%|████▋ | 468/1000 [00:05<00:06, 85.95it/s]\nSampling: 48%|████▊ | 477/1000 [00:05<00:06, 86.40it/s]\nSampling: 49%|████▊ | 486/1000 [00:05<00:05, 86.96it/s]\nSampling: 50%|████▉ | 495/1000 [00:05<00:06, 84.11it/s]\nSampling: 50%|█████ | 504/1000 [00:05<00:06, 79.72it/s]\nSampling: 51%|█████▏ | 513/1000 [00:05<00:06, 75.46it/s]\nSampling: 52%|█████▏ | 521/1000 [00:06<00:06, 74.52it/s]\nSampling: 53%|█████▎ | 529/1000 [00:06<00:06, 74.46it/s]\nSampling: 54%|█████▍ | 538/1000 [00:06<00:05, 77.79it/s]\nSampling: 55%|█████▍ | 547/1000 [00:06<00:05, 79.97it/s]\nSampling: 56%|█████▌ | 556/1000 [00:06<00:05, 81.23it/s]\nSampling: 56%|█████▋ | 565/1000 [00:06<00:05, 82.66it/s]\nSampling: 57%|█████▋ | 574/1000 [00:06<00:05, 83.38it/s]\nSampling: 58%|█████▊ | 583/1000 [00:06<00:04, 84.27it/s]\nSampling: 59%|█████▉ | 592/1000 [00:06<00:04, 85.06it/s]\nSampling: 60%|██████ | 601/1000 [00:07<00:04, 84.42it/s]\nSampling: 61%|██████ | 610/1000 [00:07<00:04, 84.80it/s]\nSampling: 62%|██████▏ | 619/1000 [00:07<00:04, 84.83it/s]\nSampling: 63%|██████▎ | 628/1000 [00:07<00:04, 
84.92it/s]\nSampling: 64%|██████▎ | 637/1000 [00:07<00:04, 84.61it/s]\nSampling: 65%|██████▍ | 646/1000 [00:07<00:04, 85.21it/s]\nSampling: 66%|██████▌ | 655/1000 [00:07<00:04, 85.73it/s]\nSampling: 66%|██████▋ | 664/1000 [00:07<00:03, 84.85it/s]\nSampling: 67%|██████▋ | 673/1000 [00:07<00:03, 85.35it/s]\nSampling: 68%|██████▊ | 682/1000 [00:08<00:03, 85.06it/s]\nSampling: 69%|██████▉ | 691/1000 [00:08<00:03, 85.40it/s]\nSampling: 70%|███████ | 700/1000 [00:08<00:03, 85.67it/s]\nSampling: 71%|███████ | 709/1000 [00:08<00:03, 86.28it/s]\nSampling: 72%|███████▏ | 718/1000 [00:08<00:03, 87.08it/s]\nSampling: 73%|███████▎ | 727/1000 [00:08<00:03, 87.74it/s]\nSampling: 74%|███████▎ | 736/1000 [00:08<00:03, 87.56it/s]\nSampling: 74%|███████▍ | 745/1000 [00:08<00:02, 87.25it/s]\nSampling: 75%|███████▌ | 754/1000 [00:08<00:02, 86.45it/s]\nSampling: 76%|███████▋ | 763/1000 [00:08<00:02, 85.54it/s]\nSampling: 77%|███████▋ | 772/1000 [00:09<00:02, 85.73it/s]\nSampling: 78%|███████▊ | 781/1000 [00:09<00:02, 86.06it/s]\nSampling: 79%|███████▉ | 790/1000 [00:09<00:02, 74.94it/s]\nSampling: 80%|███████▉ | 799/1000 [00:09<00:02, 76.75it/s]\nSampling: 81%|████████ | 808/1000 [00:09<00:02, 79.31it/s]\nSampling: 82%|████████▏ | 817/1000 [00:09<00:02, 81.78it/s]\nSampling: 83%|████████▎ | 826/1000 [00:09<00:02, 83.09it/s]\nSampling: 84%|████████▎ | 835/1000 [00:09<00:01, 83.62it/s]\nSampling: 84%|████████▍ | 844/1000 [00:09<00:01, 83.99it/s]\nSampling: 85%|████████▌ | 853/1000 [00:10<00:01, 84.38it/s]\nSampling: 86%|████████▌ | 862/1000 [00:10<00:01, 84.34it/s]\nSampling: 87%|████████▋ | 871/1000 [00:10<00:01, 80.87it/s]\nSampling: 88%|████████▊ | 880/1000 [00:10<00:01, 81.79it/s]\nSampling: 89%|████████▉ | 889/1000 [00:10<00:01, 81.93it/s]\nSampling: 90%|████████▉ | 898/1000 [00:10<00:01, 83.45it/s]\nSampling: 91%|█████████ | 907/1000 [00:10<00:01, 84.63it/s]\nSampling: 92%|█████████▏| 916/1000 [00:10<00:00, 85.65it/s]\nSampling: 92%|█████████▎| 925/1000 [00:10<00:00, 
86.04it/s]\nSampling: 93%|█████████▎| 934/1000 [00:11<00:00, 84.32it/s]\nSampling: 94%|█████████▍| 943/1000 [00:11<00:00, 84.61it/s]\nSampling: 95%|█████████▌| 952/1000 [00:11<00:00, 85.10it/s]\nSampling: 96%|█████████▌| 961/1000 [00:11<00:00, 84.62it/s]\nSampling: 97%|█████████▋| 970/1000 [00:11<00:00, 85.25it/s]\nSampling: 98%|█████████▊| 979/1000 [00:11<00:00, 85.66it/s]\nSampling: 98%|█████████▊| 981/1000 [00:11<00:00, 84.85it/s]\nInput character count: 809\nCharacters per second: 63.55\nTotal time: 12.73s",
"output": "https://replicate.delivery/xezq/xx8Wscg0ve0gfko6krsG77SUuy7ZsQINBrJuRgq6GfCzJVspA/output.wav",
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2025-06-12T15:50:04.982Z",
"started_at": "2025-06-12T15:50:04.991055Z",
"completed_at": "2025-06-12T15:50:17.756333Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/g7r2drc7psrmc0cqckasg1z4ec/cancel",
"get": "https://api.replicate.com/v1/predictions/g7r2drc7psrmc0cqckasg1z4ec",
"stream": "https://stream.replicate.com/v1/files/bcwr-alelxyvdrsmhelqnady6t6w6p7mnhz4vsvyaa2zviel5mupyrbaa",
"web": "https://replicate.com/p/g7r2drc7psrmc0cqckasg1z4ec"
},
"metrics": {
"input_token_count": 161,
"output_token_count": 1,
"predict_time": 12.765277608,
"time_to_first_token": 0.009085425000000001,
"tokens_per_second": 0.07833769183347587,
"total_time": 12.774333
}
}