Failed to load versions. Head to the versions page to see all versions for this model.
You're looking at a specific version of this model. Jump to the model overview.
Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";
import fs from "node:fs";
// Create the API client, authenticating with the token read from the environment.
const apiToken = process.env.REPLICATE_API_TOKEN;
const replicate = new Replicate({ auth: apiToken });
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned model version with the text to speak and a speaker reference audio URL.
const output = await replicate.run(
  "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143",
  {
    input: {
      text: "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
      speaker_reference: "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
    }
  }
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// The output is WAV audio, so save it with a .wav name. Use the promise-based
// writeFile and await it: callback-style fs.writeFile throws a TypeError when
// called without a callback.
await fs.promises.writeFile("output.wav", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Inputs for the model: the text to speak and a speaker reference audio URL.
model_input = {
    "text": "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
    "speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav",
}

# Run the pinned model version and print whatever the client returns.
output = replicate.run(
    "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143",
    input=model_input,
)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run ttsds/metavoice using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction over the HTTP API, authenticating with the bearer token
# from the environment; the JSON payload names the model version and its inputs.
curl --silent --request POST \
  --header "Authorization: Bearer $REPLICATE_API_TOKEN" \
  --header "Content-Type: application/json" \
  --header "Prefer: wait" \
  --data $'{
"version": "ttsds/metavoice:3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143",
"input": {
"text": "With tenure, Suzie\'d have all the more leisure for yachting, but her publications are no good.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
}
}' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our terms of service and privacy policy.
Output
- Chapters
- descriptions off, selected
- captions settings, opens captions settings dialog
- captions off, selected
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2025-01-31T09:15:24.593890Z",
"created_at": "2025-01-31T09:10:51.770000Z",
"data_removed": false,
"error": null,
"id": "evp9tr4rf9rj00cmqeg8mng7rr",
"input": {
"text": "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
},
"logs": "0%| | 0/2005 [00:00<?, ?it/s]\n 1%|▏ | 30/2005 [00:00<00:06, 294.61it/s]\n 3%|▎ | 60/2005 [00:00<00:06, 290.72it/s]\n 4%|▍ | 90/2005 [00:00<00:06, 289.47it/s]\n 6%|▌ | 119/2005 [00:00<00:06, 288.95it/s]\n 7%|▋ | 148/2005 [00:00<00:06, 288.68it/s]\n 9%|▉ | 177/2005 [00:00<00:06, 288.49it/s]\n 10%|█ | 206/2005 [00:00<00:06, 288.35it/s]\n 12%|█▏ | 235/2005 [00:00<00:06, 288.35it/s]\n 13%|█▎ | 264/2005 [00:00<00:06, 288.20it/s]\n 15%|█▍ | 293/2005 [00:01<00:05, 288.18it/s]\n 16%|█▌ | 322/2005 [00:01<00:05, 288.20it/s]\n 18%|█▊ | 351/2005 [00:01<00:05, 288.10it/s]\n 19%|█▉ | 380/2005 [00:01<00:05, 287.93it/s]\n 20%|██ | 409/2005 [00:01<00:05, 287.99it/s]\n 22%|██▏ | 438/2005 [00:01<00:05, 287.91it/s]\n 23%|██▎ | 467/2005 [00:01<00:05, 287.78it/s]\n 25%|██▍ | 496/2005 [00:01<00:05, 287.88it/s]\n 26%|██▌ | 525/2005 [00:01<00:05, 287.89it/s]\n 28%|██▊ | 554/2005 [00:01<00:05, 287.99it/s]\n 29%|██▉ | 583/2005 [00:02<00:04, 288.01it/s]\n 31%|███ | 612/2005 [00:02<00:04, 287.97it/s]\n 32%|███▏ | 641/2005 [00:02<00:04, 287.99it/s]\n 33%|███▎ | 670/2005 [00:02<00:04, 288.03it/s]\n 35%|███▍ | 699/2005 [00:02<00:04, 288.01it/s]\n 36%|███▋ | 728/2005 [00:02<00:04, 287.94it/s]\n 38%|███▊ | 757/2005 [00:02<00:04, 287.94it/s]\n 39%|███▉ | 786/2005 [00:02<00:04, 287.91it/s]\n40%|███▉ | 798/2005 [00:02<00:04, 287.90it/s]\nTime for 1st stage LLM inference: 2.78 sec total, 287.29 tokens/sec\nBandwidth achieved: 717.34 GB/s\nMemory used: 8.94 GB\nNon-causal batching: 0%| | 0/1 [00:00<?, ?it/s]\nNon-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.63it/s]\nNon-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.62it/s]\n2025-01-31 09:15:24 | WARNING | DF | Audio sampling rate does not match model sampling rate (24000, 48000). Resampling...\nSaved audio to /src/outputs/synth_25-01-31--09-15-24_With_tenure,_Suzie'd_have_ff59dbfc-89c0-4f09-8c4b-a815958f5cfd.wav\nTotal time to synth (s): 6.059656143188477\nReal-time factor: 1.14",
"metrics": {
"predict_time": 12.372130789,
"total_time": 272.82389
},
"output": "https://replicate.delivery/yhqm/asBO7CuXZSZkERYg7xiX80ttehvccmlYAGKkd5jJqpZWNSFKA/output.wav",
"started_at": "2025-01-31T09:15:12.221759Z",
"status": "succeeded",
"urls": {
"stream": "https://stream.replicate.com/v1/files/yswh-wnk564iuh4qmjadxywvuzzial4lv7wdf3yn4fjkog26lguget3zq",
"get": "https://api.replicate.com/v1/predictions/evp9tr4rf9rj00cmqeg8mng7rr",
"cancel": "https://api.replicate.com/v1/predictions/evp9tr4rf9rj00cmqeg8mng7rr/cancel"
},
"version": "3495610f45204d13509ef709586d9badd3bc4bd895aa712a252b249df6693143"
}
0%| | 0/2005 [00:00<?, ?it/s]
1%|▏ | 30/2005 [00:00<00:06, 294.61it/s]
3%|▎ | 60/2005 [00:00<00:06, 290.72it/s]
4%|▍ | 90/2005 [00:00<00:06, 289.47it/s]
6%|▌ | 119/2005 [00:00<00:06, 288.95it/s]
7%|▋ | 148/2005 [00:00<00:06, 288.68it/s]
9%|▉ | 177/2005 [00:00<00:06, 288.49it/s]
10%|█ | 206/2005 [00:00<00:06, 288.35it/s]
12%|█▏ | 235/2005 [00:00<00:06, 288.35it/s]
13%|█▎ | 264/2005 [00:00<00:06, 288.20it/s]
15%|█▍ | 293/2005 [00:01<00:05, 288.18it/s]
16%|█▌ | 322/2005 [00:01<00:05, 288.20it/s]
18%|█▊ | 351/2005 [00:01<00:05, 288.10it/s]
19%|█▉ | 380/2005 [00:01<00:05, 287.93it/s]
20%|██ | 409/2005 [00:01<00:05, 287.99it/s]
22%|██▏ | 438/2005 [00:01<00:05, 287.91it/s]
23%|██▎ | 467/2005 [00:01<00:05, 287.78it/s]
25%|██▍ | 496/2005 [00:01<00:05, 287.88it/s]
26%|██▌ | 525/2005 [00:01<00:05, 287.89it/s]
28%|██▊ | 554/2005 [00:01<00:05, 287.99it/s]
29%|██▉ | 583/2005 [00:02<00:04, 288.01it/s]
31%|███ | 612/2005 [00:02<00:04, 287.97it/s]
32%|███▏ | 641/2005 [00:02<00:04, 287.99it/s]
33%|███▎ | 670/2005 [00:02<00:04, 288.03it/s]
35%|███▍ | 699/2005 [00:02<00:04, 288.01it/s]
36%|███▋ | 728/2005 [00:02<00:04, 287.94it/s]
38%|███▊ | 757/2005 [00:02<00:04, 287.94it/s]
39%|███▉ | 786/2005 [00:02<00:04, 287.91it/s]
40%|███▉ | 798/2005 [00:02<00:04, 287.90it/s]
Time for 1st stage LLM inference: 2.78 sec total, 287.29 tokens/sec
Bandwidth achieved: 717.34 GB/s
Memory used: 8.94 GB
Non-causal batching: 0%| | 0/1 [00:00<?, ?it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.63it/s]
Non-causal batching: 100%|██████████| 1/1 [00:00<00:00, 7.62it/s]
2025-01-31 09:15:24 | WARNING | DF | Audio sampling rate does not match model sampling rate (24000, 48000). Resampling...
Saved audio to /src/outputs/synth_25-01-31--09-15-24_With_tenure,_Suzie'd_have_ff59dbfc-89c0-4f09-8c4b-a815958f5cfd.wav
Total time to synth (s): 6.059656143188477
Real-time factor: 1.14