afiaka87 / tortoise-tts
Generate speech from text, clone voices from mp3 files. From James Betker AKA "neonbjb".
Want to make some of these yourself?
Run this model. Generate speech from text, clone voices from mp3 files. From James Betker AKA "neonbjb".
{
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "tom",
"voice_b": "disabled",
"voice_c": "disabled"
}
npm install replicate
import Replicate from "replicate";
import fs from "node:fs";
// Create the Replicate client; the API token is read from the
// REPLICATE_API_TOKEN environment variable rather than hard-coded.
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned tortoise-tts version with the example inputs and wait for
// the result. Only voice_a is active here; voice_b and voice_c are "disabled".
const output = await replicate.run(
  "afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
  {
    input: {
      seed: 0,
      text: "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
      preset: "fast",
      voice_a: "tom",
      voice_b: "disabled",
      voice_c: "disabled"
    }
  }
);
// To access the file URL:
console.log(output.url()); //=> "http://example.com"
// To write the file to disk:
// The model returns audio, so save it with an .mp3 name. The promise-based
// writeFile is awaited because callback-style fs.writeFile throws when it is
// called without a callback, and awaiting ensures the file is fully written.
await fs.promises.writeFile("output.mp3", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
import replicate
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the pinned tortoise-tts version with the example inputs; the value
# returned by the call is bound to `output`.
output = replicate.run(
"afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
input={
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "tom",
"voice_b": "disabled",
"voice_c": "disabled"
}
)
# Print whatever the run returned.
print(output)
To learn more, take a look at the guide on getting started with Python.
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction over HTTP: POST the JSON payload below to the
# predictions endpoint, authenticating with the bearer token taken from
# the REPLICATE_API_TOKEN environment variable.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
"input": {
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "tom",
"voice_b": "disabled",
"voice_c": "disabled"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2022-08-11T10:49:37.042797Z",
"created_at": "2022-08-11T10:48:15.709883Z",
"data_removed": false,
"error": null,
"id": "xcxnbxeytbghhegcm2w7jg43te",
"input": {
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "tom",
"voice_b": "disabled",
"voice_c": "disabled"
},
"logs": "Generating text using voices: ['tom']",
"metrics": {
"predict_time": 81.138097,
"total_time": 81.332914
},
"output": "https://replicate.delivery/mgxm/c07e881f-0f8d-45ef-9bec-69301e163b6f/tortoise.mp3",
"started_at": "2022-08-11T10:48:15.904700Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/xcxnbxeytbghhegcm2w7jg43te",
"cancel": "https://api.replicate.com/v1/predictions/xcxnbxeytbghhegcm2w7jg43te/cancel"
},
"version": "e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71"
}
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "custom_voice",
"voice_b": "disabled",
"voice_c": "disabled",
"custom_voice": "https://replicate.delivery/mgxm/671f3086-382f-4850-be82-db853e5f05a8/nixon.mp3"
}
npm install replicate
import Replicate from "replicate";
import fs from "node:fs";
// Create the Replicate client; the API token is read from the
// REPLICATE_API_TOKEN environment variable rather than hard-coded.
const replicate = new Replicate({
auth: process.env.REPLICATE_API_TOKEN,
});
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned tortoise-tts version with a custom voice: voice_a is set to
// "custom_voice" and custom_voice supplies the URL of the reference mp3.
const output = await replicate.run(
  "afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
  {
    input: {
      seed: 0,
      text: "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
      preset: "fast",
      voice_a: "custom_voice",
      voice_b: "disabled",
      voice_c: "disabled",
      custom_voice: "https://replicate.delivery/mgxm/671f3086-382f-4850-be82-db853e5f05a8/nixon.mp3"
    }
  }
);
// To access the file URL:
console.log(output.url()); //=> "http://example.com"
// To write the file to disk:
// The model returns audio, so save it with an .mp3 name. The promise-based
// writeFile is awaited because callback-style fs.writeFile throws when it is
// called without a callback, and awaiting ensures the file is fully written.
await fs.promises.writeFile("output.mp3", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
import replicate
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the pinned tortoise-tts version with a custom voice: voice_a is set to
# "custom_voice" and custom_voice supplies the URL of the reference mp3.
output = replicate.run(
"afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
input={
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "custom_voice",
"voice_b": "disabled",
"voice_c": "disabled",
"custom_voice": "https://replicate.delivery/mgxm/671f3086-382f-4850-be82-db853e5f05a8/nixon.mp3"
}
)
# Print whatever the run returned.
print(output)
To learn more, take a look at the guide on getting started with Python.
Run afiaka87/tortoise-tts using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a custom-voice prediction over HTTP: POST the JSON payload below to
# the predictions endpoint, authenticating with the bearer token taken from
# the REPLICATE_API_TOKEN environment variable.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "afiaka87/tortoise-tts:e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71",
"input": {
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "custom_voice",
"voice_b": "disabled",
"voice_c": "disabled",
"custom_voice": "https://replicate.delivery/mgxm/671f3086-382f-4850-be82-db853e5f05a8/nixon.mp3"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2022-08-11T13:14:11.831680Z",
"created_at": "2022-08-11T13:06:32.322889Z",
"data_removed": false,
"error": null,
"id": "sxo6kqotqfeqxhk24qvy6igsmq",
"input": {
"seed": 0,
"text": "The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.",
"preset": "fast",
"voice_a": "custom_voice",
"voice_b": "disabled",
"voice_c": "disabled",
"custom_voice": "https://replicate.delivery/mgxm/671f3086-382f-4850-be82-db853e5f05a8/nixon.mp3"
},
"logs": "Creating voice from /tmp/tmpn3ll0ogznixon.mp3\n\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33mWARNING\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m: Input file had loudness range of 10.4, which is larger than the loudness range target (7.0). Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization.\n\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33m\u001b[1;33mWARNING\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m\u001b[1;0m: In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. Specify -ar/--sample-rate to override it.\n[wav @ 0x560e9a45a3c0] ignoring wrong sample_count 55165030\n[wav @ 0x560e9a45a3c0] Estimating duration from bitrate, this may be inaccurate\nGenerating text using voices: ['custom_voice']\nGenerating autoregressive samples..\n\n 0%| | 0/6 [00:00<?, ?it/s]\n 17%|█▋ | 1/6 [00:05<00:28, 5.80s/it]\n 33%|███▎ | 2/6 [00:11<00:22, 5.57s/it]\n 50%|█████ | 3/6 [00:16<00:16, 5.65s/it]\n 67%|██████▋ | 4/6 [00:22<00:10, 5.43s/it]\n 83%|████████▎ | 5/6 [00:27<00:05, 5.54s/it]\n100%|██████████| 6/6 [00:33<00:00, 5.61s/it]\n100%|██████████| 6/6 [00:33<00:00, 5.59s/it]\nComputing best candidates using CLVP\n\n 0%| | 0/6 [00:00<?, ?it/s]\n 17%|█▋ | 1/6 [00:00<00:01, 3.79it/s]\n 33%|███▎ | 2/6 [00:01<00:02, 1.46it/s]\n 50%|█████ | 3/6 [00:02<00:02, 1.22it/s]\n 67%|██████▋ | 4/6 [00:03<00:01, 1.14it/s]\n 83%|████████▎ | 5/6 [00:04<00:00, 1.09it/s]\n100%|██████████| 6/6 [00:05<00:00, 1.06it/s]\n100%|██████████| 6/6 [00:05<00:00, 1.16it/s]",
"metrics": {
"predict_time": 235.452716,
"total_time": 459.508791
},
"output": "https://replicate.delivery/mgxm/f6253eec-53f6-4ca0-8715-8ad16abdb794/tortoise.mp3",
"started_at": "2022-08-11T13:10:16.378964Z",
"status": "succeeded",
"urls": {
"get": "https://api.replicate.com/v1/predictions/sxo6kqotqfeqxhk24qvy6igsmq",
"cancel": "https://api.replicate.com/v1/predictions/sxo6kqotqfeqxhk24qvy6igsmq/cancel"
},
"version": "e9658de4b325863c4fcdc12d94bb7c9b54cbfe351b7ca1b36860008172b91c71"
}
Creating voice from /tmp/tmpn3ll0ogznixon.mp3
WARNING: Input file had loudness range of 10.4, which is larger than the loudness range target (7.0). Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization.
WARNING: In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. Specify -ar/--sample-rate to override it.
[wav @ 0x560e9a45a3c0] ignoring wrong sample_count 55165030
[wav @ 0x560e9a45a3c0] Estimating duration from bitrate, this may be inaccurate
Generating text using voices: ['custom_voice']
Generating autoregressive samples..
0%| | 0/6 [00:00<?, ?it/s]
17%|█▋ | 1/6 [00:05<00:28, 5.80s/it]
33%|███▎ | 2/6 [00:11<00:22, 5.57s/it]
50%|█████ | 3/6 [00:16<00:16, 5.65s/it]
67%|██████▋ | 4/6 [00:22<00:10, 5.43s/it]
83%|████████▎ | 5/6 [00:27<00:05, 5.54s/it]
100%|██████████| 6/6 [00:33<00:00, 5.61s/it]
100%|██████████| 6/6 [00:33<00:00, 5.59s/it]
Computing best candidates using CLVP
0%| | 0/6 [00:00<?, ?it/s]
17%|█▋ | 1/6 [00:00<00:01, 3.79it/s]
33%|███▎ | 2/6 [00:01<00:02, 1.46it/s]
50%|█████ | 3/6 [00:02<00:02, 1.22it/s]
67%|██████▋ | 4/6 [00:03<00:01, 1.14it/s]
83%|████████▎ | 5/6 [00:04<00:00, 1.09it/s]
100%|██████████| 6/6 [00:05<00:00, 1.06it/s]
100%|██████████| 6/6 [00:05<00:00, 1.16it/s]
Want to make some of these yourself?
Run this model. This model is cold. You'll get a fast response if the model is warm and already running, and a slower response if the model is cold and starting up.
This model runs on T4. View more.
Generating text using voices: ['tom']
Creating voice from /tmp/tmpn3ll0ogznixon.mp3
WARNING: Input file had loudness range of 10.4, which is larger than the loudness range target (7.0). Normalization will revert to dynamic mode. Choose a higher target loudness range if you want linear normalization.
WARNING: In dynamic mode, the sample rate will automatically be set to 192 kHz by the loudnorm filter. Specify -ar/--sample-rate to override it.
[wav @ 0x560e9a45a3c0] ignoring wrong sample_count 55165030
[wav @ 0x560e9a45a3c0] Estimating duration from bitrate, this may be inaccurate
Generating text using voices: ['custom_voice']
Generating autoregressive samples..
0%| | 0/6 [00:00<?, ?it/s]
17%|█▋ | 1/6 [00:05<00:28, 5.80s/it]
33%|███▎ | 2/6 [00:11<00:22, 5.57s/it]
50%|█████ | 3/6 [00:16<00:16, 5.65s/it]
67%|██████▋ | 4/6 [00:22<00:10, 5.43s/it]
83%|████████▎ | 5/6 [00:27<00:05, 5.54s/it]
100%|██████████| 6/6 [00:33<00:00, 5.61s/it]
100%|██████████| 6/6 [00:33<00:00, 5.59s/it]
Computing best candidates using CLVP
0%| | 0/6 [00:00<?, ?it/s]
17%|█▋ | 1/6 [00:00<00:01, 3.79it/s]
33%|███▎ | 2/6 [00:01<00:02, 1.46it/s]
50%|█████ | 3/6 [00:02<00:02, 1.22it/s]
67%|██████▋ | 4/6 [00:03<00:01, 1.14it/s]
83%|████████▎ | 5/6 [00:04<00:00, 1.09it/s]
100%|██████████| 6/6 [00:05<00:00, 1.06it/s]
100%|██████████| 6/6 [00:05<00:00, 1.16it/s]