Failed to load versions. Head to the versions page to see all versions for this model.
You're looking at a specific version of this model. Jump to the model overview.
ttsds/voicecraft:d693bf62
Input
Run this model in Node.js with one line of code:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import { writeFile } from "node:fs/promises";
import Replicate from "replicate";
// Build the API client, authenticating with the token read from the
// REPLICATE_API_TOKEN environment variable.
const apiToken = process.env.REPLICATE_API_TOKEN;
const replicate = new Replicate({ auth: apiToken });
Run ttsds/voicecraft using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Run the pinned model version with the example input and wait for its output.
const output = await replicate.run(
  "ttsds/voicecraft:d693bf62054c6bd92898ea6b644317a9bb912881fb2c37a0fad4ce26b59f0539",
  {
    input: {
      text: "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
      version: "giga330m",
      text_reference: "and keeping eternity before the eyes, though much.",
      speaker_reference: "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
    }
  }
);

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk. This model's example output is a WAV file, so it
// is saved with a .wav name. The promise-based writeFile (imported from
// "node:fs/promises") is awaited so a failed write rejects here instead of
// being lost; the callback-style fs.writeFile would require a callback.
await writeFile("output.wav", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run ttsds/voicecraft using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Pinned model version to run.
model_ref = "ttsds/voicecraft:d693bf62054c6bd92898ea6b644317a9bb912881fb2c37a0fad4ce26b59f0539"

# Example input values passed to the model.
model_input = {
    "text": "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
    "version": "giga330m",
    "text_reference": "and keeping eternity before the eyes, though much.",
    "speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav",
}

# Run the model and print whatever the client returns.
output = replicate.run(model_ref, input=model_input)
print(output)
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run ttsds/voicecraft using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction by POSTing a JSON body to the predictions endpoint.
# The request authenticates with the bearer token in $REPLICATE_API_TOKEN.
# The body uses $'...' (ANSI-C) quoting so that the \' inside the "text"
# value is an escaped apostrophe rather than the end of the shell string.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "ttsds/voicecraft:d693bf62054c6bd92898ea6b644317a9bb912881fb2c37a0fad4ce26b59f0539",
"input": {
"text": "With tenure, Suzie\'d have all the more leisure for yachting, but her publications are no good.",
"version": "giga330m",
"text_reference": "and keeping eternity before the eyes, though much.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our
terms of service and privacy policy
Output
- Chapters
- descriptions off, selected
- captions settings, opens captions settings dialog
- captions off, selected
This is a modal window.
Beginning of dialog window. Escape will cancel and close the window.
End of dialog window.
{
"completed_at": "2025-02-26T12:46:00.898854Z",
"created_at": "2025-02-26T12:42:07.745000Z",
"data_removed": false,
"error": null,
"id": "b2js7ntw05rj00cn893t3w0nhc",
"input": {
"text": "With tenure, Suzie'd have all the more leisure for yachting, but her publications are no good.",
"version": "giga330m",
"text_reference": "and keeping eternity before the eyes, though much.",
"speaker_reference": "https://replicate.delivery/pbxt/MNFXdPaUPOwYCZjZM4azsymbzE2TCV2WJXfGpeV2DrFWaSq8/example_en.wav"
},
"logs": "INFO Setting up corpus information...\nINFO Loading corpus from source files...\n 1% 1/100 [ 0:00:01 < -:--:-- , ? it/s ]\nINFO Found 1 speaker across 1 file, average number of utterances per\nspeaker: 1.0\nINFO Initializing multiprocessing jobs...\nINFO Normalizing text...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Generating MFCCs...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:12 < 0:00:00 , ? it/s ]\nINFO Calculating CMVN...\nINFO Generating final features...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Creating corpus split...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Compiling training graphs...\nINFO Performing first-pass alignment...\nINFO Generating alignments...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Calculating fMLLR for speaker adaptation...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Performing second-pass alignment...\nINFO Generating alignments...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]\nINFO Collecting phone and word alignments from alignment lattices...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:02 < 0:00:00 , ? it/s ]\nWARNING Alignment analysis not available without using postgresql\nINFO Exporting alignment TextGrids to /tmp/tmphk_hfkzo/mfa_alignments...\n 100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:00 < 0:00:00 , ? it/s ]\nINFO Finished exporting TextGrids to /tmp/tmphk_hfkzo/mfa_alignments!\nINFO Done! Everything took 70.744 seconds",
"metrics": {
"predict_time": 79.023730294,
"total_time": 233.153854
},
"output": "https://replicate.delivery/yhqm/9aeIbkwlkhyILiz23FSX1Y14TfppQboQueLep4g6wAvgwvMRB/output.wav",
"started_at": "2025-02-26T12:44:41.875123Z",
"status": "succeeded",
"urls": {
"stream": "https://stream.replicate.com/v1/files/yswh-fbsvlojvo62m7ag6gnctefdjutazdy2e4a4snprch22cokks7m3a",
"get": "https://api.replicate.com/v1/predictions/b2js7ntw05rj00cn893t3w0nhc",
"cancel": "https://api.replicate.com/v1/predictions/b2js7ntw05rj00cn893t3w0nhc/cancel"
},
"version": "d693bf62054c6bd92898ea6b644317a9bb912881fb2c37a0fad4ce26b59f0539"
}
INFO Setting up corpus information...
INFO Loading corpus from source files...
1% 1/100 [ 0:00:01 < -:--:-- , ? it/s ]
INFO Found 1 speaker across 1 file, average number of utterances per
speaker: 1.0
INFO Initializing multiprocessing jobs...
INFO Normalizing text...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Generating MFCCs...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:12 < 0:00:00 , ? it/s ]
INFO Calculating CMVN...
INFO Generating final features...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Creating corpus split...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Compiling training graphs...
INFO Performing first-pass alignment...
INFO Generating alignments...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Calculating fMLLR for speaker adaptation...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Performing second-pass alignment...
INFO Generating alignments...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:01 < 0:00:00 , ? it/s ]
INFO Collecting phone and word alignments from alignment lattices...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:02 < 0:00:00 , ? it/s ]
WARNING Alignment analysis not available without using postgresql
INFO Exporting alignment TextGrids to /tmp/tmphk_hfkzo/mfa_alignments...
100% ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1/1 [ 0:00:00 < 0:00:00 , ? it/s ]
INFO Finished exporting TextGrids to /tmp/tmphk_hfkzo/mfa_alignments!
INFO Done! Everything took 70.744 seconds