typetext
{
"bitrate": 128000,
"channel": "mono",
"emotion": "angry",
"english_normalization": true,
"language_boost": "English",
"pitch": 0,
"sample_rate": 32000,
"speed": 1,
"text": "Speech-02-series is a Text-to-Audio and voice cloning technology that offers voice synthesis, emotional expression, and multilingual capabilities.\n\nThe HD version is optimized for high-fidelity applications like voiceovers and audiobooks. While the turbo one is designed for real-time applications with low latency.\n\nWhen using this model on Replicate, each character represents 1 token.",
"voice_id": "Deep_Voice_Man",
"volume": 1
}
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_UiP**********************************
This is your API token. Keep it to yourself.
import Replicate from "replicate";
import fs from "node:fs";
// Build the API client, authenticating with the token read from the
// REPLICATE_API_TOKEN environment variable.
const apiToken = process.env.REPLICATE_API_TOKEN;
const replicate = new Replicate({ auth: apiToken });
Run minimax/speech-02-turbo using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Input parameters sent to the model for this text-to-speech request.
const input = {
  bitrate: 128000,
  channel: "mono",
  emotion: "angry",
  english_normalization: true,
  language_boost: "English",
  pitch: 0,
  sample_rate: 32000,
  speed: 1,
  text: "Speech-02-series is a Text-to-Audio and voice cloning technology that offers voice synthesis, emotional expression, and multilingual capabilities.\n\nThe HD version is optimized for high-fidelity applications like voiceovers and audiobooks. While the turbo one is designed for real-time applications with low latency.\n\nWhen using this model on Replicate, each character represents 1 token.",
  voice_id: "Deep_Voice_Man",
  volume: 1,
};

// Run the model and wait for its output.
const output = await replicate.run("minimax/speech-02-turbo", { input });

// To access the file URL:
console.log(output.url()); //=> "http://example.com"

// To write the file to disk:
// The example output of this model is an MP3 file, so the file is named
// accordingly. The promise-based writeFile is awaited so a failed write
// surfaces as a rejection; the callback-style fs.writeFile throws when it is
// called without a callback.
await fs.promises.writeFile("output.mp3", output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_UiP**********************************
This is your API token. Keep it to yourself.
import replicate
Run minimax/speech-02-turbo using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the model with the text-to-speech input parameters and wait for its output.
output = replicate.run(
    "minimax/speech-02-turbo",
    input={
        "bitrate": 128000,
        "channel": "mono",
        "emotion": "angry",
        "english_normalization": True,
        "language_boost": "English",
        "pitch": 0,
        "sample_rate": 32000,
        "speed": 1,
        "text": "Speech-02-series is a Text-to-Audio and voice cloning technology that offers voice synthesis, emotional expression, and multilingual capabilities.\n\nThe HD version is optimized for high-fidelity applications like voiceovers and audiobooks. While the turbo one is designed for real-time applications with low latency.\n\nWhen using this model on Replicate, each character represents 1 token.",
        "voice_id": "Deep_Voice_Man",
        "volume": 1,
    },
)

# To access the file URL:
print(output.url())
#=> "http://example.com"

# To write the file to disk:
# The example output of this model is an MP3 file, so the file is named accordingly.
with open("output.mp3", "wb") as file:
    file.write(output.read())
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_UiP**********************************
This is your API token. Keep it to yourself.
Run minimax/speech-02-turbo using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# POST a prediction request for minimax/speech-02-turbo to the HTTP API.
# The bearer token is read from the REPLICATE_API_TOKEN environment variable.
# The JSON body uses $'...' quoting, so "\\n" reaches the server as the JSON escape "\n".
curl --silent --request POST \
  --header "Authorization: Bearer $REPLICATE_API_TOKEN" \
  --header "Content-Type: application/json" \
  --header "Prefer: wait" \
  --data $'{
"input": {
"bitrate": 128000,
"channel": "mono",
"emotion": "angry",
"english_normalization": true,
"language_boost": "English",
"pitch": 0,
"sample_rate": 32000,
"speed": 1,
"text": "Speech-02-series is a Text-to-Audio and voice cloning technology that offers voice synthesis, emotional expression, and multilingual capabilities.\\n\\nThe HD version is optimized for high-fidelity applications like voiceovers and audiobooks. While the turbo one is designed for real-time applications with low latency.\\n\\nWhen using this model on Replicate, each character represents 1 token.",
"voice_id": "Deep_Voice_Man",
"volume": 1
}
}' \
  https://api.replicate.com/v1/models/minimax/speech-02-turbo/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
{
"id": "by67sg9dxdrm80cpjat9x3apxw",
"model": "minimax/speech-02-turbo",
"version": "hidden",
"input": {
"bitrate": 128000,
"channel": "mono",
"emotion": "angry",
"english_normalization": true,
"language_boost": "English",
"pitch": 0,
"sample_rate": 32000,
"speed": 1,
"text": "Speech-02-series is a Text-to-Audio and voice cloning technology that offers voice synthesis, emotional expression, and multilingual capabilities.\n\nThe HD version is optimized for high-fidelity applications like voiceovers and audiobooks. While the turbo one is designed for real-time applications with low latency.\n\nWhen using this model on Replicate, each character represents 1 token.",
"voice_id": "Deep_Voice_Man",
"volume": 1
},
"logs": "Generating speech with model speech-02-turbo\nGenerated speech in 2.35sec\nEach character is 1 token\nTokens: 387",
"output": "https://replicate.delivery/xezq/SnPxXgl26yaAApm29BJpcHRl5PyxHAxpDt97TP59rPiFeWUKA/tmp517d49p_.mp3",
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2025-05-02T20:33:23.947Z",
"started_at": "2025-05-02T20:33:23.956706Z",
"completed_at": "2025-05-02T20:33:26.323487Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/by67sg9dxdrm80cpjat9x3apxw/cancel",
"get": "https://api.replicate.com/v1/predictions/by67sg9dxdrm80cpjat9x3apxw",
"stream": "https://stream.replicate.com/v1/files/bcwr-5f2ztzmmo5dpyzhjikqd7ivkp42p6ecqjmuwt2nwmbr22irhrlrq",
"web": "https://replicate.com/p/by67sg9dxdrm80cpjat9x3apxw"
},
"metrics": {
"input_token_count": 387,
"output_token_count": 1,
"predict_time": 2.366780828,
"time_to_first_token": 0.009735579999999999,
"tokens_per_second": 0.4225201109939375,
"total_time": 2.376487
}
}