Example input:
{
  "audio": [
    "https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"
  ],
  "frequency_penalty": 0,
  "max_tokens": 512,
  "min_tokens": 0,
  "presence_penalty": 0,
  "prompt": "Transcribe the speech into written form.",
  "temperature": 0.6,
  "top_k": 50,
  "top_p": 0.9
}
Install Replicate’s Node.js client library:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_OSX**********************************
This is your API token. Keep it to yourself.
Import and set up the client:
import Replicate from "replicate";

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});
Run ibm-granite/granite-speech-3.3-8b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
  "ibm-granite/granite-speech-3.3-8b:8dd07eef06015ad770a53c89ec0ba9594632cc65fb0dabb1503e78c9aefdce1b",
  {
    input: {
      audio: ["https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"],
      frequency_penalty: 0,
      max_tokens: 512,
      min_tokens: 0,
      presence_penalty: 0,
      prompt: "Transcribe the speech into written form.",
      temperature: 0.6,
      top_k: 50,
      top_p: 0.9
    }
  }
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
Install Replicate’s Python client library:
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_OSX**********************************
This is your API token. Keep it to yourself.
Import the client:
import replicate
Run ibm-granite/granite-speech-3.3-8b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
    "ibm-granite/granite-speech-3.3-8b:8dd07eef06015ad770a53c89ec0ba9594632cc65fb0dabb1503e78c9aefdce1b",
    input={
        "audio": ["https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"],
        "frequency_penalty": 0,
        "max_tokens": 512,
        "min_tokens": 0,
        "presence_penalty": 0,
        "prompt": "Transcribe the speech into written form.",
        "temperature": 0.6,
        "top_k": 50,
        "top_p": 0.9
    }
)
# The ibm-granite/granite-speech-3.3-8b model can stream output as it's running.
# The predict method returns an iterator, and you can iterate over that output.
for item in output:
    # https://replicate.com/ibm-granite/granite-speech-3.3-8b/api#output-schema
    print(item, end="")
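The iterator yields the transcript as short text fragments, each carrying its own leading space (see the token list in the example response below). If you want the whole transcript as a single string rather than printing it piece by piece, a minimal sketch (use it in place of the loop above, since the iterator can only be consumed once):
# Join the streamed fragments into one transcript string.
# Do this instead of the print loop above, not after it, because
# the iterator returned by replicate.run() is exhausted once consumed.
transcript = "".join(output)
print(transcript)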
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_OSX**********************************
This is your API token. Keep it to yourself.
Run ibm-granite/granite-speech-3.3-8b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
  -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
  -H "Content-Type: application/json" \
  -H "Prefer: wait" \
  -d $'{
    "version": "ibm-granite/granite-speech-3.3-8b:8dd07eef06015ad770a53c89ec0ba9594632cc65fb0dabb1503e78c9aefdce1b",
    "input": {
      "audio": ["https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"],
      "frequency_penalty": 0,
      "max_tokens": 512,
      "min_tokens": 0,
      "presence_penalty": 0,
      "prompt": "Transcribe the speech into written form.",
      "temperature": 0.6,
      "top_k": 50,
      "top_p": 0.9
    }
  }' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
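If you would rather drive the HTTP API from a script than from curl, the sketch below is a rough Python equivalent of the request above, assuming the requests package is installed. The Prefer: wait header asks Replicate to hold the connection until the prediction finishes; if the response comes back still processing, you can poll the prediction's get URL until it reaches a terminal status.
import os
import time

import requests

headers = {
    "Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}",
    "Content-Type": "application/json",
    "Prefer": "wait",
}
body = {
    "version": "ibm-granite/granite-speech-3.3-8b:8dd07eef06015ad770a53c89ec0ba9594632cc65fb0dabb1503e78c9aefdce1b",
    "input": {
        "audio": ["https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"],
        "prompt": "Transcribe the speech into written form.",
        "max_tokens": 512,
        "temperature": 0.6,
        "top_k": 50,
        "top_p": 0.9,
    },
}

prediction = requests.post(
    "https://api.replicate.com/v1/predictions", headers=headers, json=body
).json()

# Poll the prediction's "get" URL until it reaches a terminal status.
while prediction["status"] not in ("succeeded", "failed", "canceled"):
    time.sleep(1)
    prediction = requests.get(prediction["urls"]["get"], headers=headers).json()

# "output" is a list of text fragments; join them to get the transcript.
print("".join(prediction["output"]))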
Example output:
after his nap timothy lazily stretched first one grey velvet foot then another strolled indolently to his plate turning over the food carefully selecting choice bits nosing out that which he scorned upon the clean hearth
The full prediction response, including logs and metrics:
{
"id": "erzyn7ew2nrma0cr1z6ttvfjd0",
"model": "ibm-granite/granite-speech-3.3-8b",
"version": "8dd07eef06015ad770a53c89ec0ba9594632cc65fb0dabb1503e78c9aefdce1b",
"input": {
"audio": [
"https://replicate.delivery/pbxt/NMdAjCoC0WiNKkHIIbSsmssPEXujCRSDIjg9LlJYkt5BGs8d/10226_10111_000000.wav"
],
"frequency_penalty": 0,
"max_tokens": 512,
"min_tokens": 0,
"presence_penalty": 0,
"prompt": "Transcribe the speech into written form.",
"temperature": 0.6,
"top_k": 50,
"top_p": 0.9
},
"logs": "2025-07-15 20:43:02 [info ] predict() commencing request_id=1 user_prompt=Transcribe the speech into written form.\n2025-07-15 20:43:02 [debug ] Formatted prompt using chat template formatted_prompt=<|start_of_role|>system<|end_of_role|> Knowledge Cutoff Date: April 2024.\n Today's Date: July 15, 2025. You are Granite, developed by IBM. You are a helpful AI assistant.<|end_of_text|>\n<|start_of_role|>user<|end_of_role|><|audio|>Transcribe the speech into written form.<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|> request_id=1 user_prompt=Transcribe the speech into written form.\n2025-07-15 20:43:02 [debug ] SamplingParams request_id=1 sampling_params=SamplingParams(n=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=50, min_p=0.0, seed=None, stop=[], stop_token_ids=[0], bad_words=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=512, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None, guided_decoding=None, extra_args=None) user_prompt=Transcribe the speech into written form.\n2025-07-15 20:43:10 [debug ] LoRARequest lora_request=LoRARequest(lora_name='speech', lora_int_id=1, lora_path='/src/weights', lora_local_path=None, long_lora_max_len=None, base_model_name=None, tensorizer_config_dict=None) request_id=1 user_prompt=Transcribe the speech into written form.\nINFO 07-15 20:43:10 [async_llm.py:270] Added request 1.\n2025-07-15 20:43:11 [debug ] result finish_reason=stop request_id=1 text=after his nap timothy lazily stretched first one grey velvet foot then another strolled indolently to his plate turning over the food carefully selecting choice bits nosing out that which he scorned upon the clean hearth user_prompt=Transcribe the speech into written form.\n2025-07-15 20:43:11 [info ] Generation took 9.37s request_id=1 user_prompt=Transcribe the speech into written form.\n/root/.pyenv/versions/3.12.11/lib/python3.12/site-packages/cog/server/scope.py:22: ExperimentalFeatureWarning: current_scope is an experimental internal function. It may change or be removed without warning.\n warnings.warn(\n2025-07-15 20:43:11 [info ] predict() complete request_id=1 user_prompt=Transcribe the speech into written form.",
"output": [
"after",
" his",
" nap",
" tim",
"oth",
"y",
" l",
"az",
"ily",
" st",
"ret",
"ched",
" first",
" one",
" grey",
" vel",
"vet",
" foot",
" then",
" another",
" st",
"rolled",
" ind",
"ol",
"ently",
" to",
" his",
" plate",
" turning",
" over",
" the",
" food",
" carefully",
" selecting",
" choice",
" bits",
" n",
"osing",
" out",
" that",
" which",
" he",
" sc",
"orn",
"ed",
" upon",
" the",
" clean",
" he",
"arth",
""
],
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2025-07-15T20:36:46.229Z",
"started_at": "2025-07-15T20:43:02.019251Z",
"completed_at": "2025-07-15T20:43:11.560879Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/erzyn7ew2nrma0cr1z6ttvfjd0/cancel",
"get": "https://api.replicate.com/v1/predictions/erzyn7ew2nrma0cr1z6ttvfjd0",
"stream": "https://stream-b.svc.ric1.c.replicate.net/v1/streams/dj7vcb4mqduqloinzdkclu3bquqpbbt4gl2skkxjnoogj7z6uyha",
"web": "https://replicate.com/p/erzyn7ew2nrma0cr1z6ttvfjd0"
},
"metrics": {
"predict_time": 9.541627094999999,
"total_time": 385.331879
}
}
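The output field in this response is the list of text fragments the model streamed; concatenated, they give the transcript shown above. As a small illustration (assuming the Python client and the prediction id from this response), you can fetch a finished prediction later and reassemble its transcript:
import replicate

# Look up the prediction shown above by its id.
prediction = replicate.predictions.get("erzyn7ew2nrma0cr1z6ttvfjd0")

if prediction.status == "succeeded":
    # prediction.output is the list of streamed fragments.
    transcript = "".join(prediction.output)
    print(transcript)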