Example input:
{
  "frequency_penalty": 0,
  "max_tokens": 1024,
  "presence_penalty": 0,
  "prompt": "Hello, Llama!",
  "temperature": 0.6,
  "top_p": 1
}

Install Replicate's Node.js client library:
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_f2g**********************************
This is your API token. Keep it to yourself.
Import and set up the client:
import Replicate from "replicate";

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});
Run meta/llama-4-maverick-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const input = {
  frequency_penalty: 0,
  max_tokens: 1024,
  presence_penalty: 0,
  prompt: "Hello, Llama!",
  temperature: 0.6,
  top_p: 1
};

// Stream the model's output token by token as it is generated.
for await (const event of replicate.stream("meta/llama-4-maverick-instruct", { input })) {
  process.stdout.write(event.toString());
}
To learn more, take a look at the guide on getting started with Node.js.
Install Replicate's Python client library:
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_f2g**********************************
This is your API token. Keep it to yourself.
Import the client:
import replicate
Run meta/llama-4-maverick-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# The meta/llama-4-maverick-instruct model can stream output as it's running.
for event in replicate.stream(
    "meta/llama-4-maverick-instruct",
    input={
        "frequency_penalty": 0,
        "max_tokens": 1024,
        "presence_penalty": 0,
        "prompt": "Hello, Llama!",
        "temperature": 0.6,
        "top_p": 1,
    },
):
    print(str(event), end="")
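
If you don't need to stream tokens as they are generated, you can also wait for the full result. This is a minimal sketch assuming the client's standard replicate.run call, which for language models like this one returns the output as a sequence of text chunks you join yourself:

import replicate

# replicate.run blocks until the prediction finishes and returns its output.
output = replicate.run(
    "meta/llama-4-maverick-instruct",
    input={
        "prompt": "Hello, Llama!",
        "max_tokens": 1024,
        "temperature": 0.6,
        "top_p": 1,
    },
)

# Join the chunks to recover the full completion text.
print("".join(output))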
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_f2g**********************************
This is your API token. Keep it to yourself.
Run meta/llama-4-maverick-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
  -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
  -H "Content-Type: application/json" \
  -H "Prefer: wait" \
  -d $'{
    "input": {
      "frequency_penalty": 0,
      "max_tokens": 1024,
      "presence_penalty": 0,
      "prompt": "Hello, Llama!",
      "temperature": 0.6,
      "top_p": 1
    }
  }' \
  https://api.replicate.com/v1/models/meta/llama-4-maverick-instruct/predictions
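
The same request can be issued from any HTTP client. Here is a sketch using Python's requests library (an assumption of this example, not one of Replicate's own snippets); the Prefer: wait header asks the API to hold the connection open until the prediction completes, so the response already contains the output:

import os
import requests

# Create a prediction and, thanks to "Prefer: wait", block until it finishes.
resp = requests.post(
    "https://api.replicate.com/v1/models/meta/llama-4-maverick-instruct/predictions",
    headers={
        "Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}",
        "Content-Type": "application/json",
        "Prefer": "wait",
    },
    json={
        "input": {
            "frequency_penalty": 0,
            "max_tokens": 1024,
            "presence_penalty": 0,
            "prompt": "Hello, Llama!",
            "temperature": 0.6,
            "top_p": 1,
        }
    },
)
prediction = resp.json()
print("".join(prediction["output"]))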
To learn more, take a look at Replicate’s HTTP API reference docs.
Example output:
Hello! It's nice to meet you. Is there something I can help you with or would you like to chat?
The full prediction response looks like this:
{
  "id": "8vn5mz0xbsrme0cp10vv3zd150",
  "model": "meta/llama-4-maverick-instruct",
  "version": "hidden",
  "input": {
    "frequency_penalty": 0,
    "max_tokens": 1024,
    "presence_penalty": 0,
    "prompt": "Hello, Llama!",
    "temperature": 0.6,
    "top_p": 1
  },
  "logs": "Prompt: Hello, Llama!\nInput token count: 5\nOutput token count: 24\nTTFT: 0.39s\nTokens per second: 40.51\nTotal time: 0.59s",
  "output": [
    "",
    "Hello!",
    " It's nice to meet you.",
    " Is there something I can help",
    " you with or would you like",
    " to chat?",
    ""
  ],
  "data_removed": false,
  "error": null,
  "source": "web",
  "status": "succeeded",
  "created_at": "2025-04-05T23:09:11.902Z",
  "started_at": "2025-04-05T23:09:11.908871Z",
  "completed_at": "2025-04-05T23:09:12.502307Z",
  "urls": {
    "cancel": "https://api.replicate.com/v1/predictions/8vn5mz0xbsrme0cp10vv3zd150/cancel",
    "get": "https://api.replicate.com/v1/predictions/8vn5mz0xbsrme0cp10vv3zd150",
    "stream": "https://stream-b.svc.ric1.c.replicate.net/v1/streams/fajhmjcarci7xzk7wmapfnqqd3p4sptbb7kd3uijjerjnkgqdteq",
    "web": "https://replicate.com/p/8vn5mz0xbsrme0cp10vv3zd150"
  },
  "metrics": {
    "input_token_count": 5,
    "output_token_count": 24,
    "predict_time": 0.593435972,
    "time_to_first_token": 0.007776063000000001,
    "tokens_per_second": 40.50421612689237,
    "total_time": 0.600307
  }
}
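
Two details of this response are worth noting: output is a list of streamed chunks rather than a single string, and the reported tokens_per_second appears consistent with tokens generated after the first token arrived. A short sketch, assuming the JSON above has been parsed into a prediction dict (e.g. via resp.json()):

# Concatenate the streamed chunks to reproduce the text shown above.
text = "".join(prediction["output"])

m = prediction["metrics"]
# 24 / (0.600307 - 0.007776) ≈ 40.504, matching the reported tokens_per_second.
rate = m["output_token_count"] / (m["total_time"] - m["time_to_first_token"])
assert abs(rate - m["tokens_per_second"]) < 1e-3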