typetext
{
"debug": false,
"max_new_tokens": 128,
"min_new_tokens": -1,
"prompt": "What is 10+4?",
"repetition_penalty": 1.15,
"return_logits": false,
"temperature": 0.7,
"top_p": 0.95
}
npm install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7cT**********************************
This is your API token. Keep it to yourself.
import Replicate from "replicate";
// Build the API client. The token is read from the REPLICATE_API_TOKEN
// environment variable so it never has to appear in source code.
const { REPLICATE_API_TOKEN: auth } = process.env;
const replicate = new Replicate({ auth });
Run johnnyoshika/llama2-combine-numbers using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Model identifier string handed to replicate.run().
const modelRef =
  "johnnyoshika/llama2-combine-numbers:3d318c904899fa396a3255078da6a56c0d4f0b7837550159f196eb05932aae0a";

// Input fields sent to the model for this run.
const input = {
  debug: false,
  max_new_tokens: 128,
  min_new_tokens: -1,
  prompt: "What is 10+4?",
  repetition_penalty: 1.15,
  return_logits: false,
  temperature: 0.7,
  top_p: 0.95,
};

// Run the model and print whatever the client resolves with.
const output = await replicate.run(modelRef, { input });
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7cT**********************************
This is your API token. Keep it to yourself.
import replicate
Run johnnyoshika/llama2-combine-numbers using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Run the model with the given input fields.
output = replicate.run(
    "johnnyoshika/llama2-combine-numbers:3d318c904899fa396a3255078da6a56c0d4f0b7837550159f196eb05932aae0a",
    input={
        "debug": False,
        "max_new_tokens": 128,
        "min_new_tokens": -1,
        "prompt": "What is 10+4?",
        "repetition_penalty": 1.15,
        "return_logits": False,
        "temperature": 0.7,
        "top_p": 0.95,
    },
)

# The johnnyoshika/llama2-combine-numbers model can stream output as it's running.
# The predict method returns an iterator, and you can iterate over that output.
for item in output:
    # https://replicate.com/johnnyoshika/llama2-combine-numbers/api#output-schema
    # end="" keeps the streamed pieces on one line instead of one per line.
    print(item, end="")
To learn more, take a look at the guide on getting started with Python.
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=r8_7cT**********************************
This is your API token. Keep it to yourself.
Run johnnyoshika/llama2-combine-numbers using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction by POSTing a JSON body (model version plus input fields)
# to the predictions endpoint, sending $REPLICATE_API_TOKEN as a Bearer token.
# -s suppresses curl's progress output.
# The payload uses $'...' (ANSI-C quoting), so run this in a shell that
# supports it, e.g. bash.
# NOTE(review): "Prefer: wait" presumably asks the API to hold the response
# until the prediction finishes — confirm against the HTTP API reference.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "johnnyoshika/llama2-combine-numbers:3d318c904899fa396a3255078da6a56c0d4f0b7837550159f196eb05932aae0a",
"input": {
"debug": false,
"max_new_tokens": 128,
"min_new_tokens": -1,
"prompt": "What is 10+4?",
"repetition_penalty": 1.15,
"return_logits": false,
"temperature": 0.7,
"top_p": 0.95
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
10 + 4 = 14
{
"id": "27dqpzg7pnrgm0cfhsh9rxr30w",
"model": "johnnyoshika/llama2-combine-numbers",
"version": "3d318c904899fa396a3255078da6a56c0d4f0b7837550159f196eb05932aae0a",
"input": {
"debug": false,
"max_new_tokens": 128,
"min_new_tokens": -1,
"prompt": "What is 10+4?",
"repetition_penalty": 1.15,
"return_logits": false,
"temperature": 0.7,
"top_p": 0.95
},
"logs": "Your formatted prompt is:\nWhat is 10+4?\ncorrect lora is already loaded\nOverall initialize_peft took 0.000\nExllama: False\nINFO 05-19 01:05:42 async_llm_engine.py:371] Received request 0: prompt: 'What is 10+4?', sampling params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=1.0, temperature=0.7, top_p=0.95, top_k=50, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['</s>'], ignore_eos=False, max_tokens=128, logprobs=None, skip_special_tokens=True), prompt token ids: None.\nINFO 05-19 01:05:42 llm_engine.py:631] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 05-19 01:05:42 async_llm_engine.py:111] Finished request 0.\nhostname: model-hp-77dde5d6c56598691b9008f7d123a18d-74856449d8-4m969",
"output": [
"\n",
"1",
"0",
" +",
" ",
"4",
" =",
" ",
"1",
"4",
""
],
"data_removed": false,
"error": null,
"source": "web",
"status": "succeeded",
"created_at": "2024-05-19T01:05:40.277Z",
"started_at": "2024-05-19T01:05:42.473367Z",
"completed_at": "2024-05-19T01:05:42.738483Z",
"urls": {
"cancel": "https://api.replicate.com/v1/predictions/27dqpzg7pnrgm0cfhsh9rxr30w/cancel",
"get": "https://api.replicate.com/v1/predictions/27dqpzg7pnrgm0cfhsh9rxr30w",
"stream": "https://streaming-api.svc.us.c.replicate.net/v1/streams/cfplbm56v6eavspddix3jwkd4h5ljxeh75pdk26fqpzmh5bu36aa",
"web": "https://replicate.com/p/27dqpzg7pnrgm0cfhsh9rxr30w"
},
"metrics": {
"predict_time": 0.265116,
"total_time": 2.461483
}
}