Failed to load versions. Head to the versions page to see all versions for this model.
You're looking at a specific version of this model. Jump to the model overview.
lucataco /qwen2-57b-a14b-instruct:fc67fa3f
Input
Run this model in Node.js with one line of code:
npm install replicate
REPLICATE_API_TOKEN
environment variable: export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";

// Authenticate the client with the token stored in REPLICATE_API_TOKEN.
const replicate = new Replicate({ auth: process.env.REPLICATE_API_TOKEN });
Run lucataco/qwen2-57b-a14b-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
// Sampling and generation settings for the prediction.
const input = {
  top_k: 50,
  top_p: 0.9,
  prompt: "Give me a short introduction to large language model.",
  max_tokens: 512,
  min_tokens: 0,
  temperature: 0.6,
  system_prompt: "You are a helpful assistant.",
  presence_penalty: 0,
  frequency_penalty: 0,
};

// Run the pinned model version and log whatever it returns.
const output = await replicate.run(
  "lucataco/qwen2-57b-a14b-instruct:fc67fa3fa20d3d0ee59794df05548b59d285fbb43d944506203a8a2195b75c36",
  { input }
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
pip install replicate
REPLICATE_API_TOKEN
environment variable: export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run lucataco/qwen2-57b-a14b-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Sampling and generation settings for the prediction.
params = {
    "top_k": 50,
    "top_p": 0.9,
    "prompt": "Give me a short introduction to large language model.",
    "max_tokens": 512,
    "min_tokens": 0,
    "temperature": 0.6,
    "system_prompt": "You are a helpful assistant.",
    "presence_penalty": 0,
    "frequency_penalty": 0,
}

# Run the pinned model version; the returned iterator streams tokens
# as the model generates them.
output = replicate.run(
    "lucataco/qwen2-57b-a14b-instruct:fc67fa3fa20d3d0ee59794df05548b59d285fbb43d944506203a8a2195b75c36",
    input=params,
)

# Output schema: https://replicate.com/lucataco/qwen2-57b-a14b-instruct/api#output-schema
for token in output:
    print(token, end="")
To learn more, take a look at the guide on getting started with Python.
REPLICATE_API_TOKEN
environment variable: export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run lucataco/qwen2-57b-a14b-instruct using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
# Create a prediction for the pinned qwen2-57b-a14b-instruct version via
# Replicate's HTTP API. "Prefer: wait" asks the API to hold the connection
# open and return the completed prediction instead of an async handle.
curl -s -X POST \
-H "Authorization: Bearer $REPLICATE_API_TOKEN" \
-H "Content-Type: application/json" \
-H "Prefer: wait" \
-d $'{
"version": "lucataco/qwen2-57b-a14b-instruct:fc67fa3fa20d3d0ee59794df05548b59d285fbb43d944506203a8a2195b75c36",
"input": {
"top_k": 50,
"top_p": 0.9,
"prompt": "Give me a short introduction to large language model.",
"max_tokens": 512,
"min_tokens": 0,
"temperature": 0.6,
"system_prompt": "You are a helpful assistant.",
"presence_penalty": 0,
"frequency_penalty": 0
}
}' \
https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
Add a payment method to run this model.
By signing in, you agree to our
terms of service and privacy policy
Output
{
"completed_at": "2024-07-02T15:28:33.485577Z",
"created_at": "2024-07-02T15:24:19.616000Z",
"data_removed": false,
"error": null,
"id": "0zkzt2qq41rgj0cgeg5tck6598",
"input": {
"top_k": 50,
"top_p": 0.9,
"prompt": "Give me a short introduction to large language model.",
"max_tokens": 512,
"min_tokens": 0,
"temperature": 0.6,
"system_prompt": "You are a helpful assistant.",
"presence_penalty": 0,
"frequency_penalty": 0
},
"logs": "INFO 07-02 15:28:31 async_llm_engine.py:529] Received request 98ac8d73e2454af68ca1cb481bc48be2: prompt: '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nGive me a short introduction to large language model.<|im_end|>\\n<|im_start|>assistant\\n', sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=50, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[151645], include_stop_str_in_output=False, ignore_eos=False, max_tokens=512, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: None, lora_request: None.\n stdoutGeneration took 1719933329.48sFormatted prompt: <|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nGive me a short introduction to large language model.<|im_end|>\n<|im_start|>assistant\nINFO 07-02 15:28:33 async_llm_engine.py:120] Finished request 98ac8d73e2454af68ca1cb481bc48be2.\n stdout",
"metrics": {
"predict_time": 2.107849268,
"total_time": 253.869577
},
"output": [
"A",
" large",
" language",
" model",
" (",
"LL",
"M",
")",
" is",
" a",
" type",
" of",
" artificial",
" intelligence",
" model",
" that",
" is",
" trained",
" on",
" a",
" massive",
" amount",
" of",
" text",
" data",
" to",
" generate",
" human",
"-like",
" text",
".",
" These",
" models",
" are",
" typically",
" trained",
" using",
" deep",
" learning",
" techniques",
",",
" and",
" they",
" are",
" able",
" to",
" generate",
" text",
" that",
" is",
" coherent",
" and",
" context",
"ually",
" appropriate",
",",
" making",
" them",
" useful",
" for",
" a",
" variety",
" of",
" natural",
" language",
" processing",
" tasks",
".",
" Some",
" common",
" applications",
" of",
" large",
" language",
" models",
" include",
" language",
" translation",
",",
" text",
" summar",
"ization",
",",
" and",
" question",
" answering",
".",
" They",
" are",
" also",
" used",
" in",
" chat",
"bots",
" and",
" virtual",
" assistants",
" to",
" enable",
" more",
" natural",
" and",
" realistic",
" conversations",
" with",
" users",
".",
" Large",
" language",
" models",
" are",
" often",
" referred",
" to",
" as",
" \"",
"gener",
"ative",
" models",
"\"",
" because",
" they",
" are",
" able",
" to",
" generate",
" new",
" text",
" based",
" on",
" the",
" patterns",
" they",
" have",
" learned",
" from",
" the",
" training",
" data",
".",
""
],
"started_at": "2024-07-02T15:28:31.377728Z",
"status": "succeeded",
"urls": {
"stream": "https://streaming-api.svc.us.c.replicate.net/v1/streams/mnco4zxca54lxj247vmhhtt3mna6et5dzpyxolh5o2yo3m2s4lma",
"get": "https://api.replicate.com/v1/predictions/0zkzt2qq41rgj0cgeg5tck6598",
"cancel": "https://api.replicate.com/v1/predictions/0zkzt2qq41rgj0cgeg5tck6598/cancel"
},
"version": "fc67fa3fa20d3d0ee59794df05548b59d285fbb43d944506203a8a2195b75c36"
}
INFO 07-02 15:28:31 async_llm_engine.py:529] Received request 98ac8d73e2454af68ca1cb481bc48be2: prompt: '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\nGive me a short introduction to large language model.<|im_end|>\n<|im_start|>assistant\n', sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=50, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=[], stop_token_ids=[151645], include_stop_str_in_output=False, ignore_eos=False, max_tokens=512, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True, truncate_prompt_tokens=None), prompt_token_ids: None, lora_request: None.
stdoutGeneration took 1719933329.48sFormatted prompt: <|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Give me a short introduction to large language model.<|im_end|>
<|im_start|>assistant
INFO 07-02 15:28:33 async_llm_engine.py:120] Finished request 98ac8d73e2454af68ca1cb481bc48be2.
stdout