lucataco/qwen1.5-110b:af7953cb
Input
Run this model in Node.js. First, install Replicate's Node.js client library:
npm install replicate
Then set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});
Run lucataco/qwen1.5-110b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
  "lucataco/qwen1.5-110b:af7953cb4fe4948df44a074d4785c2f74d0096257197198e90c9ac84361b6aa9",
  {
    input: {
      top_k: 50,
      top_p: 0.8,
      prompt: "Provide a short introduction to large language models",
      temperature: 0.7,
      system_prompt: "You are a helpful assistant.",
      max_new_tokens: 256,
      repetition_penalty: 1.05
    }
  }
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
Run this model in Python. First, install Replicate's Python client library:
pip install replicate
Then set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run lucataco/qwen1.5-110b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
    "lucataco/qwen1.5-110b:af7953cb4fe4948df44a074d4785c2f74d0096257197198e90c9ac84361b6aa9",
    input={
        "top_k": 50,
        "top_p": 0.8,
        "prompt": "Provide a short introduction to large language models",
        "temperature": 0.7,
        "system_prompt": "You are a helpful assistant.",
        "max_new_tokens": 256,
        "repetition_penalty": 1.05
    }
)
# The lucataco/qwen1.5-110b model can stream output as it's running.
# The predict method returns an iterator, and you can iterate over that output.
for item in output:
    # https://replicate.com/lucataco/qwen1.5-110b/api#output-schema
    print(item, end="")
To learn more, take a look at the guide on getting started with Python.
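replicate.run blocks until the model finishes. If you'd prefer to start a prediction and check on it yourself, the Python client also exposes a predictions API; the object it returns carries the same fields as the JSON shown under Output below. A minimal sketch (the input subset and the two-second polling interval are arbitrary choices, not requirements):

import time
import replicate

# Create the prediction without waiting for it to finish.
prediction = replicate.predictions.create(
    version="af7953cb4fe4948df44a074d4785c2f74d0096257197198e90c9ac84361b6aa9",
    input={
        "prompt": "Provide a short introduction to large language models",
        "max_new_tokens": 256,
    },
)

# Poll until the prediction reaches a terminal state.
while prediction.status not in ("succeeded", "failed", "canceled"):
    time.sleep(2)  # arbitrary polling interval
    prediction.reload()

if prediction.status == "succeeded":
    print(prediction.output)  # a list of token fragments (see Output below)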
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run lucataco/qwen1.5-110b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
  -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
  -H "Content-Type: application/json" \
  -H "Prefer: wait" \
  -d $'{
    "version": "lucataco/qwen1.5-110b:af7953cb4fe4948df44a074d4785c2f74d0096257197198e90c9ac84361b6aa9",
    "input": {
      "top_k": 50,
      "top_p": 0.8,
      "prompt": "Provide a short introduction to large language models",
      "temperature": 0.7,
      "system_prompt": "You are a helpful assistant.",
      "max_new_tokens": 256,
      "repetition_penalty": 1.05
    }
  }' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
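Because the request above sets the Prefer: wait header, the response is the completed prediction (when it finishes within the wait window). A prediction can also be fetched again later by its id, via the get URL shown in the example output below. A minimal Python sketch using only the standard library (the id here is the one from the example output; substitute your own):

import json
import os
import urllib.request

prediction_id = "nytm2kwjsdrgp0cf3jrsy25yaw"  # id from the example output below
req = urllib.request.Request(
    f"https://api.replicate.com/v1/predictions/{prediction_id}",
    headers={"Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}"},
)
with urllib.request.urlopen(req) as resp:
    prediction = json.load(resp)

print(prediction["status"])  # e.g. "succeeded"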
Output
{
  "completed_at": "2024-04-26T23:20:01.001195Z",
  "created_at": "2024-04-26T23:15:50.603000Z",
  "data_removed": false,
  "error": null,
  "id": "nytm2kwjsdrgp0cf3jrsy25yaw",
  "input": {
    "top_k": 50,
    "top_p": 0.8,
    "prompt": "Provide a short introduction to large language models",
    "temperature": 0.7,
    "system_prompt": "You are a helpful assistant.",
    "max_new_tokens": 256,
    "repetition_penalty": 1.05
  },
  "logs": "\u001b[36m(RayWorkerVllm pid=3790)\u001b[0m INFO 04-26 23:19:54 model_runner.py:867] Graph capturing finished in 6 secs.\nINFO 04-26 23:19:56 async_llm_engine.py:508] Received request 0: prompt: '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nProvide a short introduction to large language models<|im_end|>\\n<|im_start|>assistant\\n', sampling_params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.05, temperature=0.7, top_p=0.8, top_k=50, min_p=0.0, seed=None, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['<|endoftext|>'], stop_token_ids=[], include_stop_str_in_output=False, ignore_eos=False, max_tokens=256, min_tokens=0, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True), prompt_token_ids: None, lora_request: None.\nINFO 04-26 23:19:56 metrics.py:218] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.0%, CPU KV cache usage: 0.0%\nINFO 04-26 23:20:00 async_llm_engine.py:120] Finished request 0.\ngeneration took 4.678s",
  "metrics": {
    "predict_time": 4.741052,
    "total_time": 250.398195
  },
  "output": [
    "Large",
    " language",
    " models",
    " (",
    "LL",
    "Ms",
    ")",
    " are",
    " artificial",
    " intelligence",
    " systems",
    " that",
    " have",
    " been",
    " trained",
    " on",
    " massive",
    " amounts",
    " of",
    " text",
    " data",
    " to",
    " generate",
    " human",
    "-like",
    " language",
    " output",
    ".",
    " They",
    " are",
    " capable",
    " of",
    " understanding",
    " and",
    " generating",
    " natural",
    " language",
    ",",
    " and",
    " can",
    " be",
    " used",
    " for",
    " a",
    " wide",
    " range",
    " of",
    " tasks",
    " such",
    " as",
    " language",
    " translation",
    ",",
    " summar",
    "ization",
    ",",
    " question",
    " answering",
    ",",
    " and",
    " even",
    " creative",
    " writing",
    ".",
    " L",
    "LM",
    "s",
    " have",
    " become",
    " increasingly",
    " popular",
    " in",
    " recent",
    " years",
    " due",
    " to",
    " their",
    " ability",
    " to",
    " perform",
    " complex",
    " language",
    " tasks",
    " with",
    " high",
    " accuracy",
    " and",
    " speed",
    ".",
    " However",
    ",",
    " they",
    " also",
    " raise",
    " ethical",
    " concerns",
    " around",
    " issues",
    " such",
    " as",
    " bias",
    " and",
    " privacy",
    ".",
    ""
  ],
  "started_at": "2024-04-26T23:19:56.260143Z",
  "status": "succeeded",
  "urls": {
    "stream": "https://streaming-api.svc.us.c.replicate.net/v1/streams/5cdyqwo42pwc54elug22xnt7iz3tmqojvpuq6qukb4s3arl3taza",
    "get": "https://api.replicate.com/v1/predictions/nytm2kwjsdrgp0cf3jrsy25yaw",
    "cancel": "https://api.replicate.com/v1/predictions/nytm2kwjsdrgp0cf3jrsy25yaw/cancel"
  },
  "version": "af7953cb4fe4948df44a074d4785c2f74d0096257197198e90c9ac84361b6aa9"
}
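Note that the output field is a list of token fragments rather than a single string; concatenating the fragments yields the full completion. A one-line sketch, assuming prediction is the parsed JSON object above (as in the earlier urllib example):

# Join the streamed token fragments into the final text.
text = "".join(prediction["output"])
print(text)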