nateraw/nous-hermes-2-solar-10.7b:1e918ab6
Input
Run this model in Node.js. First, install Replicate's Node.js client library:
npm install replicate
Then set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import Replicate from "replicate";

const replicate = new Replicate({
  auth: process.env.REPLICATE_API_TOKEN,
});
Run nateraw/nous-hermes-2-solar-10.7b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
const output = await replicate.run(
  "nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4",
  {
    input: {
      top_k: 50,
      top_p: 0.9,
      prompt: "Write a short hello world FastAPI example",
      temperature: 0.6,
      max_new_tokens: 512,
      prompt_template: "<|im_start|>system\nYou are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
      presence_penalty: 0,
      frequency_penalty: 0
    }
  }
);
console.log(output);
To learn more, take a look at the guide on getting started with Node.js.
First, install Replicate's Python client library:
pip install replicate
Then set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
import replicate
Run nateraw/nous-hermes-2-solar-10.7b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
output = replicate.run(
    "nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4",
    input={
        "top_k": 50,
        "top_p": 0.9,
        "prompt": "Write a short hello world FastAPI example",
        "temperature": 0.6,
        "max_new_tokens": 512,
        "prompt_template": "<|im_start|>system\nYou are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
        "presence_penalty": 0,
        "frequency_penalty": 0
    }
)
# The nateraw/nous-hermes-2-solar-10.7b model can stream output as it's running.
# replicate.run returns an iterator, and you can iterate over that output.
for item in output:
    # https://replicate.com/nateraw/nous-hermes-2-solar-10.7b/api#output-schema
    print(item, end="")
To learn more, take a look at the guide on getting started with Python.
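Since each streamed item is a small string fragment (compare the output array in the example response below), a common follow-up is to collect the pieces as they arrive and join them once the iterator is exhausted. A minimal sketch, used in place of the print loop above:

# Collect streamed tokens while printing, then join them into the full reply.
pieces = []
for item in output:
    print(item, end="")  # show tokens as they arrive
    pieces.append(item)

full_text = "".join(pieces)  # the complete completion as a single string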
Set the REPLICATE_API_TOKEN environment variable:
export REPLICATE_API_TOKEN=<paste-your-token-here>
Find your API token in your account settings.
Run nateraw/nous-hermes-2-solar-10.7b using Replicate’s API. Check out the model's schema for an overview of inputs and outputs.
curl -s -X POST \
  -H "Authorization: Bearer $REPLICATE_API_TOKEN" \
  -H "Content-Type: application/json" \
  -H "Prefer: wait" \
  -d $'{
    "version": "nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4",
    "input": {
      "top_k": 50,
      "top_p": 0.9,
      "prompt": "Write a short hello world FastAPI example",
      "temperature": 0.6,
      "max_new_tokens": 512,
      "prompt_template": "<|im_start|>system\\nYou are \\"Hermes 2\\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>\\n<|im_start|>user\\n{prompt}<|im_end|>\\n<|im_start|>assistant\\n",
      "presence_penalty": 0,
      "frequency_penalty": 0
    }
  }' \
  https://api.replicate.com/v1/predictions
To learn more, take a look at Replicate’s HTTP API reference docs.
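For comparison, here is a minimal sketch of the same call made from Python with the requests library (an illustration, not part of Replicate's official docs). It mirrors the curl payload above and, since "Prefer: wait" may return before long generations finish, polls the get URL from the prediction's urls field until a terminal status is reached:

import os
import time

import requests

headers = {
    "Authorization": f"Bearer {os.environ['REPLICATE_API_TOKEN']}",
    "Content-Type": "application/json",
    "Prefer": "wait",  # ask the API to hold the request until the prediction finishes
}
payload = {
    "version": "nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4",
    "input": {
        "prompt": "Write a short hello world FastAPI example",
        "max_new_tokens": 512,
    },
}

prediction = requests.post(
    "https://api.replicate.com/v1/predictions", headers=headers, json=payload
).json()

# Poll the prediction's `get` URL until it reaches a terminal status
# (statuses assumed here: succeeded, failed, canceled).
while prediction.get("status") not in ("succeeded", "failed", "canceled"):
    time.sleep(1)
    prediction = requests.get(prediction["urls"]["get"], headers=headers).json()

# The output is a list of token strings; join them into the full reply.
print("".join(prediction["output"] or []))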
Output
{
  "completed_at": "2024-01-03T00:05:00.273091Z",
  "created_at": "2024-01-03T00:02:14.751468Z",
  "data_removed": false,
  "error": null,
  "id": "t6nhwatb7ph3ublql5tmpquxbu",
  "input": {
    "top_k": 50,
    "top_p": 0.9,
    "prompt": "Write a short hello world FastAPI example",
    "temperature": 0.6,
    "max_new_tokens": 512,
    "prompt_template": "<|im_start|>system\nYou are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
    "presence_penalty": 0,
    "frequency_penalty": 0
  },
  "logs": "INFO 01-03 00:04:56 async_llm_engine.py:379] Received request 0: prompt: '<|im_start|>system\\nYou are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia.<|im_end|>\\n<|im_start|>user\\nWrite a short hello world FastAPI example<|im_end|>\\n<|im_start|>assistant\\n', sampling params: SamplingParams(n=1, best_of=1, presence_penalty=0.0, frequency_penalty=0.0, repetition_penalty=1.0, temperature=0.6, top_p=0.9, top_k=50, min_p=0.0, use_beam_search=False, length_penalty=1.0, early_stopping=False, stop=['<|im_end|>'], stop_token_ids=[], ignore_eos=False, max_tokens=512, logprobs=None, prompt_logprobs=None, skip_special_tokens=True, spaces_between_special_tokens=True), prompt token ids: None.\nINFO 01-03 00:04:56 llm_engine.py:649] Avg prompt throughput: 0.0 tokens/s, Avg generation throughput: 0.0 tokens/s, Running: 1 reqs, Swapped: 0 reqs, Pending: 0 reqs, GPU KV cache usage: 0.1%, CPU KV cache usage: 0.0%\nINFO 01-03 00:05:00 async_llm_engine.py:111] Finished request 0.\ngeneration took 3.893s",
  "metrics": {
    "predict_time": 3.954088,
    "total_time": 165.521623
  },
"output": [
"C",
"ertain",
"ly",
"!",
" Here",
"'",
"s",
" a",
" simple",
" \"",
"Hello",
",",
" World",
"!\"",
" Fast",
"API",
" example",
":",
"\n",
"\n",
"``",
"`",
"python",
"\n",
"from",
" fast",
"api",
" import",
" Fast",
"API",
"\n",
"\n",
"app",
" =",
" Fast",
"API",
"()",
"\n",
"\n",
"@",
"app",
".",
"get",
"(\"/",
"\")",
"\n",
"async",
" def",
" root",
"():",
"\n",
" ",
" return",
" {\"",
"message",
"\":",
" \"",
"Hello",
",",
" World",
"!\"",
"}",
"\n",
"``",
"`",
"\n",
"\n",
"This",
" code",
" creates",
" a",
" Fast",
"API",
" application",
",",
" defines",
" a",
" route",
" at",
" the",
" root",
" URL",
" (\"",
"/",
"\"),",
" and",
" returns",
" a",
" JSON",
" object",
" with",
" a",
" message",
" saying",
" \"",
"Hello",
",",
" World",
"!\"",
" when",
" that",
" route",
" is",
" accessed",
".",
""
],
"started_at": "2024-01-03T00:04:56.319003Z",
"status": "succeeded",
"urls": {
"stream": "https://streaming-api.svc.us.c.replicate.net/v1/predictions/t6nhwatb7ph3ublql5tmpquxbu",
"get": "https://api.replicate.com/v1/predictions/t6nhwatb7ph3ublql5tmpquxbu",
"cancel": "https://api.replicate.com/v1/predictions/t6nhwatb7ph3ublql5tmpquxbu/cancel"
},
"version": "1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4"
}
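Note that the output field is a list of token-sized strings; joined end to end, they form the complete reply. The logs also show how prompt_template is applied: the {prompt} placeholder is replaced with the user's prompt before generation. A hypothetical sketch of that substitution (the actual server-side code isn't shown here):

# Illustration only: reproduce the substitution visible in the logs above.
# The system message is elided ("...") for brevity.
prompt_template = (
    "<|im_start|>system\n...<|im_end|>\n"
    "<|im_start|>user\n{prompt}<|im_end|>\n"
    "<|im_start|>assistant\n"
)
final_prompt = prompt_template.replace("{prompt}", "Write a short hello world FastAPI example")
print(final_prompt)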