Hosted version of NousResearch/Genstruct-7B
The files used to host this model are available on GitHub.
The model is served with vLLM using the following code:
import os

# Enable hf_transfer for faster weight downloads from the Hugging Face Hub.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import torch
from cog import BasePredictor
from vllm import LLM, SamplingParams


def prompt(title, content):
    # Genstruct-7B's grounded generation format: a [[[Title]]]/[[[Content]]]
    # block followed by an open [[[User]]] tag for the model to complete.
    return f"""[[[Title]]] {title}
[[[Content]]] {content}
The following is an interaction between a user and an AI assistant that is related to the above text.
[[[User]]] """
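
# For a concrete sense of the format, prompt() renders the following string
# for a pair of placeholder inputs (the title and content below are examples,
# not values from the original repository):
#
#   [[[Title]]] Photosynthesis
#   [[[Content]]] Plants convert sunlight, water, and CO2 into glucose and oxygen.
#   The following is an interaction between a user and an AI assistant that is related to the above text.
#   [[[User]]]
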
class Predictor(BasePredictor):
    def setup(self):
        # Shard the model across all visible GPUs with tensor parallelism.
        n_gpus = torch.cuda.device_count()
        self.llm = LLM(model='NousResearch/Genstruct-7B',
                       tensor_parallel_size=n_gpus)

    def predict(self, title: str, content: str, temp: float = 0.0, max_tokens: int = 2000) -> str:
        _p = prompt(title, content)
        # ignore_eos=True continues generating past the EOS token, so every
        # completion runs to the full max_tokens budget.
        sampling_params = SamplingParams(temperature=temp, ignore_eos=True, max_tokens=max_tokens)
        out = self.llm.generate(_p, sampling_params=sampling_params, use_tqdm=False)
        return out[0].outputs[0].text
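
As a quick sanity check, the predictor can be exercised directly in Python. This is a minimal sketch, assuming a CUDA-capable machine with torch, vllm, and cog installed; the title and content values are placeholders:

if __name__ == "__main__":
    predictor = Predictor()
    predictor.setup()  # downloads the weights and loads them into vLLM; requires a GPU
    output = predictor.predict(
        title="Photosynthesis",
        content="Plants convert sunlight, water, and CO2 into glucose and oxygen.",
        temp=0.7,        # nonzero temperature for more varied generations
        max_tokens=512,
    )
    print(output)

Assuming the repository's cog.yaml points at this predictor, the same call can also be made through the Cog CLI with cog predict -i title="..." -i content="...".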
For more information, see these docs.