Build LLM

Build an LLM with any one of the following backends; the corresponding snippet for each appears below, and every variant produces a model object named `llm` that exposes the same `predict` interface.

- OpenAI
- Anthropic
- vLLM
- Transformers
- Llama.cpp
OpenAI:

```python
!pip install openai superduper_openai
from superduper_openai import OpenAIChatCompletion

# Build a chat-completion model backed by the OpenAI API
llm = OpenAIChatCompletion(identifier='llm', model='gpt-3.5-turbo')
```
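The underlying `openai` client reads credentials from the environment. A minimal sketch, assuming your key is exported as `OPENAI_API_KEY` (the value below is a placeholder, not a real key):

```python
import os

# The openai client looks for the key in the OPENAI_API_KEY environment variable
os.environ['OPENAI_API_KEY'] = 'sk-xxx'
```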
Anthropic:

```python
!pip install anthropic superduper_anthropic
import os

from superduper_anthropic import AnthropicCompletions

os.environ["ANTHROPIC_API_KEY"] = "sk-xxx"

# Generation parameters forwarded to the API on each call
predict_kwargs = {
    "max_tokens": 1024,
    "temperature": 0.8,
}

llm = AnthropicCompletions(identifier='llm', model='claude-2.1', predict_kwargs=predict_kwargs)
```
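As a quick sanity check of the configuration above (this assumes a valid `ANTHROPIC_API_KEY`; the prompt is illustrative):

```python
# max_tokens and temperature from predict_kwargs are applied on each call
print(llm.predict("Say hello in one sentence."))
```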
vLLM:

```python
!pip install vllm superduper_vllm
from superduper_vllm import VllmModel

# Sampling parameters used at predict time
predict_kwargs = {
    "max_tokens": 1024,
    "temperature": 0.8,
}

llm = VllmModel(
    identifier="llm",
    model_name="TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
    # Engine arguments passed through to vLLM
    vllm_kwargs={
        "gpu_memory_utilization": 0.7,
        "max_model_len": 1024,
        "quantization": "awq",
    },
    predict_kwargs=predict_kwargs,
)
```
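For orientation, `vllm_kwargs` map onto vLLM's own engine arguments and `predict_kwargs` onto its sampling parameters. A rough plain-vLLM equivalent, as a sketch rather than superduper's actual internals:

```python
from vllm import LLM as VLLM, SamplingParams

# Engine configuration mirroring vllm_kwargs above
engine = VLLM(
    model="TheBloke/Mistral-7B-Instruct-v0.2-AWQ",
    gpu_memory_utilization=0.7,
    max_model_len=1024,
    quantization="awq",
)

# Sampling configuration mirroring predict_kwargs above
params = SamplingParams(max_tokens=1024, temperature=0.8)
outputs = engine.generate(["Tell me about vLLM"], params)
print(outputs[0].outputs[0].text)
```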
Transformers:

```python
!pip install transformers datasets bitsandbytes accelerate superduper_transformers
from superduper_transformers import LLM

# Load the model in 8-bit (via bitsandbytes) on the GPU
llm = LLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    load_in_8bit=True,
    device_map="cuda",
    identifier="llm",
    predict_kwargs=dict(max_new_tokens=128),
)
```
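The arguments above correspond to a standard Hugging Face 8-bit load. A plain-`transformers` sketch of the same configuration, not superduper's exact code:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2",
    load_in_8bit=True,   # 8-bit quantization via bitsandbytes
    device_map="cuda",
)
```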
Llama.cpp:

```python
!pip install llama_cpp_python superduper_llama_cpp
# Download the GGUF weights first, e.g.:
# !huggingface-cli download TheBloke/Mistral-7B-Instruct-v0.2-GGUF mistral-7b-instruct-v0.2.Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False
from superduper_llama_cpp.model import LlamaCpp

llm = LlamaCpp(identifier="llm", model_name_or_path="mistral-7b-instruct-v0.2.Q4_K_M.gguf")
```
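The wrapper builds on `llama-cpp-python`; a bare-bones sketch of loading and prompting the same GGUF file directly:

```python
from llama_cpp import Llama

# Load the quantized GGUF weights downloaded above
lcpp = Llama(model_path="mistral-7b-instruct-v0.2.Q4_K_M.gguf")

# __call__ runs a completion and returns an OpenAI-style response dict
out = lcpp("Q: What is llama.cpp? A:", max_tokens=64)
print(out["choices"][0]["text"])
```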
Test the model:

```python
# Whichever backend was chosen above, the resulting model exposes the same interface
llm.predict("Tell me about the superduper")
```
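In a full superduper application the model is usually registered with the datastore so it can be reused by other components. A sketch of that step, assuming an in-memory connection (the URI is illustrative):

```python
from superduper import superduper

# Connect to a toy in-memory datastore
db = superduper('mongomock://test')

# Register the model with the system
db.apply(llm)
```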