# Ollama Models
The source code is located here. The `OllamaLLM` class wraps the `ollama` Python client behind the AIOS `BaseLLM` interface:
```python
# Wrapper around the Ollama client for locally served LLMs.
import re
import time

import ollama

from aios.llm_core.cores.base import BaseLLM
from aios.utils import get_from_env
from cerebrum.llm.communication import Response


class OllamaLLM(BaseLLM):
    def __init__(
        self,
        llm_name: str,
        max_gpu_memory: dict = None,
        eval_device: str = None,
        max_new_tokens: int = 256,
        log_mode: str = "console",
        use_context_manager=False,
    ):
        super().__init__(
            llm_name,
            max_gpu_memory,
            eval_device,
            max_new_tokens,
            log_mode,
            use_context_manager,
        )

    def load_llm_and_tokenizer(self) -> None:
        # Ollama serves the model out of process, so nothing is loaded here.
        self.model = None
        self.tokenizer = None

    def address_syscall(self, llm_syscall, temperature=0.0):
        """Simple wrapper around the ollama chat functions."""
        # Ensure the requested model is an Ollama model.
        assert re.search(r"ollama", self.model_name, re.IGNORECASE)

        llm_syscall.set_status("executing")
        llm_syscall.set_start_time(time.time())
        messages = llm_syscall.query.messages
        tools = llm_syscall.query.tools
        message_return_type = llm_syscall.query.message_return_type

        self.logger.log(
            f"{llm_syscall.agent_name} is switched to executing.\n", level="executing"
        )

        # Two paths: with and without the overhead of tool handling.
        if tools:
            messages = self.tool_calling_input_format(messages, tools)
            try:
                chat_result = ollama.chat(
                    model=self.model_name.split("/")[-1], messages=messages
                )
                # Tool calls are recovered from the raw completion text.
                tool_calls = self.parse_tool_calls(chat_result["message"]["content"])
                if tool_calls:
                    response = Response(
                        response_message=None, tool_calls=tool_calls, finished=True
                    )
                else:
                    response = Response(
                        response_message=chat_result["message"]["content"],
                        finished=True,
                    )
            except Exception as e:
                response = Response(
                    response_message=f"An unexpected error occurred: {e}",
                    finished=True,
                )
        else:
            try:
                chat_result = ollama.chat(
                    model=self.model_name.split("/")[-1],
                    messages=messages,
                    options=ollama.Options(num_predict=self.max_new_tokens),
                )
                result = chat_result["message"]["content"]
                if message_return_type == "json":
                    result = self.parse_json_format(result)
                response = Response(response_message=result, finished=True)
            except Exception as e:
                response = Response(
                    response_message=f"An unexpected error occurred: {e}", finished=True
                )
        return response
```
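For orientation, here is a minimal usage sketch. It is illustrative rather than part of the AIOS docs: the stubbed `syscall` object, the `demo_agent` name, and the `ollama/llama3` model are hypothetical, and it assumes `BaseLLM.__init__` stores the name as `self.model_name` and configures `self.logger`. A local Ollama server with the model already pulled is required.

```python
# Hypothetical usage sketch -- the real llm_syscall is built by the AIOS
# kernel, so we stub just the attributes address_syscall reads.
from types import SimpleNamespace

llm = OllamaLLM(llm_name="ollama/llama3")  # name must match r"ollama"

query = SimpleNamespace(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    tools=None,                  # no tools -> plain chat path
    message_return_type="text",  # anything but "json" skips JSON parsing
)
syscall = SimpleNamespace(
    query=query,
    agent_name="demo_agent",
    set_status=lambda status: None,   # no-op stand-ins for kernel bookkeeping
    set_start_time=lambda t: None,
)

response = llm.address_syscall(syscall)
print(response.response_message)
```

Note how the model name is split on `/`: `ollama/llama3` is sent to the Ollama server as `llama3`. Tool definitions are folded into the prompt by `tool_calling_input_format`, and tool calls are parsed back out of the raw completion text by `parse_tool_calls`, so the wrapper does not depend on native tool-calling support in the served model.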