Skip to content

Add a New LLM Provider

PACE ships with AnthropicAdapter and LiteLLMAdapter. If you need to integrate a provider that LiteLLM doesn’t cover — a private inference endpoint, a custom wrapper, or a research model — you can implement the LLMAdapter interface directly.

The LLMAdapter interface

class LLMAdapter(ABC):
    """Interface that every PACE LLM provider adapter implements."""

    @abstractmethod
    def complete(self, system: str, user: str, max_tokens: int = 4096) -> str:
        """Run one single-turn structured completion and return the response text.

        Called by the non-agentic agents: PRIME, GATE, SENTINEL, CONDUIT.
        """

    @abstractmethod
    def chat(
        self,
        system: str,
        messages: list[dict],
        tools: list[dict] | None = None,
        max_tokens: int = 8192,
    ) -> ChatResponse:
        """Run a multi-turn conversation, optionally with tool calling.

        Called by the agentic tool-loop agents: FORGE, SCRIBE.
        Both ``messages`` and ``tools`` arrive in Anthropic format.
        """

Message format

PACE uses Anthropic message format internally. Your adapter receives messages in this format and must return a ChatResponse:

# Input message examples (Anthropic format):
# 1) A plain user turn: "content" is a single string.
{"role": "user", "content": "Write a login endpoint."}
# 2) An assistant turn: "content" is a list of typed blocks, here a text
#    block followed by a tool_use block that requests the read_file tool.
{"role": "assistant", "content": [
{"type": "text", "text": "I'll read the existing code first."},
{"type": "tool_use", "id": "toolu_1", "name": "read_file", "input": {"path": "src/auth.py"}},
]}
# 3) The tool result goes back as a user turn; its "tool_use_id" repeats the
#    "id" of the tool_use block it answers ("toolu_1").
{"role": "user", "content": [
{"type": "tool_result", "tool_use_id": "toolu_1", "content": "# auth.py\n..."}
]}
# Return type:
@dataclass
class ChatResponse:
stop_reason: str # "end_turn" | "tool_use" | "max_tokens"
text: str | None # assistant text (may be None if only tool calls)
tool_calls: list[ToolCall] # empty list if no tool calls
@dataclass
class ToolCall:
id: str
name: str
input: dict

1 — Create the adapter file

"""MyModel LLM adapter for PACE."""
import requests
from .base import LLMAdapter, ChatResponse, ToolCall
class MyModelAdapter(LLMAdapter):
    """PACE LLM adapter for MyModel inference API.

    Sends requests to ``{base_url}/v1/complete`` and ``{base_url}/v1/chat``
    with a bearer token, and converts the JSON reply into the types PACE
    expects (``str`` for ``complete``, ``ChatResponse`` for ``chat``).
    """

    def __init__(self, model: str, api_key: str, base_url: str = "https://api.mymodel.ai"):
        """Store the model name, API key and API root URL."""
        self.model = model
        self.api_key = api_key
        # Strip trailing slashes so endpoint paths never become "//v1/...".
        self.base_url = base_url.rstrip("/")

    def complete(self, system: str, user: str, max_tokens: int = 4096) -> str:
        """Single-turn completion — used by PRIME, GATE, SENTINEL, CONDUIT.

        Returns the ``"text"`` field of the JSON reply.

        Raises:
            requests.HTTPError: the API answered with a 4xx/5xx status.
            KeyError: the reply has no ``"text"`` field.
        """
        payload = {
            "model": self.model,
            "system": system,
            "prompt": user,
            "max_tokens": max_tokens,
        }
        return self._post("/v1/complete", payload, timeout=120)["text"]

    def chat(
        self,
        system: str,
        messages: list[dict],
        tools: list[dict] | None = None,
        max_tokens: int = 8192,
    ) -> ChatResponse:
        """Multi-turn chat with tool calling — used by FORGE, SCRIBE.

        ``messages`` and ``tools`` are passed through ``_convert_messages``
        and ``_convert_tools`` before being sent. ``"tools"`` is left out of
        the request when ``tools`` is ``None`` or empty.

        Raises:
            requests.HTTPError: the API answered with a 4xx/5xx status.
        """
        payload = {
            "model": self.model,
            "system": system,
            "messages": self._convert_messages(messages),
            "max_tokens": max_tokens,
        }
        if tools:
            payload["tools"] = self._convert_tools(tools)
        # Longer timeout than complete(): a chat turn can take much longer.
        return self._parse_response(self._post("/v1/chat", payload, timeout=300))

    def _post(self, path: str, payload: dict, timeout: float) -> dict:
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON reply."""
        response = requests.post(
            f"{self.base_url}{path}",
            headers={"Authorization": f"Bearer {self.api_key}"},
            json=payload,
            timeout=timeout,
        )
        response.raise_for_status()
        return response.json()

    def _convert_messages(self, messages: list[dict]) -> list[dict]:
        """Convert Anthropic-format messages to your API format.

        As written this is a pass-through that returns a new list holding
        the same message dicts.
        """
        converted = []
        for msg in messages:
            # Implement format conversion here
            converted.append(msg)
        return converted

    def _convert_tools(self, tools: list[dict]) -> list[dict]:
        """Convert Anthropic tool definitions to your API format.

        As written this is a pass-through that returns ``tools`` unchanged.
        """
        return tools

    def _parse_response(self, data: dict) -> ChatResponse:
        """Parse your API response into a ChatResponse.

        Reads ``"tool_calls"`` (each entry needs ``"id"``, ``"name"`` and
        ``"arguments"``), ``"stop_reason"`` and ``"text"`` from ``data``.
        """
        # `or []` also covers an explicit `"tool_calls": null` in the reply.
        raw_calls = data.get("tool_calls") or []
        tool_calls = [
            ToolCall(id=tc["id"], name=tc["name"], input=tc["arguments"])
            for tc in raw_calls
        ]
        # If the API omits stop_reason, infer it from the reply: defaulting
        # to "end_turn" while tool calls are present would end the tool loop
        # without running them. Map your API's own values to PACE's
        # "tool_use" / "end_turn" / "max_tokens" here if they differ.
        stop_reason = data.get("stop_reason") or ("tool_use" if tool_calls else "end_turn")
        return ChatResponse(
            stop_reason=stop_reason,
            text=data.get("text"),
            tool_calls=tool_calls,
        )

2 — Register the adapter

Open pace/llm/__init__.py:

from .mymodel import MyModelAdapter
def get_llm_adapter() -> LLMAdapter:
cfg = load_config()
provider = cfg.llm.provider
model = cfg.llm.model
if provider == "anthropic":
return AnthropicAdapter(model=model, api_key=os.environ["ANTHROPIC_API_KEY"])
elif provider == "litellm":
return LiteLLMAdapter(model=model, ...)
elif provider == "mymodel": # ← add this
return MyModelAdapter( # ← and this
model=model,
api_key=os.environ["MYMODEL_API_KEY"],
base_url=cfg.llm.base_url or "https://api.mymodel.ai",
)
else:
raise ValueError(f"Unknown LLM provider: {provider!r}")

3 — Configure and run

pace/pace.config.yaml
llm:
  provider: mymodel
  model: mymodel-large-v2
  base_url: null  # null falls back to https://api.mymodel.ai
Terminal window
export MYMODEL_API_KEY="..."
python pace/orchestrator.py --day 1

Key implementation notes

Tool call ID uniqueness

FORGE and SCRIBE match tool results to tool calls by id. Ensure each tool call in your response has a unique ID string. If your API doesn’t return IDs, generate them:

import uuid

# Generate an ID for every call when the API does not supply one; FORGE and
# SCRIBE match tool results to tool calls by this value. The full 32-character
# hex form is used so IDs are not shortened to 8 characters.
# The arguments key is "arguments", the same key the step-1 adapter reads.
tool_calls = [
    ToolCall(id=f"call_{uuid.uuid4().hex}", name=tc["name"], input=tc["arguments"])
    for tc in data.get("tool_calls") or []  # `or []` covers "tool_calls": null
]

stop_reason values

PACE checks stop_reason to decide whether to continue the tool loop:

| Value | Meaning |
| --- | --- |
| "tool_use" | Model wants to call a tool — loop continues |
| "end_turn" | Model is done — loop exits |
| "max_tokens" | Output token limit (max_tokens) reached — treated as done |

Map your API’s equivalent values to these strings.

Timeout handling

FORGE and SCRIBE loops can run for many turns. Set generous timeouts on your HTTP client (300+ seconds) and handle network errors gracefully.