import os
import re
from abc import ABC, abstractmethod
import openai
from darca_exception.exception import DarcaException
from darca_log_facility.logger import DarcaLogger
# === Custom Exceptions ===
class LLMException(DarcaException):
"""
Base class for all darca-llm exceptions.
Inherits from :class:`DarcaException`.
"""
class LLMAPIKeyMissing(LLMException):
"""
Raised when the API key is missing for the selected LLM provider.
This exception indicates that the environment variable for the API key
(e.g., ``OPENAI_API_KEY``) is not set, preventing the LLM from being used.
"""
class LLMResponseError(LLMException):
"""
Raised when an LLM API request fails or returns malformed data.
This exception can be raised due to API connectivity issues, invalid
responses, or unexpected errors from the LLM provider.
"""
class LLMContentFormatError(LLMException):
"""
Raised when the LLM response contains zero or multiple code blocks, or
when the response cannot be properly stripped of Markdown/code block
formatting.
This exception enforces that the LLM response includes exactly one
code block matching the expected structure.
"""
# === Abstract Base Client ===
class BaseLLMClient(ABC):
"""
Abstract base class for LLM backends. Provides shared logic for file
content processing.
Concrete implementations must implement the :meth:`get_raw_response`
method, which handles sending prompts to the respective LLM.
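
Example (a minimal subclass sketch; ``EchoClient`` is hypothetical and
only illustrates the contract)::

    class EchoClient(BaseLLMClient):
        def get_raw_response(self, system, user, llm="gpt-4", temperature=1.0):
            # Echo the user prompt back as a single fenced code block.
            return f"```text\n{user}\n```"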
"""
@abstractmethod
def get_raw_response(
self,
system: str,
user: str,
llm: str = "gpt-4",
temperature: float = 1.0,
) -> str:
"""
Send a raw prompt (consisting of a system message and a user message)
to the LLM and return the string response.
:param system: The system-level instructions or context for the LLM.
:type system: str
:param user: The user-level query or request for the LLM.
:type user: str
:param llm: The identifier for the LLM model to use (e.g., ``gpt-4``).
:type llm: str
:param temperature: The sampling temperature for the LLM, controlling
creativity in the response.
:type temperature: float
:return: The raw response text returned by the LLM.
:rtype: str
:raises LLMResponseError: If the LLM request fails or returns an
invalid response.
:raises LLMAPIKeyMissing: If the required API key is not set in
the environment.
"""
pass
def get_file_content_response(
self,
system: str,
user: str,
llm: str = "gpt-4",
temperature: float = 1.0,
) -> str:
"""
Process a prompt to return the content of a single file.
This method:
1. Sends the prompt to the LLM via :meth:`get_raw_response`.
2. Verifies that the returned response contains exactly one code block.
3. Strips any Markdown or code block formatting from the response.
:param system: The system message for the LLM.
:type system: str
:param user: The user query for the LLM, typically requesting the
content of a single file.
:type user: str
:param llm: The identifier for the LLM model to use
(e.g., ``gpt-4``).
:type llm: str
:param temperature: The sampling temperature for the LLM, controlling
creativity in the response.
:type temperature: float
:return: Cleaned-up text containing the single file content.
:rtype: str
:raises LLMContentFormatError: If the response does not contain
exactly one code block, or if it cannot be properly stripped of
Markdown/code formatting.
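
Example (illustrative sketch; ``client`` is any concrete backend
instance and the prompts are placeholders)::

    content = client.get_file_content_response(
        system="Respond with exactly one fenced code block.",
        user="Generate a minimal pyproject.toml.",
    )
    # ``content`` holds the file body with the fence already stripped.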
"""
response = self.get_raw_response(system, user, llm, temperature)
if not self._has_single_block(response):
raise LLMContentFormatError(
message=(
"Expected exactly one code block in the response, "
"but found zero or multiple."
),
error_code="LLM_CONTENT_MULTIBLOCK",
metadata={"response_preview": response[:100]},
)
cleaned_response = self._strip_markdown_prefix(response)
if not cleaned_response.strip():
raise LLMContentFormatError(
message=(
"The response could not be properly stripped of "
"markdown/code block formatting."
),
error_code="LLM_CONTENT_STRIP_ERROR",
metadata={"response_preview": response[:100]},
)
return cleaned_response
def _strip_markdown_prefix(self, text: str) -> str:
"""
Strip a Markdown code fence (triple backticks with an optional
language tag, e.g. ``python``) from the start and end of the text.
This method uses a regular expression to detect a code block delimited
by triple backticks. If a match is found, only the content within the
code block is returned; otherwise the original text is returned,
stripped of leading/trailing whitespace.
:param text: The text potentially containing Markdown or code
block formatting.
:type text: str
:return: The stripped text without the code block delimiters.
:rtype: str
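
Example (illustrative; ``client`` is any concrete subclass instance)::

    client._strip_markdown_prefix("```python\nprint('hi')\n```")
    # -> "print('hi')"
    client._strip_markdown_prefix("plain text")
    # -> "plain text"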
"""
pattern = r"^```(?:[\w+-]*)\n([\s\S]*?)\n```$"
match = re.match(pattern, text.strip())
if match:
    # Return only the content within the code block.
    return match.group(1).strip()
# No code fence found: return the original text, stripped.
return text.strip()
def _has_single_block(self, text: str) -> bool:
"""
Check if the text contains exactly one Markdown/code block.
:param text: The text that may contain zero, one, or multiple
code blocks.
:type text: str
:return: True if there is exactly one code block in the text,
False otherwise.
:rtype: bool
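
Example (illustrative; ``client`` is any concrete subclass instance)::

    client._has_single_block("```python\nx = 1\n```")  # True
    client._has_single_block("no code block here")     # False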
"""
blocks = re.findall(r"```(?:[\w+-]*)\n[\s\S]*?\n```", text)
return len(blocks) == 1
# === OpenAI Implementation ===
class OpenAIClient(BaseLLMClient):
"""
LLM backend that uses OpenAI's GPT models via their official API.
This class implements the :class:`BaseLLMClient` interface, utilizing
the OpenAI Python client library to make requests to GPT models
(e.g., ``gpt-4``, ``gpt-3.5-turbo``).
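
Example (illustrative sketch; requires a valid ``OPENAI_API_KEY`` in
the environment)::

    client = OpenAIClient()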
"""
def __init__(self):
"""
Initialize the OpenAI client.
:raises LLMAPIKeyMissing: If the ``OPENAI_API_KEY`` environment
variable is not set.
"""
self.logger = DarcaLogger("darca-llm").get_logger()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
raise LLMAPIKeyMissing(
message="OPENAI_API_KEY environment variable is not set.",
error_code="LLM_API_KEY_MISSING",
metadata={"provider": "openai"},
)
openai.api_key = api_key
def get_raw_response(
self,
system: str,
user: str,
llm: str = "gpt-4",
temperature: float = 1.0,
) -> str:
"""
Send a system and user prompt to OpenAI and return the chat response.
:param system: The system message providing context or instructions
to the LLM.
:type system: str
:param user: The user message, typically containing the main query.
:type user: str
:param llm: The identifier of the OpenAI model to
use (e.g., ``gpt-4``).
:type llm: str
:param temperature: Sampling temperature for the request to control
response randomness.
:type temperature: float
:return: The text content of the LLM response.
:rtype: str
:raises LLMResponseError: If the API request fails or the response
cannot be parsed.
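
Example (illustrative sketch; ``client`` is an initialized
:class:`OpenAIClient` and the prompts are placeholders)::

    reply = client.get_raw_response(
        system="You are a concise assistant.",
        user="Summarize RFC 2119 in one sentence.",
        llm="gpt-4",
        temperature=0.2,
    )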
"""
messages = [
{"role": "system", "content": system},
{"role": "user", "content": user},
]
try:
self.logger.debug("Sending prompt to OpenAI", extra={"model": llm})
response = openai.chat.completions.create(
model=llm, messages=messages, temperature=temperature
)
content = response.choices[0].message.content
self.logger.debug("Received response from OpenAI")
return content
except openai.OpenAIError as oe:
raise LLMResponseError(
message="OpenAI API returned an error.",
error_code="LLM_API_REQUEST_FAILED",
metadata={
"model": llm,
"temperature": temperature,
"prompt_preview": user[:100],
"system_prompt_preview": system[:100],
},
cause=oe,
)
except Exception as e:
raise LLMResponseError(
message="Unexpected failure during OpenAI response parsing.",
error_code="LLM_RESPONSE_PARSE_ERROR",
metadata={"model": llm, "temperature": temperature},
cause=e,
)
# === AIClient Wrapper ===
class AIClient:
"""
A unified client for interacting with LLMs using the darca pluggable
backend system. Defaults to the OpenAI backend.
This class acts as a simple wrapper, delegating all method calls
to the selected backend. Currently, only ``openai`` is supported.
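
Example (illustrative sketch; assumes ``OPENAI_API_KEY`` is set)::

    ai = AIClient()  # defaults to the OpenAI backend
    text = ai.get_raw_response(
        system="You are a helpful assistant.",
        user="Say hello.",
    )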
"""
def __init__(self, backend: str = "openai"):
"""
Initialize the AIClient with the given backend.
:param backend: The chosen LLM backend. Currently only ``openai``
is supported.
:type backend: str
:raises LLMException: If the requested backend is not supported.
"""
if backend == "openai":
self._client = OpenAIClient()
else:
raise LLMException(
message=f"LLM backend '{backend}' is not supported.",
error_code="LLM_UNSUPPORTED_BACKEND",
metadata={"requested_backend": backend},
)
def __getattr__(self, name):
"""
Delegate attribute or method calls to the selected backend client.
If this client does not have the attribute or method, it is fetched
from the underlying backend (e.g., :class:`OpenAIClient`).
:param name: The name of the attribute or method to be accessed.
:type name: str
:return: The attribute or method from the underlying backend client.
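
Example (illustrative; the lookup below is forwarded to the
underlying :class:`OpenAIClient`)::

    ai = AIClient()
    ai.get_file_content_response  # resolved via __getattr__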
"""
return getattr(self._client, name)