OpenAI Backend LLM Implementation

This section details the OpenAI implementation of the BaseBackendLLM interface.

memora.llm_backends.OpenAIBackendLLM

OpenAIBackendLLM(
    api_key: str,
    organization: str | None = None,
    project: str | None = None,
    model: str = "gpt-4o",
    temperature: float = 0.7,
    top_p: float = 1,
    max_tokens: int = 1024,
    max_retries: int = 3,
)

Bases: BaseBackendLLM

PARAMETER DESCRIPTION
api_key

The API key to use for authentication

TYPE: str

organization

Your OpenAI organization ID

TYPE: str | None DEFAULT: None

project

Your OpenAI project ID

TYPE: str | None DEFAULT: None

model

The name of the OpenAI model to use

TYPE: str DEFAULT: 'gpt-4o'

temperature

The temperature to use for sampling

TYPE: float DEFAULT: 0.7

top_p

The top_p value to use for sampling

TYPE: float DEFAULT: 1

max_tokens

The maximum number of tokens to generate

TYPE: int DEFAULT: 1024

max_retries

The maximum number of retries to make if a request fails

TYPE: int DEFAULT: 3

Example
from memora.llm_backends import OpenAIBackendLLM

openai_backend_llm = OpenAIBackendLLM(
    api_key="OPENAI_API_KEY",
    model="gpt-4o"
)
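
The string "OPENAI_API_KEY" above is just a placeholder. In practice the key is usually read from the environment rather than hard-coded; a minimal sketch (the environment-variable name is an assumption, adjust it to your setup):

import os

from memora.llm_backends import OpenAIBackendLLM

openai_backend_llm = OpenAIBackendLLM(
    api_key=os.environ["OPENAI_API_KEY"],  # assumed variable name
    model="gpt-4o",
)
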
Source code in memora/llm_backends/openai_backend_llm.py
def __init__(
    self,
    api_key: str,
    organization: str | None = None,
    project: str | None = None,
    model: str = "gpt-4o",
    temperature: float = 0.7,
    top_p: float = 1,
    max_tokens: int = 1024,
    max_retries: int = 3,
):
    """
    Initialize the OpenAIBackendLLM class with specific parameters.

    Args:
        api_key (str): The API key to use for authentication
        organization (str | None): Your OpenAI organization ID
        project (str | None): Your OpenAI project ID
        model (str): The name of the OpenAI model to use
        temperature (float): The temperature to use for sampling
        top_p (float): The top_p value to use for sampling
        max_tokens (int): The maximum number of tokens to generate
        max_retries (int): The maximum number of retries to make if a request fails

    Example:
        ```python
        from memora.llm_backends import OpenAIBackendLLM

        openai_backend_llm = OpenAIBackendLLM(
            api_key="OPENAI_API_KEY",
            model="gpt-4o"
        )
        ```
    """

    self.openai_client = AsyncOpenAI(
        api_key=api_key,
        organization=organization,
        project=project,
        max_retries=max_retries,
    )

    self.model = model
    self.temperature = temperature
    self.top_p = top_p
    self.max_tokens = max_tokens

Attributes

get_model_kwargs property

get_model_kwargs: Dict[str, Any]

Returns dictionary of model configuration parameters
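
The property body is not shown in this excerpt; a minimal sketch of what it plausibly returns, given the attributes stored in __init__ and the keyword arguments that __call__ unpacks into the chat-completions requests:

from typing import Any, Dict

# Sketch only (assumed implementation, inside OpenAIBackendLLM): collect the
# sampling parameters set in __init__ so __call__ can pass them as **kwargs
# to the OpenAI chat-completions endpoint.
@property
def get_model_kwargs(self) -> Dict[str, Any]:
    return {
        "model": self.model,
        "temperature": self.temperature,
        "top_p": self.top_p,
        "max_tokens": self.max_tokens,
    }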

max_tokens instance-attribute

max_tokens = max_tokens

model instance-attribute

model = model

openai_client instance-attribute

openai_client = AsyncOpenAI(
    api_key=api_key,
    organization=organization,
    project=project,
    max_retries=max_retries,
)

temperature instance-attribute

temperature = temperature

top_p instance-attribute

top_p = top_p

Functions

__call__ async

__call__(
    messages: List[Dict[str, str]],
    output_schema_model: Type[BaseModel] | None = None,
) -> Union[str, BaseModel]

Process messages and generate a response (📌 Streaming is not supported, as the full response is required at once)

PARAMETER DESCRIPTION
messages

List of message dicts with role and content, e.g. [{"role": "user", "content": "Hello!"}, ...]

TYPE: List[Dict[str, str]]

output_schema_model

Optional Pydantic base model for structured output.

TYPE: Type[BaseModel] | None DEFAULT: None

RETURNS DESCRIPTION
Union[str, BaseModel]

Union[str, BaseModel]: Generated text response as a string, or an instance of the output schema model if specified

Source code in memora/llm_backends/openai_backend_llm.py
@override
async def __call__(
    self,
    messages: List[Dict[str, str]],
    output_schema_model: Type[BaseModel] | None = None,
) -> Union[str, BaseModel]:
    """
    Process messages and generate a response (📌 Streaming is not supported, as the full response is required at once)

    Args:
        messages (List[Dict[str, str]]): List of message dicts with role and content, e.g. [{"role": "user", "content": "Hello!"}, ...]
        output_schema_model (Type[BaseModel] | None): Optional Pydantic base model for structured output.

    Returns:
        Union[str, BaseModel]: Generated text response as a string, or an instance of the output schema model if specified
    """

    if output_schema_model:
        response = await self.openai_client.beta.chat.completions.parse(
            messages=messages,
            **self.get_model_kwargs,
            response_format=output_schema_model,
        )
        return response.choices[0].message.parsed
    else:
        response = await self.openai_client.chat.completions.create(
            messages=messages,
            **self.get_model_kwargs,
        )
        return response.choices[0].message.content
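
A brief usage sketch of both call paths above, reusing the openai_backend_llm instance from the constructor example; the Pydantic model Summary is purely illustrative and not part of memora:

import asyncio

from pydantic import BaseModel


class Summary(BaseModel):
    # Hypothetical schema for structured output.
    title: str
    key_points: list[str]


async def main():
    # Plain text completion (no output_schema_model).
    text = await openai_backend_llm(
        messages=[{"role": "user", "content": "Hello!"}]
    )

    # Structured output parsed into an instance of the Pydantic model.
    summary = await openai_backend_llm(
        messages=[{"role": "user", "content": "Summarize the plot of Hamlet."}],
        output_schema_model=Summary,
    )
    print(text)
    print(summary.title, summary.key_points)


asyncio.run(main())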

close async

close() -> None

Closes the LLM connection.

Source code in memora/llm_backends/openai_backend_llm.py
@override
async def close(self) -> None:
    """Closes the LLM connection."""

    await self.openai_client.close()
    self.openai_client = None
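
Since close() disposes of the underlying AsyncOpenAI client and sets it to None, an instance cannot be reused after closing. A minimal lifecycle sketch, assuming the API key is available as an environment variable:

import asyncio
import os

from memora.llm_backends import OpenAIBackendLLM


async def main():
    backend = OpenAIBackendLLM(api_key=os.environ["OPENAI_API_KEY"])  # assumed variable name
    try:
        reply = await backend(messages=[{"role": "user", "content": "Hello!"}])
        print(reply)
    finally:
        # Release the HTTP resources held by the AsyncOpenAI client.
        await backend.close()


asyncio.run(main())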