Source code for lexilux.chat.utils

"""
Utility functions for chat API.

Provides message normalization, finish_reason normalization, and usage parsing.
"""

from __future__ import annotations

from typing import Any

from lexilux.chat.models import MessagesLike
from lexilux.usage import Json, Usage


def normalize_messages(
    messages: MessagesLike,
    system: str | None = None,
) -> list[dict[str, Any]]:
    """
    Convert any supported messages input into a list of message dicts.

    Accepted inputs:

    - ``str``: becomes a single ``{"role": "user", "content": <str>}`` message.
    - ``list`` / ``tuple`` whose items are each either

      - a ``str`` (becomes a user message), or
      - a ``dict`` with a ``"role"`` key and a ``"content"`` key. The content
        may be a string, ``None``, or a list of content blocks (dicts that
        each carry a ``"type"`` key), e.g.
        ``[{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {...}}]``.
        The ``"content"`` key may be left out only when the role is
        ``"assistant"`` and a ``"tool_calls"`` key is present; the
        normalized message then carries ``"content": None``.

    Every key of an input dict is carried over to the output dict. Output
    keys are ordered ``role``, ``content``, ``tool_calls``, ``tool_call_id``
    and then any remaining keys in their original order.

    Args:
        messages: Messages in one of the formats described above.
        system: Optional system prompt. When truthy, a system message is
            placed at the front of the result.

    Returns:
        A new list of message dictionaries.

    Raises:
        ValueError: If ``messages`` is not a str, list or tuple; if an item
            is neither str nor dict; if a dict lacks ``"role"`` or a required
            ``"content"``; if content is not str/list/None; or if a content
            block is not a dict or has no ``"type"`` key.

    Examples:
        >>> normalize_messages("hi")
        [{'role': 'user', 'content': 'hi'}]
        >>> normalize_messages([{"role": "user", "content": "hi"}])
        [{'role': 'user', 'content': 'hi'}]
        >>> normalize_messages("hi", system="You are helpful")
        [{'role': 'system', 'content': 'You are helpful'}, {'role': 'user', 'content': 'hi'}]
    """
    # Reject unsupported containers up front so the rest can assume a
    # string or a sequence.
    if not isinstance(messages, (str, list, tuple)):
        raise ValueError(
            f"Invalid messages type: {type(messages)}. Expected str, list, or tuple."
        )

    normalized: list[dict[str, Any]] = (
        [{"role": "system", "content": system}] if system else []
    )

    # A bare string is shorthand for one user message.
    if isinstance(messages, str):
        normalized.append({"role": "user", "content": messages})
        return normalized

    # Keys that are placed first, in this order, in every normalized message.
    leading_keys = ("role", "content", "tool_calls", "tool_call_id")

    for entry in messages:
        if isinstance(entry, str):
            normalized.append({"role": "user", "content": entry})
            continue

        if not isinstance(entry, dict):
            raise ValueError(
                f"Invalid message type: {type(entry)}. Expected str or dict."
            )

        if "role" not in entry:
            raise ValueError(
                f"Invalid message dict: {entry}. Must have 'role' key."
            )

        # Resolve the content. Assistant messages that carry tool_calls are
        # allowed to omit it (treated as None).
        if "content" in entry:
            body = entry["content"]
        elif entry["role"] == "assistant" and "tool_calls" in entry:
            body = None
        else:
            raise ValueError(
                f"Invalid message dict: {entry}. Must have 'content' key."
            )

        if isinstance(body, list):
            # Multimodal content: every block must be a dict with a 'type'.
            for position, part in enumerate(body):
                if not isinstance(part, dict):
                    raise ValueError(
                        f"Invalid content block at index {position}: {part}. "
                        "Each block must be a dict."
                    )
                if "type" not in part:
                    raise ValueError(
                        f"Invalid content block at index {position}: {part}. "
                        "Each block must have a 'type' key."
                    )
        elif not (body is None or isinstance(body, str)):
            raise ValueError(
                f"Invalid content type: {type(body)}. "
                "Content must be str or list of content blocks."
            )

        # Build a fresh dict: the well-known fields first, then whatever
        # else the caller supplied (kept for extensibility).
        record: dict[str, Any] = {"role": entry["role"], "content": body}
        for special in ("tool_calls", "tool_call_id"):
            if special in entry:
                record[special] = entry[special]
        record.update(
            (key, value) for key, value in entry.items() if key not in leading_keys
        )
        normalized.append(record)

    return normalized
def parse_usage(response_data: Json) -> Usage:
    """
    Parse usage information from API response.

    Token counts are read from ``response_data["usage"]``. Two key spellings
    are recognised for each count: ``prompt_tokens`` / ``completion_tokens``
    are tried first, and ``input_tokens`` / ``output_tokens`` are used when
    the first key is absent or ``None``. A reported count of ``0`` is kept
    as ``0`` rather than being treated as missing.

    If the ``"usage"`` entry is absent or is not a dict, every count is
    ``None`` and ``details`` is an empty dict.

    Args:
        response_data: API response data.

    Returns:
        Usage object built from the counts found, with the raw usage dict
        passed as ``details``.
    """
    usage_data = response_data.get("usage")
    if not isinstance(usage_data, dict):
        # Covers both a missing entry (None) and a malformed one.
        usage_data = {}

    # Fall back to the alternative key only when the value is truly missing.
    # Using ``or`` here would discard a legitimate count of 0.
    input_tokens = usage_data.get("prompt_tokens")
    if input_tokens is None:
        input_tokens = usage_data.get("input_tokens")

    output_tokens = usage_data.get("completion_tokens")
    if output_tokens is None:
        output_tokens = usage_data.get("output_tokens")

    return Usage(
        input_tokens=input_tokens,
        output_tokens=output_tokens,
        total_tokens=usage_data.get("total_tokens"),
        details=usage_data,
    )


def normalize_finish_reason(finish_reason: Any) -> str | None:
    """
    Normalize finish_reason to a valid string or None.

    Handles cases where compatible services may return invalid values:

    - None -> None
    - Empty string "" -> None
    - Non-empty string (e.g. "stop", "length", "content_filter") -> as-is
    - Other types (int, bool, list, etc.) -> None (defensive)

    Args:
        finish_reason: Raw finish_reason value from API.

    Returns:
        Normalized finish_reason (str or None).
    """
    # Only a non-empty string is a usable finish reason; everything else
    # (None, "", and non-string values) collapses to None.
    if isinstance(finish_reason, str) and finish_reason:
        return finish_reason
    return None