// NOTE(review): changelog text was pasted above the module header — kept as a
// plain comment so the file compiles; move it to the commit message/CHANGELOG.
//
// Phase 1: Anthropic prompt caching
//   - Add cache_control ephemeral on system prompt blocks
//   - Track cache_creation/cache_read tokens in CompletionResponse + StreamChunk
// Phase 2A: Parallel tool execution
//   - Add ToolConcurrency enum (ReadOnly/Exclusive/Interactive)
//   - JoinSet + Semaphore(3) for bounded parallel tool calls
//   - 7 tools annotated with correct concurrency level
//   - AtomicU32 for lock-free failure tracking in ToolErrorMiddleware
// Phase 2B: Tool output pruning
//   - prune_tool_outputs() trims old ToolResult > 2000 chars to 500 chars
//   - Integrated into CompactionMiddleware before token estimation
// Phase 3: Error classification + smart retry
//   - LlmErrorKind + ClassifiedLlmError for structured error mapping
//   - RetryDriver decorator with jittered exponential backoff
//   - Kernel wraps all LLM calls with RetryDriver
//   - CONTEXT_OVERFLOW recovery triggers emergency compaction in loop_runner
//! LLM Driver trait and implementations
//!
//! This module provides a unified interface for multiple LLM providers.
|
use async_trait::async_trait;
use futures::Stream;
use secrecy::SecretString;
use serde::{Deserialize, Serialize};
use std::pin::Pin;
use zclaw_types::Result;

use crate::stream::StreamChunk;

mod anthropic;
mod error_classifier;
mod gemini;
mod local;
mod openai;
mod retry_driver;

pub use anthropic::AnthropicDriver;
pub use gemini::GeminiDriver;
pub use local::LocalDriver;
pub use openai::OpenAiDriver;
pub use retry_driver::{RetryConfig, RetryDriver};
|
/// LLM Driver trait - unified interface for all providers.
///
/// Implemented by [`AnthropicDriver`], [`OpenAiDriver`], [`GeminiDriver`] and
/// [`LocalDriver`]; [`RetryDriver`] wraps another driver to add retry behavior.
/// `Send + Sync` so a driver can be shared across async tasks.
#[async_trait]
pub trait LlmDriver: Send + Sync {
    /// Get the provider name (short stable identifier, e.g. for logging/routing).
    fn provider(&self) -> &str;

    /// Send a completion request and wait for the full response.
    ///
    /// # Errors
    /// Returns an error on transport, authentication, or provider failures.
    async fn complete(&self, request: CompletionRequest) -> Result<CompletionResponse>;

    /// Send a streaming completion request.
    ///
    /// Returns a stream of chunks; failures surface as `Err` items within
    /// the stream rather than failing the call up front.
    fn stream(
        &self,
        request: CompletionRequest,
    ) -> Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send + '_>>;

    /// Check if the driver is properly configured (e.g. has the credentials
    /// it needs). Synchronous — intended as a cheap readiness probe.
    fn is_configured(&self) -> bool;
}
|
|
|
|
/// A provider-agnostic completion request.
///
/// Built once by the caller and translated by each [`LlmDriver`]
/// implementation into the provider's native wire format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletionRequest {
    /// Model identifier (provider-specific string).
    pub model: String,
    /// System prompt; `None` sends no system message.
    pub system: Option<String>,
    /// Conversation messages.
    pub messages: Vec<zclaw_types::Message>,
    /// Available tools the model may call; empty disables tool use.
    pub tools: Vec<ToolDefinition>,
    /// Maximum tokens to generate; `None` defers to the driver/provider default.
    pub max_tokens: Option<u32>,
    /// Sampling temperature (0.0 - 1.0); `None` defers to the provider default.
    pub temperature: Option<f32>,
    /// Stop sequences that end generation early.
    pub stop: Vec<String>,
    /// Enable streaming responses.
    pub stream: bool,
    /// Enable extended thinking/reasoning (deserializes to `false` when absent).
    #[serde(default)]
    pub thinking_enabled: bool,
    /// Reasoning effort level (for providers that support it);
    /// semantics are provider-defined.
    #[serde(default)]
    pub reasoning_effort: Option<String>,
    /// Enable plan mode (deserializes to `false` when absent).
    #[serde(default)]
    pub plan_mode: bool,
}
|
|
|
|
impl Default for CompletionRequest {
|
|
fn default() -> Self {
|
|
Self {
|
|
model: String::new(),
|
|
system: None,
|
|
messages: Vec::new(),
|
|
tools: Vec::new(),
|
|
max_tokens: Some(4096),
|
|
temperature: Some(0.7),
|
|
stop: Vec::new(),
|
|
stream: false,
|
|
thinking_enabled: false,
|
|
reasoning_effort: None,
|
|
plan_mode: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Tool definition for LLM function calling.
/// Re-exported from `zclaw_types::tool::ToolDefinition` (canonical definition)
/// so driver code and callers share one type.
pub use zclaw_types::tool::ToolDefinition;
|
|
|
|
/// A provider-agnostic completion response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CompletionResponse {
    /// Generated content blocks (text, thinking, tool calls, tool results).
    pub content: Vec<ContentBlock>,
    /// Model that served the request.
    pub model: String,
    /// Input (prompt) tokens consumed.
    pub input_tokens: u32,
    /// Output (completion) tokens generated.
    pub output_tokens: u32,
    /// Why generation stopped.
    pub stop_reason: StopReason,
    /// Cache creation input tokens (Anthropic prompt caching);
    /// `None` for providers without caching support.
    #[serde(default)]
    pub cache_creation_input_tokens: Option<u32>,
    /// Cache read input tokens (Anthropic prompt caching);
    /// `None` for providers without caching support.
    #[serde(default)]
    pub cache_read_input_tokens: Option<u32>,
}
|
|
|
|
/// LLM driver response content block (subset of canonical zclaw_types::ContentBlock).
|
|
/// Used internally by Anthropic/OpenAI/Gemini/Local drivers for API response parsing.
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(tag = "type", rename_all = "snake_case")]
|
|
pub enum ContentBlock {
|
|
Text { text: String },
|
|
Thinking { thinking: String },
|
|
ToolUse { id: String, name: String, input: serde_json::Value },
|
|
/// Anthropic API tool result — must be sent as `role: "user"` with this content block.
|
|
ToolResult {
|
|
tool_use_id: String,
|
|
content: String,
|
|
#[serde(skip_serializing_if = "std::ops::Not::not")]
|
|
is_error: bool,
|
|
},
|
|
}
|
|
|
|
/// Stop reason
|
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
#[serde(rename_all = "snake_case")]
|
|
pub enum StopReason {
|
|
EndTurn,
|
|
MaxTokens,
|
|
StopSequence,
|
|
ToolUse,
|
|
Error,
|
|
}
|
|
|
|
/// Driver configuration — one variant per supported provider backend.
///
/// API keys are held as [`SecretString`] so they are redacted from
/// `Debug` output rather than printed in logs.
#[derive(Debug, Clone)]
pub enum DriverConfig {
    /// Anthropic (Claude) API.
    Anthropic { api_key: SecretString },
    /// OpenAI API; `base_url` overrides the default endpoint for
    /// OpenAI-compatible services.
    OpenAi { api_key: SecretString, base_url: Option<String> },
    /// Google Gemini API.
    Gemini { api_key: SecretString },
    /// Local server (e.g. Ollama); no API key required.
    Local { base_url: String },
}
|
|
|
|
impl DriverConfig {
|
|
pub fn anthropic(api_key: impl Into<String>) -> Self {
|
|
Self::Anthropic {
|
|
api_key: SecretString::new(api_key.into()),
|
|
}
|
|
}
|
|
|
|
pub fn openai(api_key: impl Into<String>) -> Self {
|
|
Self::OpenAi {
|
|
api_key: SecretString::new(api_key.into()),
|
|
base_url: None,
|
|
}
|
|
}
|
|
|
|
pub fn openai_with_base(api_key: impl Into<String>, base_url: impl Into<String>) -> Self {
|
|
Self::OpenAi {
|
|
api_key: SecretString::new(api_key.into()),
|
|
base_url: Some(base_url.into()),
|
|
}
|
|
}
|
|
|
|
pub fn gemini(api_key: impl Into<String>) -> Self {
|
|
Self::Gemini {
|
|
api_key: SecretString::new(api_key.into()),
|
|
}
|
|
}
|
|
|
|
pub fn ollama() -> Self {
|
|
Self::Local {
|
|
base_url: "http://localhost:11434".to_string(),
|
|
}
|
|
}
|
|
|
|
pub fn local(base_url: impl Into<String>) -> Self {
|
|
Self::Local {
|
|
base_url: base_url.into(),
|
|
}
|
|
}
|
|
}
|