Files
zclaw_openfang/desktop/src-tauri/src/intelligence/extraction_adapter.rs
iven d9b0b4f4f7
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(audit): Batch 7-9 dead_code 标注 + TODO 清理 + 文档同步
Batch 7: dead_code 标注统一 (16 处)
- crates/ 9 处: growth, kernel, pipeline, runtime, saas, skills
- src-tauri/ 7 处: classroom, intelligence, browser, mcp
- 统一格式: #[allow(dead_code)] // @reserved: <原因>

Batch 7+: EvolutionEngine L2/L3 10 个未使用 pub 函数
- 全部标注 @reserved: EvolutionEngine L2/L3, post-release integration

Batch 9: TODO → FUTURE 标记 (4 处)
- html.rs: template-based export
- nl_schedule.rs: LLM-assisted parsing
- knowledge/handlers.rs: category_id from upload
- personality_detector.rs: VikingStorage persistence

Batch 5+: Cargo.lock 更新 (serde_yaml_bw 迁移)

全量测试通过: 719 passed, 0 failed
2026-04-19 08:54:57 +08:00

382 lines
13 KiB
Rust

//! Extraction Adapter - Bridges zclaw_growth::LlmDriverForExtraction with the Kernel's LlmDriver
//!
//! Implements the `LlmDriverForExtraction` trait by delegating to the Kernel's
//! `zclaw_runtime::driver::LlmDriver`, which already handles provider-specific
//! API calls (OpenAI, Anthropic, Gemini, etc.).
//!
//! This enables the Growth system's MemoryExtractor to call the LLM for memory
//! extraction from conversations.
use std::sync::Arc;
use zclaw_growth::extractor::{LlmDriverForExtraction, prompts};
use zclaw_growth::types::{ExtractedMemory, MemoryType};
use zclaw_runtime::driver::{CompletionRequest, ContentBlock, LlmDriver};
use zclaw_types::{Message, Result, SessionId};
/// Adapter that wraps the Kernel's `LlmDriver` to implement `LlmDriverForExtraction`.
///
/// The adapter translates extraction requests into completion requests that the
/// Kernel's LLM driver can process, then parses the structured JSON response
/// back into `ExtractedMemory` objects.
pub struct TauriExtractionDriver {
    // Kernel LLM driver that performs the actual provider-specific API calls.
    driver: Arc<dyn LlmDriver>,
    // Model identifier included in every completion request built by this adapter.
    model: String,
}
impl TauriExtractionDriver {
    /// Create a new extraction driver wrapping the given LLM driver.
    ///
    /// The `model` parameter specifies which model to use for extraction calls.
    pub fn new(driver: Arc<dyn LlmDriver>, model: String) -> Self {
        Self { driver, model }
    }

    /// Build a completion request from the extraction prompt and conversation messages.
    ///
    /// The conversation is flattened into a role-tagged transcript and appended
    /// to the type-specific extraction prompt.
    fn build_request(
        &self,
        messages: &[Message],
        extraction_type: MemoryType,
    ) -> CompletionRequest {
        let extraction_prompt = prompts::get_extraction_prompt(extraction_type);
        // Format conversation for the prompt.
        // Message is an enum with variants: User{content}, Assistant{content, thinking},
        // System{content}, ToolUse{...}, ToolResult{...}
        let conversation_text = messages
            .iter()
            .filter_map(|msg| match msg {
                Message::User { content } => Some(format!("[User]: {}", content)),
                Message::Assistant { content, .. } => Some(format!("[Assistant]: {}", content)),
                Message::System { content } => Some(format!("[System]: {}", content)),
                // Skip tool use/result messages -- not relevant for memory extraction
                Message::ToolUse { .. } | Message::ToolResult { .. } => None,
            })
            .collect::<Vec<_>>()
            .join("\n\n");
        let full_prompt = format!("{}{}", extraction_prompt, conversation_text);
        CompletionRequest {
            model: self.model.clone(),
            system: Some(
                "You are a memory extraction assistant. Analyze conversations and extract \
                 structured memories in valid JSON format. Always respond with valid JSON only, \
                 no additional text or markdown formatting."
                    .to_string(),
            ),
            messages: vec![Message::user(full_prompt)],
            tools: Vec::new(),
            // Generous but bounded: extraction responses are a JSON array, not prose.
            max_tokens: Some(2000),
            // Low temperature keeps the structured output deterministic.
            temperature: Some(0.3),
            stop: Vec::new(),
            stream: false,
            thinking_enabled: false,
            reasoning_effort: None,
            plan_mode: false,
        }
    }

    /// Parse the LLM response text into a list of extracted memories.
    ///
    /// Returns an empty vec (and logs a warning) on any malformed response;
    /// extraction failures must never propagate as errors from parsing.
    fn parse_response(
        &self,
        response_text: &str,
        extraction_type: MemoryType,
    ) -> Vec<ExtractedMemory> {
        // Strip markdown code fences if present
        let cleaned = response_text
            .trim()
            .trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim();
        // Extract the JSON array from the response. Guard `start <= end`:
        // a degenerate response such as "] no array here [" would otherwise
        // produce an inverted range and panic when slicing.
        let json_str = match (cleaned.find('['), cleaned.rfind(']')) {
            (Some(start), Some(end)) if start <= end => &cleaned[start..=end],
            _ => {
                tracing::warn!(
                    "[TauriExtractionDriver] No JSON array found in LLM response"
                );
                return Vec::new();
            }
        };
        let raw_items: Vec<serde_json::Value> = match serde_json::from_str(json_str) {
            Ok(items) => items,
            Err(e) => {
                tracing::warn!(
                    "[TauriExtractionDriver] Failed to parse extraction JSON: {}",
                    e
                );
                return Vec::new();
            }
        };
        // Items that fail per-field validation are dropped individually rather
        // than failing the whole batch.
        raw_items
            .into_iter()
            .filter_map(|item| self.parse_memory_item(&item, extraction_type))
            .collect()
    }

    /// Parse a single memory item from JSON.
    ///
    /// `content` is mandatory; `category`, `confidence`, and `keywords` fall
    /// back to defaults ("unknown", 0.7, empty) when missing or mistyped.
    fn parse_memory_item(
        &self,
        value: &serde_json::Value,
        fallback_type: MemoryType,
    ) -> Option<ExtractedMemory> {
        let content = value.get("content")?.as_str()?.to_string();
        let category = value
            .get("category")
            .and_then(|v| v.as_str())
            .unwrap_or("unknown")
            .to_string();
        let confidence = value
            .get("confidence")
            .and_then(|v| v.as_f64())
            .unwrap_or(0.7) as f32;
        let keywords = value
            .get("keywords")
            .and_then(|v| v.as_array())
            .map(|arr| {
                arr.iter()
                    .filter_map(|v| v.as_str().map(String::from))
                    .collect()
            })
            .unwrap_or_default();
        Some(
            ExtractedMemory::new(fallback_type, category, content, SessionId::new())
                .with_confidence(confidence)
                .with_keywords(keywords),
        )
    }
}
#[async_trait::async_trait]
impl LlmDriverForExtraction for TauriExtractionDriver {
    /// Extract memories of one type from a conversation.
    ///
    /// Builds a type-specific extraction prompt, sends it through the Kernel's
    /// LLM driver, and parses the JSON array in the reply. Returns an empty
    /// vec for short conversations or unparseable replies; returns `Err` only
    /// when the LLM call itself fails.
    async fn extract_memories(
        &self,
        messages: &[Message],
        extraction_type: MemoryType,
    ) -> Result<Vec<ExtractedMemory>> {
        let type_name = extraction_type.to_string();
        tracing::debug!(
            "[TauriExtractionDriver] Extracting {} memories from {} messages",
            type_name,
            messages.len()
        );
        // Skip extraction if there are too few messages -- a single message
        // carries too little signal to be worth an LLM call.
        if messages.len() < 2 {
            tracing::debug!(
                "[TauriExtractionDriver] Too few messages ({}) for extraction, skipping",
                messages.len()
            );
            return Ok(Vec::new());
        }
        let request = self.build_request(messages, extraction_type);
        let response = match self.driver.complete(request).await {
            Ok(resp) => resp,
            Err(e) => {
                tracing::error!(
                    "[TauriExtractionDriver] LLM completion failed for {}: {}",
                    type_name,
                    e
                );
                return Err(e);
            }
        };
        // Concatenate all text blocks from the response; non-text content
        // blocks are ignored.
        let mut response_text = String::new();
        for block in response.content {
            if let ContentBlock::Text { text } = block {
                response_text.push_str(&text);
            }
        }
        if response_text.is_empty() {
            tracing::warn!(
                "[TauriExtractionDriver] Empty response from LLM for {} extraction",
                type_name
            );
            return Ok(Vec::new());
        }
        let memories = self.parse_response(&response_text, extraction_type);
        tracing::info!(
            "[TauriExtractionDriver] Extracted {} {} memories",
            memories.len(),
            type_name
        );
        Ok(memories)
    }

    /// Run a caller-supplied extraction prompt and return the raw response text.
    ///
    /// Unlike `extract_memories`, the caller provides both system and user
    /// prompts (the conversation is not formatted here), and an empty or
    /// too-short input is an error rather than an empty result.
    async fn extract_with_prompt(
        &self,
        messages: &[Message],
        system_prompt: &str,
        user_prompt: &str,
    ) -> Result<String> {
        if messages.len() < 2 {
            return Err(zclaw_types::ZclawError::InvalidInput(
                "Too few messages for combined extraction".to_string(),
            ));
        }
        tracing::debug!(
            "[TauriExtractionDriver] Combined extraction from {} messages",
            messages.len()
        );
        let request = CompletionRequest {
            model: self.model.clone(),
            system: Some(system_prompt.to_string()),
            messages: vec![Message::user(user_prompt.to_string())],
            tools: Vec::new(),
            max_tokens: Some(3000),
            temperature: Some(0.3),
            stop: Vec::new(),
            stream: false,
            thinking_enabled: false,
            reasoning_effort: None,
            plan_mode: false,
        };
        let response = match self.driver.complete(request).await {
            Ok(resp) => resp,
            Err(e) => {
                tracing::error!(
                    "[TauriExtractionDriver] Combined extraction LLM call failed: {}",
                    e
                );
                return Err(e);
            }
        };
        // Flatten the text blocks of the response into one string.
        let mut combined = String::new();
        for block in response.content {
            if let ContentBlock::Text { text } = block {
                combined.push_str(&text);
            }
        }
        if combined.is_empty() {
            return Err(zclaw_types::ZclawError::LlmError(
                "Empty response from LLM for combined extraction".to_string(),
            ));
        }
        tracing::info!(
            "[TauriExtractionDriver] Combined extraction response: {} chars",
            combined.len()
        );
        Ok(combined)
    }
}
/// Global extraction driver instance (legacy path, kept for compatibility).
///
/// **Architecture note:** The Kernel struct now holds its own `extraction_driver` field
/// (set via `kernel.set_extraction_driver()`), which is the primary path used by
/// the middleware chain. This OnceCell global is a legacy artifact — its accessors
/// are dead code. The `configure_extraction_driver()` function is still called during
/// kernel_init for backward compatibility but the primary consumption path is
/// through the Kernel struct.
// Write-once: `OnceCell::set` succeeds only for the first caller; later calls
// leave the stored driver unchanged.
static EXTRACTION_DRIVER: tokio::sync::OnceCell<Arc<TauriExtractionDriver>> =
    tokio::sync::OnceCell::const_new();
/// Configure the global extraction driver (legacy path).
///
/// Called during kernel initialization. The primary path is via
/// `kernel.set_extraction_driver()` which stores the driver in the Kernel struct
/// for use by the middleware chain.
///
/// The global is write-once: the first call wins, and repeat calls are
/// ignored. A repeat call is logged as a warning rather than silently
/// dropped (and falsely reported as "configured").
pub fn configure_extraction_driver(driver: Arc<dyn LlmDriver>, model: String) {
    let adapter = TauriExtractionDriver::new(driver, model);
    if EXTRACTION_DRIVER.set(Arc::new(adapter)).is_err() {
        // OnceCell::set fails when a value is already stored -- surface a
        // double-init during kernel startup instead of hiding it.
        tracing::warn!(
            "[ExtractionAdapter] Extraction driver already configured; ignoring repeat call"
        );
        return;
    }
    tracing::info!("[ExtractionAdapter] Extraction driver configured (legacy OnceCell path)");
}
/// Check if the extraction driver is available (legacy OnceCell path).
#[allow(dead_code)] // @reserved: diagnostic check for legacy OnceCell extraction driver
pub fn is_extraction_driver_configured() -> bool {
    // `initialized()` is OnceCell's dedicated "has a value been set" query.
    EXTRACTION_DRIVER.initialized()
}
/// Get the global extraction driver (legacy OnceCell path).
///
/// Prefer accessing via `kernel.extraction_driver()` when the Kernel is available.
#[allow(dead_code)] // @reserved: legacy accessor, prefer kernel.extraction_driver()
pub fn get_extraction_driver() -> Option<Arc<TauriExtractionDriver>> {
    // Clone is a refcount bump only; `Arc::clone` makes that intent explicit.
    EXTRACTION_DRIVER.get().map(Arc::clone)
}
#[cfg(test)]
mod tests {
    use super::*;

    // The global OnceCell starts empty; nothing in this module sets it.
    #[test]
    fn test_extraction_driver_not_configured_by_default() {
        assert!(!is_extraction_driver_configured());
    }

    // Simulates the fence-stripping and array-extraction steps of
    // `parse_response` (constructing a real driver would require a mock
    // `LlmDriver`, so the parsing pipeline is exercised directly).
    #[test]
    fn test_parse_valid_json_response() {
        let response = r#"```json
[
{
"category": "communication-style",
"content": "User prefers concise replies",
"confidence": 0.9,
"keywords": ["concise", "style"]
},
{
"category": "language",
"content": "User prefers Chinese responses",
"confidence": 0.85,
"keywords": ["Chinese", "language"]
}
]
```"#;
        let stripped = response
            .trim()
            .trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim();
        let start = stripped.find('[').unwrap();
        let end = stripped.rfind(']').unwrap();
        let parsed: Vec<serde_json::Value> =
            serde_json::from_str(&stripped[start..=end]).unwrap();
        assert_eq!(parsed.len(), 2);
        assert_eq!(
            parsed[0]["category"].as_str().unwrap(),
            "communication-style"
        );
    }

    // A reply with no JSON array must be detectable so parsing can bail out.
    #[test]
    fn test_parse_no_json_array() {
        let response = "No memories could be extracted from this conversation.";
        assert!(!(response.contains('[') && response.contains(']')));
    }
}