Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
C-01: ExperienceExtractor 接入 ExperienceStore - GrowthIntegration.new() 创建 ExperienceExtractor 时注入 ExperienceStore - 经验持久化路径打通:extract_combined → persist_experiences → ExperienceStore C-02+C-03: 进化触发链路全链路接通 - create_middleware_chain() 注册 EvolutionMiddleware (priority 78) - MemoryMiddleware 持有 Arc<EvolutionMiddleware> 共享引用 - after_completion 中调用 check_evolution() → 推送 PendingEvolution - EvolutionMiddleware 在下次对话前注入进化建议到 system prompt H-01: FeedbackCollector loaded 标志修复 - load() 失败时保留 loaded=false,下次 save 重试 - 日志级别 debug → warn H-03: FeedbackCollector 内部可变性 - EvolutionEngine.feedback 改为 Arc<Mutex<FeedbackCollector>> - submit_feedback() 从 &mut self → &self,支持中间件 &self 调用路径 - GrowthIntegration.initialize() 从 &mut self → &self H-05: 删除空测试 test_parse_empty_response (无 assert) H-06: infer_experiences_from_memories() fallback - Outcome::Success → Outcome::Partial (反映推断不确定性)
382 lines
13 KiB
Rust
382 lines
13 KiB
Rust
//! Extraction Adapter - Bridges zclaw_growth::LlmDriverForExtraction with the Kernel's LlmDriver
//!
//! Implements the `LlmDriverForExtraction` trait by delegating to the Kernel's
//! `zclaw_runtime::driver::LlmDriver`, which already handles provider-specific
//! API calls (OpenAI, Anthropic, Gemini, etc.).
//!
//! This enables the Growth system's MemoryExtractor to call the LLM for memory
//! extraction from conversations.
|
use std::sync::Arc;
|
|
use zclaw_growth::extractor::{LlmDriverForExtraction, prompts};
|
|
use zclaw_growth::types::{ExtractedMemory, MemoryType};
|
|
use zclaw_runtime::driver::{CompletionRequest, ContentBlock, LlmDriver};
|
|
use zclaw_types::{Message, Result, SessionId};
|
|
|
|
/// Adapter that wraps the Kernel's `LlmDriver` to implement `LlmDriverForExtraction`.
///
/// The adapter translates extraction requests into completion requests that the
/// Kernel's LLM driver can process, then parses the structured JSON response
/// back into `ExtractedMemory` objects.
pub struct TauriExtractionDriver {
    /// Kernel LLM driver that performs the actual provider API calls.
    driver: Arc<dyn LlmDriver>,
    /// Model identifier used for every extraction completion request.
    model: String,
}
|
|
|
|
impl TauriExtractionDriver {
|
|
/// Create a new extraction driver wrapping the given LLM driver.
|
|
///
|
|
/// The `model` parameter specifies which model to use for extraction calls.
|
|
pub fn new(driver: Arc<dyn LlmDriver>, model: String) -> Self {
|
|
Self { driver, model }
|
|
}
|
|
|
|
/// Build a completion request from the extraction prompt and conversation messages.
|
|
fn build_request(
|
|
&self,
|
|
messages: &[Message],
|
|
extraction_type: MemoryType,
|
|
) -> CompletionRequest {
|
|
let extraction_prompt = prompts::get_extraction_prompt(extraction_type);
|
|
|
|
// Format conversation for the prompt
|
|
// Message is an enum with variants: User{content}, Assistant{content, thinking},
|
|
// System{content}, ToolUse{...}, ToolResult{...}
|
|
let conversation_text = messages
|
|
.iter()
|
|
.filter_map(|msg| {
|
|
match msg {
|
|
Message::User { content } => {
|
|
Some(format!("[User]: {}", content))
|
|
}
|
|
Message::Assistant { content, .. } => {
|
|
Some(format!("[Assistant]: {}", content))
|
|
}
|
|
Message::System { content } => {
|
|
Some(format!("[System]: {}", content))
|
|
}
|
|
// Skip tool use/result messages -- not relevant for memory extraction
|
|
Message::ToolUse { .. } | Message::ToolResult { .. } => None,
|
|
}
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join("\n\n");
|
|
|
|
let full_prompt = format!("{}{}", extraction_prompt, conversation_text);
|
|
|
|
CompletionRequest {
|
|
model: self.model.clone(),
|
|
system: Some(
|
|
"You are a memory extraction assistant. Analyze conversations and extract \
|
|
structured memories in valid JSON format. Always respond with valid JSON only, \
|
|
no additional text or markdown formatting."
|
|
.to_string(),
|
|
),
|
|
messages: vec![Message::user(full_prompt)],
|
|
tools: Vec::new(),
|
|
max_tokens: Some(2000),
|
|
temperature: Some(0.3),
|
|
stop: Vec::new(),
|
|
stream: false,
|
|
thinking_enabled: false,
|
|
reasoning_effort: None,
|
|
plan_mode: false,
|
|
}
|
|
}
|
|
|
|
/// Parse the LLM response text into a list of extracted memories.
|
|
fn parse_response(
|
|
&self,
|
|
response_text: &str,
|
|
extraction_type: MemoryType,
|
|
) -> Vec<ExtractedMemory> {
|
|
// Strip markdown code fences if present
|
|
let cleaned = response_text
|
|
.trim()
|
|
.trim_start_matches("```json")
|
|
.trim_start_matches("```")
|
|
.trim_end_matches("```")
|
|
.trim();
|
|
|
|
// Extract the JSON array from the response
|
|
let json_str = match (cleaned.find('['), cleaned.rfind(']')) {
|
|
(Some(start), Some(end)) => &cleaned[start..=end],
|
|
_ => {
|
|
tracing::warn!(
|
|
"[TauriExtractionDriver] No JSON array found in LLM response"
|
|
);
|
|
return Vec::new();
|
|
}
|
|
};
|
|
|
|
let raw_items: Vec<serde_json::Value> = match serde_json::from_str(json_str) {
|
|
Ok(items) => items,
|
|
Err(e) => {
|
|
tracing::warn!(
|
|
"[TauriExtractionDriver] Failed to parse extraction JSON: {}",
|
|
e
|
|
);
|
|
return Vec::new();
|
|
}
|
|
};
|
|
|
|
raw_items
|
|
.into_iter()
|
|
.filter_map(|item| self.parse_memory_item(&item, extraction_type))
|
|
.collect()
|
|
}
|
|
|
|
/// Parse a single memory item from JSON.
|
|
fn parse_memory_item(
|
|
&self,
|
|
value: &serde_json::Value,
|
|
fallback_type: MemoryType,
|
|
) -> Option<ExtractedMemory> {
|
|
let content = value.get("content")?.as_str()?.to_string();
|
|
let category = value
|
|
.get("category")
|
|
.and_then(|v| v.as_str())
|
|
.unwrap_or("unknown")
|
|
.to_string();
|
|
|
|
let confidence = value
|
|
.get("confidence")
|
|
.and_then(|v| v.as_f64())
|
|
.unwrap_or(0.7) as f32;
|
|
|
|
let keywords = value
|
|
.get("keywords")
|
|
.and_then(|v| v.as_array())
|
|
.map(|arr| {
|
|
arr.iter()
|
|
.filter_map(|v| v.as_str().map(String::from))
|
|
.collect()
|
|
})
|
|
.unwrap_or_default();
|
|
|
|
Some(
|
|
ExtractedMemory::new(fallback_type, category, content, SessionId::new())
|
|
.with_confidence(confidence)
|
|
.with_keywords(keywords),
|
|
)
|
|
}
|
|
}
|
|
|
|
#[async_trait::async_trait]
|
|
impl LlmDriverForExtraction for TauriExtractionDriver {
|
|
async fn extract_memories(
|
|
&self,
|
|
messages: &[Message],
|
|
extraction_type: MemoryType,
|
|
) -> Result<Vec<ExtractedMemory>> {
|
|
let type_name = format!("{}", extraction_type);
|
|
tracing::debug!(
|
|
"[TauriExtractionDriver] Extracting {} memories from {} messages",
|
|
type_name,
|
|
messages.len()
|
|
);
|
|
|
|
// Skip extraction if there are too few messages
|
|
if messages.len() < 2 {
|
|
tracing::debug!(
|
|
"[TauriExtractionDriver] Too few messages ({}) for extraction, skipping",
|
|
messages.len()
|
|
);
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let request = self.build_request(messages, extraction_type);
|
|
|
|
let response = self.driver.complete(request).await.map_err(|e| {
|
|
tracing::error!(
|
|
"[TauriExtractionDriver] LLM completion failed for {}: {}",
|
|
type_name,
|
|
e
|
|
);
|
|
e
|
|
})?;
|
|
|
|
// Extract text content from response
|
|
let response_text: String = response
|
|
.content
|
|
.into_iter()
|
|
.filter_map(|block| match block {
|
|
ContentBlock::Text { text } => Some(text),
|
|
_ => None,
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join("");
|
|
|
|
if response_text.is_empty() {
|
|
tracing::warn!(
|
|
"[TauriExtractionDriver] Empty response from LLM for {} extraction",
|
|
type_name
|
|
);
|
|
return Ok(Vec::new());
|
|
}
|
|
|
|
let memories = self.parse_response(&response_text, extraction_type);
|
|
|
|
tracing::info!(
|
|
"[TauriExtractionDriver] Extracted {} {} memories",
|
|
memories.len(),
|
|
type_name
|
|
);
|
|
|
|
Ok(memories)
|
|
}
|
|
|
|
async fn extract_with_prompt(
|
|
&self,
|
|
messages: &[Message],
|
|
system_prompt: &str,
|
|
user_prompt: &str,
|
|
) -> Result<String> {
|
|
if messages.len() < 2 {
|
|
return Err(zclaw_types::ZclawError::InvalidInput(
|
|
"Too few messages for combined extraction".to_string(),
|
|
));
|
|
}
|
|
|
|
tracing::debug!(
|
|
"[TauriExtractionDriver] Combined extraction from {} messages",
|
|
messages.len()
|
|
);
|
|
|
|
let request = CompletionRequest {
|
|
model: self.model.clone(),
|
|
system: Some(system_prompt.to_string()),
|
|
messages: vec![Message::user(user_prompt.to_string())],
|
|
tools: Vec::new(),
|
|
max_tokens: Some(3000),
|
|
temperature: Some(0.3),
|
|
stop: Vec::new(),
|
|
stream: false,
|
|
thinking_enabled: false,
|
|
reasoning_effort: None,
|
|
plan_mode: false,
|
|
};
|
|
|
|
let response = self.driver.complete(request).await.map_err(|e| {
|
|
tracing::error!(
|
|
"[TauriExtractionDriver] Combined extraction LLM call failed: {}",
|
|
e
|
|
);
|
|
e
|
|
})?;
|
|
|
|
let response_text: String = response
|
|
.content
|
|
.into_iter()
|
|
.filter_map(|block| match block {
|
|
ContentBlock::Text { text } => Some(text),
|
|
_ => None,
|
|
})
|
|
.collect::<Vec<_>>()
|
|
.join("");
|
|
|
|
if response_text.is_empty() {
|
|
return Err(zclaw_types::ZclawError::LlmError(
|
|
"Empty response from LLM for combined extraction".to_string(),
|
|
));
|
|
}
|
|
|
|
tracing::info!(
|
|
"[TauriExtractionDriver] Combined extraction response: {} chars",
|
|
response_text.len()
|
|
);
|
|
|
|
Ok(response_text)
|
|
}
|
|
}
|
|
|
|
/// Global extraction driver instance (legacy path, kept for compatibility).
///
/// **Architecture note:** The Kernel struct now holds its own `extraction_driver` field
/// (set via `kernel.set_extraction_driver()`), which is the primary path used by
/// the middleware chain. This OnceCell global is a legacy artifact — its accessors
/// are dead code. The `configure_extraction_driver()` function is still called during
/// kernel_init for backward compatibility but the primary consumption path is
/// through the Kernel struct.
///
/// `OnceCell` semantics: the first successful `set()` wins; later calls are no-ops.
static EXTRACTION_DRIVER: tokio::sync::OnceCell<Arc<TauriExtractionDriver>> =
    tokio::sync::OnceCell::const_new();
|
|
|
|
/// Configure the global extraction driver (legacy path).
|
|
///
|
|
/// Called during kernel initialization. The primary path is via
|
|
/// `kernel.set_extraction_driver()` which stores the driver in the Kernel struct
|
|
/// for use by the middleware chain.
|
|
pub fn configure_extraction_driver(driver: Arc<dyn LlmDriver>, model: String) {
|
|
let adapter = TauriExtractionDriver::new(driver, model);
|
|
let _ = EXTRACTION_DRIVER.set(Arc::new(adapter));
|
|
tracing::info!("[ExtractionAdapter] Extraction driver configured (legacy OnceCell path)");
|
|
}
|
|
|
|
/// Check if the extraction driver is available (legacy OnceCell path).
|
|
#[allow(dead_code)]
|
|
pub fn is_extraction_driver_configured() -> bool {
|
|
EXTRACTION_DRIVER.get().is_some()
|
|
}
|
|
|
|
/// Get the global extraction driver (legacy OnceCell path).
|
|
///
|
|
/// Prefer accessing via `kernel.extraction_driver()` when the Kernel is available.
|
|
#[allow(dead_code)]
|
|
pub fn get_extraction_driver() -> Option<Arc<TauriExtractionDriver>> {
|
|
EXTRACTION_DRIVER.get().cloned()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_extraction_driver_not_configured_by_default() {
        assert!(!is_extraction_driver_configured());
    }

    #[test]
    fn test_parse_valid_json_response() {
        let response = r#"```json
[
{
"category": "communication-style",
"content": "User prefers concise replies",
"confidence": 0.9,
"keywords": ["concise", "style"]
},
{
"category": "language",
"content": "User prefers Chinese responses",
"confidence": 0.85,
"keywords": ["Chinese", "language"]
}
]
```"#;

        // Mirror the fence-stripping performed by parse_response()
        // (parse_response itself needs a live LlmDriver, so we simulate).
        let body = response
            .trim()
            .trim_start_matches("```json")
            .trim_start_matches("```")
            .trim_end_matches("```")
            .trim();

        let start = body.find('[').expect("opening bracket present");
        let end = body.rfind(']').expect("closing bracket present");
        let parsed: Vec<serde_json::Value> =
            serde_json::from_str(&body[start..=end]).unwrap();

        assert_eq!(parsed.len(), 2);
        let first_category = parsed[0].get("category").unwrap().as_str().unwrap();
        assert_eq!(first_category, "communication-style");
    }

    #[test]
    fn test_parse_no_json_array() {
        let text = "No memories could be extracted from this conversation.";
        // Equivalent (by De Morgan) to !(has '[' && has ']').
        assert!(text.find('[').is_none() || text.rfind(']').is_none());
    }
}
|