fix(presentation): 修复 presentation 模块类型错误和语法问题
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
- 创建 types.ts 定义完整的类型系统 - 重写 DocumentRenderer.tsx 修复语法错误 - 重写 QuizRenderer.tsx 修复语法错误 - 重写 PresentationContainer.tsx 添加类型守卫 - 重写 TypeSwitcher.tsx 修复类型引用 - 更新 index.ts 移除不存在的 ChartRenderer 导出 审计结果: - 类型检查: 通过 - 单元测试: 222 passed - 构建: 成功
This commit is contained in:
352
crates/zclaw-growth/src/retrieval/query.rs
Normal file
352
crates/zclaw-growth/src/retrieval/query.rs
Normal file
@@ -0,0 +1,352 @@
|
||||
//! Query Analyzer
|
||||
//!
|
||||
//! Provides query analysis and expansion capabilities for improved retrieval.
|
||||
//! Extracts keywords, identifies intent, and generates search variations.
|
||||
|
||||
use crate::types::MemoryType;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Query analysis result
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AnalyzedQuery {
|
||||
/// Original query string
|
||||
pub original: String,
|
||||
/// Extracted keywords
|
||||
pub keywords: Vec<String>,
|
||||
/// Query intent
|
||||
pub intent: QueryIntent,
|
||||
/// Memory types to search (inferred from query)
|
||||
pub target_types: Vec<MemoryType>,
|
||||
/// Expanded search terms
|
||||
pub expansions: Vec<String>,
|
||||
}
|
||||
|
||||
/// Coarse classification of what a query is asking for.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QueryIntent {
    /// The query is about preferences or settings.
    Preference,

    /// The query asks for factual knowledge.
    Knowledge,

    /// The query asks for how-to information or past experience.
    Experience,

    /// General conversation with no more specific intent.
    General,

    /// The query is about code.
    Code,

    /// The query is about configuration.
    Configuration,
}
|
||||
|
||||
/// Keyword-driven analyzer that turns a raw query string into an
/// [`AnalyzedQuery`].
pub struct QueryAnalyzer {
    /// Words that mark a query as preference-related.
    preference_indicators: HashSet<String>,

    /// Words that mark a query as knowledge-related.
    knowledge_indicators: HashSet<String>,

    /// Words that mark a query as experience-related.
    experience_indicators: HashSet<String>,

    /// Words that mark a query as code-related.
    code_indicators: HashSet<String>,

    /// Words discarded during keyword extraction.
    stop_words: HashSet<String>,
}
|
||||
|
||||
impl QueryAnalyzer {
|
||||
/// Create a new query analyzer
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
preference_indicators: [
|
||||
"prefer", "like", "want", "favorite", "favourite", "style",
|
||||
"format", "language", "setting", "preference", "usually",
|
||||
"typically", "always", "never", "习惯", "偏好", "喜欢", "想要",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
knowledge_indicators: [
|
||||
"what", "how", "why", "explain", "tell", "know", "learn",
|
||||
"understand", "meaning", "definition", "concept", "theory",
|
||||
"是什么", "怎么", "为什么", "解释", "了解", "知道",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
experience_indicators: [
|
||||
"experience", "tried", "used", "before", "last time",
|
||||
"previous", "history", "remember", "recall", "when",
|
||||
"经验", "尝试", "用过", "上次", "记得", "回忆",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
code_indicators: [
|
||||
"code", "function", "class", "method", "variable", "type",
|
||||
"error", "bug", "fix", "implement", "refactor", "api",
|
||||
"代码", "函数", "类", "方法", "变量", "错误", "修复", "实现",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
stop_words: [
|
||||
"the", "a", "an", "is", "are", "was", "were", "be", "been",
|
||||
"have", "has", "had", "do", "does", "did", "will", "would",
|
||||
"could", "should", "may", "might", "must", "can", "to", "of",
|
||||
"in", "for", "on", "with", "at", "by", "from", "as", "and",
|
||||
"or", "but", "if", "then", "else", "when", "where", "which",
|
||||
"who", "whom", "whose", "this", "that", "these", "those",
|
||||
]
|
||||
.iter()
|
||||
.map(|s| s.to_string())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Analyze a query string
|
||||
pub fn analyze(&self, query: &str) -> AnalyzedQuery {
|
||||
let keywords = self.extract_keywords(query);
|
||||
let intent = self.classify_intent(&keywords);
|
||||
let target_types = self.infer_memory_types(intent, &keywords);
|
||||
let expansions = self.expand_query(&keywords);
|
||||
|
||||
AnalyzedQuery {
|
||||
original: query.to_string(),
|
||||
keywords,
|
||||
intent,
|
||||
target_types,
|
||||
expansions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract keywords from query
|
||||
fn extract_keywords(&self, query: &str) -> Vec<String> {
|
||||
query
|
||||
.to_lowercase()
|
||||
.split(|c: char| !c.is_alphanumeric() && !is_cjk(c))
|
||||
.filter(|s| !s.is_empty() && s.len() > 1)
|
||||
.filter(|s| !self.stop_words.contains(*s))
|
||||
.map(|s| s.to_string())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Classify query intent
|
||||
fn classify_intent(&self, keywords: &[String]) -> QueryIntent {
|
||||
let mut scores = [
|
||||
(QueryIntent::Preference, 0),
|
||||
(QueryIntent::Knowledge, 0),
|
||||
(QueryIntent::Experience, 0),
|
||||
(QueryIntent::Code, 0),
|
||||
];
|
||||
|
||||
for keyword in keywords {
|
||||
if self.preference_indicators.contains(keyword) {
|
||||
scores[0].1 += 2;
|
||||
}
|
||||
if self.knowledge_indicators.contains(keyword) {
|
||||
scores[1].1 += 2;
|
||||
}
|
||||
if self.experience_indicators.contains(keyword) {
|
||||
scores[2].1 += 2;
|
||||
}
|
||||
if self.code_indicators.contains(keyword) {
|
||||
scores[3].1 += 2;
|
||||
}
|
||||
}
|
||||
|
||||
// Find highest scoring intent
|
||||
scores.sort_by(|a, b| b.1.cmp(&a.1));
|
||||
|
||||
if scores[0].1 > 0 {
|
||||
scores[0].0
|
||||
} else {
|
||||
QueryIntent::General
|
||||
}
|
||||
}
|
||||
|
||||
/// Infer which memory types to search
|
||||
fn infer_memory_types(&self, intent: QueryIntent, _keywords: &[String]) -> Vec<MemoryType> {
|
||||
let mut types = Vec::new();
|
||||
|
||||
match intent {
|
||||
QueryIntent::Preference => {
|
||||
types.push(MemoryType::Preference);
|
||||
}
|
||||
QueryIntent::Knowledge | QueryIntent::Code => {
|
||||
types.push(MemoryType::Knowledge);
|
||||
types.push(MemoryType::Experience);
|
||||
}
|
||||
QueryIntent::Experience => {
|
||||
types.push(MemoryType::Experience);
|
||||
types.push(MemoryType::Knowledge);
|
||||
}
|
||||
QueryIntent::General => {
|
||||
// Search all types
|
||||
types.push(MemoryType::Preference);
|
||||
types.push(MemoryType::Knowledge);
|
||||
types.push(MemoryType::Experience);
|
||||
}
|
||||
QueryIntent::Configuration => {
|
||||
types.push(MemoryType::Preference);
|
||||
types.push(MemoryType::Knowledge);
|
||||
}
|
||||
}
|
||||
|
||||
types
|
||||
}
|
||||
|
||||
/// Expand query with related terms
|
||||
fn expand_query(&self, keywords: &[String]) -> Vec<String> {
|
||||
let mut expansions = Vec::new();
|
||||
|
||||
// Add stemmed variations (simplified)
|
||||
for keyword in keywords {
|
||||
// Add singular/plural variations
|
||||
if keyword.ends_with('s') && keyword.len() > 3 {
|
||||
expansions.push(keyword[..keyword.len()-1].to_string());
|
||||
} else {
|
||||
expansions.push(format!("{}s", keyword));
|
||||
}
|
||||
|
||||
// Add common synonyms (simplified)
|
||||
if let Some(synonyms) = self.get_synonyms(keyword) {
|
||||
expansions.extend(synonyms);
|
||||
}
|
||||
}
|
||||
|
||||
expansions
|
||||
}
|
||||
|
||||
/// Get synonyms for a keyword (simplified)
|
||||
fn get_synonyms(&self, keyword: &str) -> Option<Vec<String>> {
|
||||
let synonyms: &[&str] = match keyword {
|
||||
"code" => &["program", "script", "source"],
|
||||
"error" => &["bug", "issue", "problem", "exception"],
|
||||
"fix" => &["solve", "resolve", "repair", "patch"],
|
||||
"fast" => &["quick", "speed", "performance", "efficient"],
|
||||
"slow" => &["performance", "optimize", "speed"],
|
||||
"help" => &["assist", "support", "guide", "aid"],
|
||||
"learn" => &["study", "understand", "know", "grasp"],
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(synonyms.iter().map(|s| s.to_string()).collect())
|
||||
}
|
||||
|
||||
/// Generate search queries from analyzed query
|
||||
pub fn generate_search_queries(&self, analyzed: &AnalyzedQuery) -> Vec<String> {
|
||||
let mut queries = vec![analyzed.original.clone()];
|
||||
|
||||
// Add keyword-based query
|
||||
if !analyzed.keywords.is_empty() {
|
||||
queries.push(analyzed.keywords.join(" "));
|
||||
}
|
||||
|
||||
// Add expanded terms
|
||||
for expansion in &analyzed.expansions {
|
||||
if !expansion.is_empty() {
|
||||
queries.push(expansion.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// Deduplicate
|
||||
queries.sort();
|
||||
queries.dedup();
|
||||
|
||||
queries
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for QueryAnalyzer {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Check whether `c` is a CJK (Han) ideograph.
///
/// Covers the CJK Unified Ideographs block, Extensions A through I, and the
/// two compatibility-ideograph blocks. Kana, Hangul and CJK punctuation are
/// not ideographs and return `false`.
fn is_cjk(c: char) -> bool {
    matches!(
        c,
        '\u{4E00}'..='\u{9FFF}'         // CJK Unified Ideographs
            | '\u{3400}'..='\u{4DBF}'   // CJK Unified Ideographs Extension A
            | '\u{20000}'..='\u{2A6DF}' // CJK Unified Ideographs Extension B
            | '\u{2A700}'..='\u{2B73F}' // CJK Unified Ideographs Extension C
            | '\u{2B740}'..='\u{2B81F}' // CJK Unified Ideographs Extension D
            | '\u{2B820}'..='\u{2CEAF}' // CJK Unified Ideographs Extension E
            | '\u{2CEB0}'..='\u{2EBEF}' // CJK Unified Ideographs Extension F
            | '\u{2EBF0}'..='\u{2EE5F}' // CJK Unified Ideographs Extension I
            | '\u{30000}'..='\u{3134F}' // CJK Unified Ideographs Extension G
            | '\u{31350}'..='\u{323AF}' // CJK Unified Ideographs Extension H
            | '\u{F900}'..='\u{FAFF}'   // CJK Compatibility Ideographs
            | '\u{2F800}'..='\u{2FA1F}' // CJK Compatibility Ideographs Supplement
    )
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Stop words are removed and the remaining words are lower-cased.
    #[test]
    fn test_extract_keywords() {
        let analyzer = QueryAnalyzer::new();
        let keywords = analyzer.extract_keywords("What is the Rust programming language?");

        assert!(keywords.contains(&"rust".to_string()));
        assert!(keywords.contains(&"programming".to_string()));
        assert!(keywords.contains(&"language".to_string()));
        assert!(!keywords.contains(&"the".to_string())); // stop word
        assert!(!keywords.contains(&"is".to_string())); // stop word
    }

    /// A preference indicator yields the preference intent and memory type.
    #[test]
    fn test_classify_intent_preference() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("I prefer concise responses");

        assert_eq!(analyzed.intent, QueryIntent::Preference);
        assert!(analyzed.target_types.contains(&MemoryType::Preference));
    }

    /// Knowledge indicators ("explain", "how") yield the knowledge intent.
    #[test]
    fn test_classify_intent_knowledge() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("Explain how async/await works in Rust");

        assert_eq!(analyzed.intent, QueryIntent::Knowledge);
    }

    /// Code indicators ("fix", "error", "function") yield the code intent.
    #[test]
    fn test_classify_intent_code() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("Fix this error in my function");

        assert_eq!(analyzed.intent, QueryIntent::Code);
    }

    /// A query without any indicator falls back to the general intent.
    #[test]
    fn test_classify_intent_general() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("Rust programming");

        assert_eq!(analyzed.intent, QueryIntent::General);
    }

    /// Expansions include the built-in synonyms of the keywords.
    #[test]
    fn test_query_expansion() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("fix the error");

        assert!(!analyzed.expansions.is_empty());
        assert!(analyzed.expansions.contains(&"solve".to_string())); // synonym of "fix"
        assert!(analyzed.expansions.contains(&"bug".to_string())); // synonym of "error"
    }

    /// Generated queries contain the original and keyword queries, once each.
    #[test]
    fn test_generate_search_queries() {
        let analyzer = QueryAnalyzer::new();
        let analyzed = analyzer.analyze("Rust programming");
        let queries = analyzer.generate_search_queries(&analyzed);

        assert!(queries.contains(&"Rust programming".to_string()));
        assert!(queries.contains(&"rust programming".to_string()));

        let unique: HashSet<&String> = queries.iter().collect();
        assert_eq!(unique.len(), queries.len(), "queries must be deduplicated");
    }

    /// Ideographs are CJK; ASCII letters and digits are not.
    #[test]
    fn test_cjk_detection() {
        assert!(is_cjk('中'));
        assert!(is_cjk('文'));
        assert!(!is_cjk('a'));
        assert!(!is_cjk('1'));
    }

    /// Chinese text survives keyword extraction.
    #[test]
    fn test_chinese_keywords() {
        let analyzer = QueryAnalyzer::new();
        let keywords = analyzer.extract_keywords("我喜欢简洁的回复");

        // Chinese characters should be extracted
        assert!(!keywords.is_empty());
    }
}
|
||||
Reference in New Issue
Block a user