Files
zclaw_openfang/crates/zclaw-pipeline/src/presentation/analyzer.rs
iven 0d4fa96b82
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
refactor: 统一项目名称从OpenFang到ZCLAW
重构所有代码和文档中的项目名称,将OpenFang统一更新为ZCLAW。包括:
- 配置文件中的项目名称
- 代码注释和文档引用
- 环境变量和路径
- 类型定义和接口名称
- 测试用例和模拟数据

同时优化部分代码结构,移除未使用的模块,并更新相关依赖项。
2026-03-27 07:36:03 +08:00

568 lines
17 KiB
Rust

//! Presentation Analyzer
//!
//! Analyzes pipeline output data and recommends the best presentation type.
//!
//! # Strategy
//!
//! 1. **Structure Detection** (Fast Path, < 5ms):
//! - Check for known data patterns (slides, questions, chart data)
//! - Use simple heuristics for common cases
//!
//! 2. **LLM Analysis** (Optional, ~300ms):
//! - Semantic understanding of data content
//! - Better recommendations for ambiguous cases
use serde_json::Value;
use super::types::*;
/// Presentation analyzer.
///
/// Holds the detection rules that `analyze` and `can_render_as` run against
/// a JSON value in order to recommend a presentation type.
pub struct PresentationAnalyzer {
/// Detection rules evaluated against the input data.
rules: Vec<DetectionRule>,
}
/// Detection rule for a presentation type: pairs the type with the function
/// that recognises it.
struct DetectionRule {
/// Target presentation type reported when `detector` matches.
type_: PresentationType,
/// Detection function; inspects the JSON value and returns a confidence,
/// a reason and an optional sub-type.
detector: fn(&Value) -> DetectionResult,
/// Priority (higher = checked first). Because detection results are
/// stable-sorted by confidence afterwards, the higher-priority rule also
/// wins when two rules report the same confidence.
priority: u32,
}
/// Result of running a single detection rule against a JSON value.
struct DetectionResult {
/// Confidence score (0.0 - 1.0). A score of 0.0 means "no match"; such
/// results are dropped by `PresentationAnalyzer::analyze`.
confidence: f32,
/// Reason for detection (human-readable; empty when nothing was detected).
reason: String,
/// Detected sub-type (e.g., "bar" for Chart), if the detector reports one.
sub_type: Option<String>,
}
impl PresentationAnalyzer {
    /// Create a new analyzer with the default rules.
    ///
    /// The rules are sorted once here, highest priority first, so that
    /// `analyze` can iterate them directly instead of allocating and sorting
    /// a temporary list on every call.
    pub fn new() -> Self {
        let mut rules = vec![
            // Quiz detection (high priority)
            DetectionRule {
                type_: PresentationType::Quiz,
                detector: detect_quiz,
                priority: 100,
            },
            // Chart detection
            DetectionRule {
                type_: PresentationType::Chart,
                detector: detect_chart,
                priority: 90,
            },
            // Slideshow detection
            DetectionRule {
                type_: PresentationType::Slideshow,
                detector: detect_slideshow,
                priority: 80,
            },
            // Whiteboard detection
            DetectionRule {
                type_: PresentationType::Whiteboard,
                detector: detect_whiteboard,
                priority: 70,
            },
            // Document detection (fallback, lowest priority)
            DetectionRule {
                type_: PresentationType::Document,
                detector: detect_document,
                priority: 10,
            },
        ];

        // Invariant relied on by `analyze`: rules are ordered by descending
        // priority. The sort is stable, so rules sharing a priority keep
        // their declaration order.
        rules.sort_by(|a, b| b.priority.cmp(&a.priority));

        Self { rules }
    }

    /// Analyze data and recommend a presentation type.
    ///
    /// Every rule is run against `data`; results with a confidence of 0.0 or
    /// less are discarded. The remaining results are ordered by descending
    /// confidence, with ties resolved in favour of the higher-priority rule.
    ///
    /// # Returns
    ///
    /// A `PresentationAnalysis` whose recommended type, confidence, reason
    /// and sub-type come from the best result. The other results with a
    /// confidence above 0.3 are listed as alternatives. If no rule matched
    /// at all, a `Document` recommendation with confidence 0.5 is returned.
    pub fn analyze(&self, data: &Value) -> PresentationAnalysis {
        // `self.rules` is already in descending priority order (see `new`).
        let mut results: Vec<(PresentationType, DetectionResult)> = self
            .rules
            .iter()
            .map(|rule| (rule.type_, (rule.detector)(data)))
            .filter(|(_, result)| result.confidence > 0.0)
            .collect();

        // Stable sort by descending confidence, so equal confidences stay in
        // priority order. Only values > 0.0 (hence never NaN) reach this
        // point, for which `total_cmp` agrees with the usual float ordering.
        results.sort_by(|a, b| b.1.confidence.total_cmp(&a.1.confidence));

        let (primary, rest) = match results.split_first() {
            Some(parts) => parts,
            None => {
                // Fallback to document
                return PresentationAnalysis {
                    recommended_type: PresentationType::Document,
                    confidence: 0.5,
                    reason: "无法识别数据结构,使用默认文档展示".to_string(),
                    alternatives: vec![],
                    structure_hints: vec!["未检测到特定结构".to_string()],
                    sub_type: None,
                };
            }
        };
        let (primary_type, primary_result) = primary;

        // Runner-up types that are still plausible enough to offer.
        let alternatives: Vec<AlternativeType> = rest
            .iter()
            .filter(|(_, r)| r.confidence > 0.3)
            .map(|(t, r)| AlternativeType {
                type_: *t,
                confidence: r.confidence,
                reason: r.reason.clone(),
            })
            .collect();

        // Collect structure hints
        let structure_hints = collect_structure_hints(data);

        PresentationAnalysis {
            recommended_type: *primary_type,
            confidence: primary_result.confidence,
            reason: primary_result.reason.clone(),
            alternatives,
            structure_hints,
            sub_type: primary_result.sub_type.clone(),
        }
    }

    /// Quick check whether `data` matches a specific presentation type.
    ///
    /// Runs only the first rule registered for `type_` and returns `true`
    /// when its confidence is strictly greater than 0.5. A detector that only
    /// reports a 0.5 baseline therefore yields `false`, as does a type with
    /// no registered rule.
    pub fn can_render_as(&self, data: &Value, type_: PresentationType) -> bool {
        self.rules
            .iter()
            .find(|rule| rule.type_ == type_)
            .map(|rule| (rule.detector)(data).confidence > 0.5)
            .unwrap_or(false)
    }
}
impl Default for PresentationAnalyzer {
fn default() -> Self {
Self::new()
}
}
// === Detection Functions ===
/// Detect if data is a quiz
fn detect_quiz(data: &Value) -> DetectionResult {
let obj = match data.as_object() {
Some(o) => o,
None => return DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
},
};
// Check for quiz structure
if let Some(questions) = obj.get("questions").and_then(|q| q.as_array()) {
if !questions.is_empty() {
// Check if questions have options (choice questions)
let has_options = questions.iter().any(|q| {
q.get("options").and_then(|o| o.as_array()).map(|o| !o.is_empty()).unwrap_or(false)
});
if has_options {
return DetectionResult {
confidence: 0.95,
reason: "检测到问题数组,且包含选项".to_string(),
sub_type: Some("choice".to_string()),
};
}
return DetectionResult {
confidence: 0.85,
reason: "检测到问题数组".to_string(),
sub_type: None,
};
}
}
// Check for quiz field
if let Some(quiz) = obj.get("quiz") {
if quiz.get("questions").is_some() {
return DetectionResult {
confidence: 0.95,
reason: "包含 quiz 字段和 questions".to_string(),
sub_type: None,
};
}
}
// Check for common quiz field patterns
let quiz_fields = ["questions", "answers", "score", "quiz", "exam"];
let matches: Vec<_> = quiz_fields.iter()
.filter(|f| obj.contains_key(*f as &str))
.collect();
if matches.len() >= 2 {
return DetectionResult {
confidence: 0.6,
reason: format!("包含测验相关字段: {:?}", matches),
sub_type: None,
};
}
DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
}
}
/// Detect if data is a chart
fn detect_chart(data: &Value) -> DetectionResult {
let obj = match data.as_object() {
Some(o) => o,
None => return DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
},
};
// Check for explicit chart field
if obj.contains_key("chart") || obj.contains_key("chartType") {
let chart_type = obj.get("chartType")
.and_then(|v| v.as_str())
.unwrap_or("bar");
return DetectionResult {
confidence: 0.95,
reason: "包含 chart/chartType 字段".to_string(),
sub_type: Some(chart_type.to_string()),
};
}
// Check for x/y axis
if obj.contains_key("xAxis") || obj.contains_key("yAxis") {
return DetectionResult {
confidence: 0.9,
reason: "包含坐标轴定义".to_string(),
sub_type: Some("line".to_string()),
};
}
// Check for labels + series pattern
if let Some(labels) = obj.get("labels").and_then(|l| l.as_array()) {
if let Some(series) = obj.get("series").and_then(|s| s.as_array()) {
if !labels.is_empty() && !series.is_empty() {
// Determine chart type
let chart_type = if series.len() > 3 {
"line"
} else {
"bar"
};
return DetectionResult {
confidence: 0.9,
reason: format!("包含 labels({}) 和 series({})", labels.len(), series.len()),
sub_type: Some(chart_type.to_string()),
};
}
}
}
// Check for data array with numeric values
if let Some(data_arr) = obj.get("data").and_then(|d| d.as_array()) {
let numeric_count = data_arr.iter()
.filter(|v| v.is_number())
.count();
if numeric_count > data_arr.len() / 2 {
return DetectionResult {
confidence: 0.7,
reason: format!("data 数组包含 {} 个数值", numeric_count),
sub_type: Some("bar".to_string()),
};
}
}
// Check for multiple data series
let data_keys: Vec<_> = obj.keys()
.filter(|k| k.starts_with("data") || k.ends_with("_data"))
.collect();
if data_keys.len() >= 2 {
return DetectionResult {
confidence: 0.6,
reason: format!("包含多个数据系列: {:?}", data_keys),
sub_type: Some("line".to_string()),
};
}
DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
}
}
/// Detect if data is a slideshow
fn detect_slideshow(data: &Value) -> DetectionResult {
let obj = match data.as_object() {
Some(o) => o,
None => return DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
},
};
// Check for slides array
if let Some(slides) = obj.get("slides").and_then(|s| s.as_array()) {
if !slides.is_empty() {
return DetectionResult {
confidence: 0.95,
reason: format!("包含 {} 张幻灯片", slides.len()),
sub_type: None,
};
}
}
// Check for sections array with title/content structure
if let Some(sections) = obj.get("sections").and_then(|s| s.as_array()) {
let has_slides_structure = sections.iter().all(|s| {
s.get("title").is_some() && s.get("content").is_some()
});
if has_slides_structure && !sections.is_empty() {
return DetectionResult {
confidence: 0.85,
reason: format!("sections 数组包含 {} 个幻灯片结构", sections.len()),
sub_type: None,
};
}
}
// Check for scenes array (classroom style)
if let Some(scenes) = obj.get("scenes").and_then(|s| s.as_array()) {
if !scenes.is_empty() {
return DetectionResult {
confidence: 0.85,
reason: format!("包含 {} 个场景", scenes.len()),
sub_type: Some("classroom".to_string()),
};
}
}
// Check for presentation-like fields
let pres_fields = ["slides", "sections", "scenes", "outline", "chapters"];
let matches: Vec<_> = pres_fields.iter()
.filter(|f| obj.contains_key(*f as &str))
.collect();
if matches.len() >= 2 {
return DetectionResult {
confidence: 0.7,
reason: format!("包含演示文稿字段: {:?}", matches),
sub_type: None,
};
}
DetectionResult {
confidence: 0.0,
reason: String::new(),
sub_type: None,
}
}
/// Detect whether `data` looks like whiteboard content.
///
/// Signals are checked strongest first, so the returned confidence is the
/// highest one that applies:
/// 1. a `strokes` key (drawing data): 0.95;
/// 2. a `canvas` or `elements` key: 0.9.
///
/// Non-object values and objects with none of these keys score 0.0.
fn detect_whiteboard(data: &Value) -> DetectionResult {
    let obj = match data.as_object() {
        Some(o) => o,
        None => {
            return DetectionResult {
                confidence: 0.0,
                reason: String::new(),
                sub_type: None,
            }
        }
    };

    // Check for strokes (drawing data) before the weaker canvas/elements
    // signal; otherwise data carrying both would be reported at 0.9 instead
    // of 0.95.
    if obj.contains_key("strokes") {
        return DetectionResult {
            confidence: 0.95,
            reason: "包含 strokes 绘图数据".to_string(),
            sub_type: None,
        };
    }

    // Check for canvas/elements
    if obj.contains_key("canvas") || obj.contains_key("elements") {
        return DetectionResult {
            confidence: 0.9,
            reason: "包含 canvas/elements 字段".to_string(),
            sub_type: None,
        };
    }

    DetectionResult {
        confidence: 0.0,
        reason: String::new(),
        sub_type: None,
    }
}
/// Detect whether `data` should be shown as a document.
///
/// This detector never returns 0.0, so it doubles as the fallback:
/// - non-object values: 0.5;
/// - objects with a `markdown` or `content` key: 0.8, sub-type `"markdown"`;
/// - objects with a `summary` or `report` key: 0.7;
/// - any other object: 0.5.
fn detect_document(data: &Value) -> DetectionResult {
    // Pick the (confidence, reason, sub-type) triple first, then build the
    // result in one place.
    let (confidence, reason, sub_type) = match data.as_object() {
        None => (0.5, "非对象数据,使用文档展示", None),
        Some(map) if map.contains_key("markdown") || map.contains_key("content") => {
            (0.8, "包含 markdown/content 字段", Some("markdown"))
        }
        Some(map) if map.contains_key("summary") || map.contains_key("report") => {
            (0.7, "包含 summary/report 字段", None)
        }
        Some(_) => (0.5, "默认文档展示", None),
    };

    DetectionResult {
        confidence,
        reason: String::from(reason),
        sub_type: sub_type.map(String::from),
    }
}
/// Collect short, human-readable hints about the shape of `data`.
///
/// For an object, the result lists `"<key>: <length>"` for every field that
/// holds an array, followed by markers for the `title`, `description` and
/// `metadata` keys when present. Non-object values produce no hints.
fn collect_structure_hints(data: &Value) -> Vec<String> {
    let map = match data.as_object() {
        Some(map) => map,
        None => return Vec::new(),
    };

    // One hint per array-valued field, reporting its length.
    let array_sizes = map.iter().filter_map(|(name, value)| {
        value
            .as_array()
            .map(|items| format!("{}: {}", name, items.len()))
    });

    // Fixed markers, emitted in this order after the array hints.
    let markers = [
        ("title", "包含标题"),
        ("description", "包含描述"),
        ("metadata", "包含元数据"),
    ];
    let flags = markers
        .iter()
        .filter(|(key, _)| map.contains_key(*key))
        .map(|(_, label)| (*label).to_string());

    array_sizes.chain(flags).collect()
}
// Unit tests for the analyzer: one `analyze` case per main presentation type
// plus a check of `can_render_as`.
#[cfg(test)]
mod tests {
use super::*;
use serde_json::json;
// A non-empty `questions` array whose entries carry `options` must be
// recommended as a quiz with high confidence.
#[test]
fn test_analyze_quiz() {
let analyzer = PresentationAnalyzer::new();
let data = json!({
"title": "Python 测验",
"questions": [
{
"id": "q1",
"text": "Python 是什么?",
"options": [
{"id": "a", "text": "编译型语言"},
{"id": "b", "text": "解释型语言"}
]
}
]
});
let result = analyzer.analyze(&data);
assert_eq!(result.recommended_type, PresentationType::Quiz);
assert!(result.confidence > 0.8);
}
// An explicit `chartType` selects the chart presentation and is passed
// through as the sub-type.
#[test]
fn test_analyze_chart() {
let analyzer = PresentationAnalyzer::new();
let data = json!({
"chartType": "bar",
"title": "销售数据",
"labels": ["一月", "二月", "三月"],
"series": [
{"name": "销售额", "data": [100, 150, 200]}
]
});
let result = analyzer.analyze(&data);
assert_eq!(result.recommended_type, PresentationType::Chart);
assert_eq!(result.sub_type, Some("bar".to_string()));
}
// A non-empty `slides` array must be recommended as a slideshow.
#[test]
fn test_analyze_slideshow() {
let analyzer = PresentationAnalyzer::new();
let data = json!({
"title": "课程大纲",
"slides": [
{"title": "第一章", "content": "..."},
{"title": "第二章", "content": "..."}
]
});
let result = analyzer.analyze(&data);
assert_eq!(result.recommended_type, PresentationType::Slideshow);
}
// Data with only `title` and `content` matches no specialised detector,
// so the document detector wins.
#[test]
fn test_analyze_document_fallback() {
let analyzer = PresentationAnalyzer::new();
let data = json!({
"title": "报告",
"content": "这是一段文本内容..."
});
let result = analyzer.analyze(&data);
assert_eq!(result.recommended_type, PresentationType::Document);
}
// `can_render_as` is true only for the type whose detector scores above 0.5:
// questions without options still count as a quiz, but not as a chart.
#[test]
fn test_can_render_as() {
let analyzer = PresentationAnalyzer::new();
let quiz_data = json!({
"questions": [{"id": "q1", "text": "问题"}]
});
assert!(analyzer.can_render_as(&quiz_data, PresentationType::Quiz));
assert!(!analyzer.can_render_as(&quiz_data, PresentationType::Chart));
}
}