//! 共享 JSON 工具函数 //! 从 LLM 返回的文本中提取 JSON 块 /// 从 LLM 返回文本中提取 JSON 块 /// 支持三种格式:```json...``` 围栏、```...``` 围栏、裸 {...} /// 使用括号平衡算法找到第一个完整 JSON 块,避免误匹配 pub fn extract_json_block(text: &str) -> &str { // 尝试匹配 ```json ... ``` if let Some(start) = text.find("```json") { let json_start = start + 7; if let Some(end) = text[json_start..].find("```") { return text[json_start..json_start + end].trim(); } } // 尝试匹配 ``` ... ``` if let Some(start) = text.find("```") { let json_start = start + 3; if let Some(end) = text[json_start..].find("```") { return text[json_start..json_start + end].trim(); } } // 用括号平衡算法找第一个完整 {...} 块 if let Some(slice) = find_balanced_json(text) { return slice; } text.trim() } /// 使用括号平衡计数找到第一个完整的 {...} JSON 块 /// 正确处理字符串字面量中的花括号 fn find_balanced_json(text: &str) -> Option<&str> { let start = text.find('{')?; let mut depth = 0i32; let mut in_string = false; let mut escape_next = false; for (i, c) in text[start..].char_indices() { if escape_next { escape_next = false; continue; } match c { '\\' if in_string => escape_next = true, '"' => in_string = !in_string, '{' if !in_string => { depth += 1; } '}' if !in_string => { depth -= 1; if depth == 0 { return Some(&text[start..=start + i]); } } _ => {} } } None } /// 从 serde_json::Value 中提取字符串数组 /// 用于解析 LLM 返回 JSON 中的 triggers/tools 等字段 pub fn extract_string_array(raw: &serde_json::Value, key: &str) -> Vec { raw.get(key) .and_then(|v| v.as_array()) .map(|a| { a.iter() .filter_map(|v| v.as_str().map(String::from)) .collect() }) .unwrap_or_default() } #[cfg(test)] mod tests { use super::*; #[test] fn test_json_block_with_markdown() { let text = "Here is the result:\n```json\n{\"key\": \"value\"}\n```\nDone."; assert_eq!(extract_json_block(text), "{\"key\": \"value\"}"); } #[test] fn test_json_block_bare() { let text = "{\"key\": \"value\"}"; assert_eq!(extract_json_block(text), "{\"key\": \"value\"}"); } #[test] fn test_json_block_plain_fences() { let text = "Result:\n```\n{\"a\": 1}\n```"; assert_eq!(extract_json_block(text), "{\"a\": 1}"); } #[test] fn test_json_block_nested_braces() { let text = r#"{"outer": {"inner": "val"}}"#; assert_eq!(extract_json_block(text), r#"{"outer": {"inner": "val"}}"#); } #[test] fn test_json_block_no_json() { let text = "no json here"; assert_eq!(extract_json_block(text), "no json here"); } #[test] fn test_balanced_json_skips_outer_text() { // 第一个 { 到最后一个 } 会包含多余文本,但平衡算法只取第一个完整块 let text = "prefix {\"a\": 1} suffix {\"b\": 2}"; assert_eq!(extract_json_block(text), "{\"a\": 1}"); } #[test] fn test_balanced_json_handles_braces_in_strings() { let text = r#"{"body": "function() { return x; }", "name": "test"}"#; assert_eq!( extract_json_block(text), r#"{"body": "function() { return x; }", "name": "test"}"# ); } #[test] fn test_balanced_json_handles_escaped_quotes() { let text = r#"{"msg": "He said \"hello {world}\""}"#; assert_eq!( extract_json_block(text), r#"{"msg": "He said \"hello {world}\""}"# ); } #[test] fn test_extract_string_array() { let raw: serde_json::Value = serde_json::from_str( r#"{"triggers": ["报表", "日报"], "name": "test"}"#, ) .unwrap(); let arr = extract_string_array(&raw, "triggers"); assert_eq!(arr, vec!["报表", "日报"]); } #[test] fn test_extract_string_array_missing_key() { let raw: serde_json::Value = serde_json::from_str(r#"{"name": "test"}"#).unwrap(); let arr = extract_string_array(&raw, "triggers"); assert!(arr.is_empty()); } }