Files
zclaw_openfang/crates/zclaw-growth/src/json_utils.rs
iven cb727fdcc7
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix(growth): 二次审计修复 — 6项 CRITICAL/HIGH/MEDIUM 全部修复
CRITICAL-1/2: json_utils 花括号匹配改为括号平衡算法
  - 处理字符串字面量中的花括号和转义引号
  - 新增 5 个测试(平衡匹配、字符串内花括号、转义引号、extract_string_array)

HIGH-4: EvolutionMiddleware 只取第一个事件(remove(0)),不丢弃后续
HIGH-5: EvolutionMiddleware 先 read() 判空再 write(),减少锁竞争
HIGH-7: from_experience_store 使用传入 store 的 viking 实例(不再忽略参数)
  - ExperienceStore 新增 viking() getter

MEDIUM-9: skill_generator + workflow_composer JSON 数组解析去重
  - 新增 json_utils::extract_string_array() 共享函数
MEDIUM-14: EvolutionMiddleware 注入文本去除多余缩进空格

测试: zclaw-growth 133 tests, zclaw-runtime 87 tests, workspace 0 failures
2026-04-18 22:30:10 +08:00

149 lines
4.5 KiB
Rust

//! 共享 JSON 工具函数
//! 从 LLM 返回的文本中提取 JSON 块
/// Extracts the JSON payload from free-form LLM output.
///
/// Candidates are tried in this order, and the first hit wins:
/// 1. the body of the first fence opened with three backticks plus `json`;
/// 2. the body of the first fence opened with three backticks;
/// 3. whatever `find_balanced_json` returns for the whole text;
/// 4. the whole text, trimmed.
///
/// A fence only counts when a closing run of three backticks follows it.
/// Fence bodies are returned trimmed, and a language tag on the opening fence
/// line (for example `JSON`, `jsonc`, `javascript`) is not part of the body.
///
/// The returned slice always borrows from `text`; nothing is allocated and the
/// result is not validated as JSON.
pub fn extract_json_block(text: &str) -> &str {
    fenced_body(text, "```json")
        .or_else(|| fenced_body(text, "```"))
        .or_else(|| find_balanced_json(text))
        .unwrap_or_else(|| text.trim())
}

/// Returns the trimmed body of the first fence that starts with `opener` and is
/// terminated by a closing run of three backticks, or `None` if there is no
/// such fence.
fn fenced_body<'a>(text: &'a str, opener: &str) -> Option<&'a str> {
    let body_start = text.find(opener)? + opener.len();
    let rest = &text[body_start..];
    let body = &rest[..rest.find("```")?];
    Some(strip_fence_info(body).trim())
}

/// Drops a language tag that sits on the opening fence line.
///
/// Only the text before the first newline is considered, and it is removed only
/// when it looks like a bare tag, so payloads that start on the fence line
/// (such as an inline `{...}`) are left intact.
fn strip_fence_info(body: &str) -> &str {
    match body.split_once('\n') {
        Some((first_line, rest)) if is_fence_info(first_line.trim()) => rest,
        _ => body,
    }
}

/// Reports whether `token` looks like a fence language tag: non-empty and made
/// only of ASCII letters, digits, or one of `_ - + . #`.
fn is_fence_info(token: &str) -> bool {
    !token.is_empty()
        && token
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '+' | '.' | '#'))
}
/// Finds the first complete, brace-balanced `{...}` block in `text`.
///
/// Braces inside double-quoted string literals are ignored, and a backslash
/// inside a string escapes the next character (so `\"` does not end the
/// string).
///
/// Every `{` is tried as a starting point, left to right. An opening brace that
/// never closes (for example a stray `{` in surrounding prose) therefore does
/// not hide a complete block that appears later in the text.
///
/// Returns `None` when no starting brace yields a balanced block. The result
/// borrows from `text` and is not validated as JSON.
fn find_balanced_json(text: &str) -> Option<&str> {
    let mut search_from = 0;
    while let Some(offset) = text[search_from..].find('{') {
        let start = search_from + offset;
        if let Some(len) = balanced_block_len(&text[start..]) {
            return Some(&text[start..start + len]);
        }
        // `{` is a single byte, so `start + 1` is a valid char boundary.
        // Each failed candidate rescans to the end of the text, which is
        // quadratic in the worst case but only for inputs with many unclosed
        // braces.
        search_from = start + 1;
    }
    None
}

/// Returns the byte length of the balanced block at the start of `candidate`,
/// which is expected to begin with `{`, or `None` if the block never closes.
fn balanced_block_len(candidate: &str) -> Option<usize> {
    let mut depth = 0i32;
    let mut in_string = false;
    let mut escape_next = false;
    for (i, c) in candidate.char_indices() {
        if escape_next {
            // The character after a backslash is consumed verbatim.
            escape_next = false;
            continue;
        }
        match c {
            '\\' if in_string => escape_next = true,
            '"' => in_string = !in_string,
            '{' if !in_string => depth += 1,
            '}' if !in_string => {
                depth -= 1;
                if depth == 0 {
                    // `}` is a single byte, so the block ends at `i + 1`.
                    return Some(i + 1);
                }
            }
            _ => {}
        }
    }
    None
}
/// Collects the string elements of the array stored under `key` in `raw`.
///
/// Used for fields such as `triggers` / `tools` in JSON returned by an LLM.
/// Array elements that are not strings are skipped. If `key` is absent or its
/// value is not an array, an empty vector is returned.
pub fn extract_string_array(raw: &serde_json::Value, key: &str) -> Vec<String> {
    let mut strings = Vec::new();
    if let Some(items) = raw.get(key).and_then(|value| value.as_array()) {
        for item in items {
            if let Some(s) = item.as_str() {
                strings.push(s.to_owned());
            }
        }
    }
    strings
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A fence tagged `json` embedded in prose yields only the fenced payload.
    #[test]
    fn test_json_block_with_markdown() {
        let input = "Here is the result:\n```json\n{\"key\": \"value\"}\n```\nDone.";
        let extracted = extract_json_block(input);
        assert_eq!(extracted, "{\"key\": \"value\"}");
    }

    /// A bare JSON object is returned unchanged.
    #[test]
    fn test_json_block_bare() {
        let input = "{\"key\": \"value\"}";
        let extracted = extract_json_block(input);
        assert_eq!(extracted, "{\"key\": \"value\"}");
    }

    /// An untagged fence is accepted as well.
    #[test]
    fn test_json_block_plain_fences() {
        let input = "Result:\n```\n{\"a\": 1}\n```";
        let extracted = extract_json_block(input);
        assert_eq!(extracted, "{\"a\": 1}");
    }

    /// Nested objects are kept whole.
    #[test]
    fn test_json_block_nested_braces() {
        let input = r#"{"outer": {"inner": "val"}}"#;
        let expected = r#"{"outer": {"inner": "val"}}"#;
        assert_eq!(extract_json_block(input), expected);
    }

    /// Text without fences or braces falls back to the trimmed input.
    #[test]
    fn test_json_block_no_json() {
        let input = "no json here";
        let extracted = extract_json_block(input);
        assert_eq!(extracted, "no json here");
    }

    #[test]
    fn test_balanced_json_skips_outer_text() {
        // Spanning from the first `{` to the last `}` would include extra text;
        // the balanced scan takes only the first complete block.
        let input = "prefix {\"a\": 1} suffix {\"b\": 2}";
        let extracted = extract_json_block(input);
        assert_eq!(extracted, "{\"a\": 1}");
    }

    /// Braces inside a string literal do not affect the nesting depth.
    #[test]
    fn test_balanced_json_handles_braces_in_strings() {
        let input = r#"{"body": "function() { return x; }", "name": "test"}"#;
        let expected = r#"{"body": "function() { return x; }", "name": "test"}"#;
        assert_eq!(extract_json_block(input), expected);
    }

    /// Escaped quotes do not terminate the enclosing string literal.
    #[test]
    fn test_balanced_json_handles_escaped_quotes() {
        let input = r#"{"msg": "He said \"hello {world}\""}"#;
        let expected = r#"{"msg": "He said \"hello {world}\""}"#;
        assert_eq!(extract_json_block(input), expected);
    }

    /// String elements under an existing key are returned in order.
    #[test]
    fn test_extract_string_array() {
        let source = r#"{"triggers": ["报表", "日报"], "name": "test"}"#;
        let parsed: serde_json::Value = serde_json::from_str(source).unwrap();
        let triggers = extract_string_array(&parsed, "triggers");
        assert_eq!(triggers, vec!["报表", "日报"]);
    }

    /// A missing key yields an empty vector.
    #[test]
    fn test_extract_string_array_missing_key() {
        let source = r#"{"name": "test"}"#;
        let parsed: serde_json::Value = serde_json::from_str(source).unwrap();
        let triggers = extract_string_array(&parsed, "triggers");
        assert!(triggers.is_empty());
    }
}