feat(ai): 知识库 V2 菜单迁移 + 文本切片器 + 前端路由权限

- 新增迁移 000168:在 AI 知识库同级添加「知识库 V2」菜单,绑定 admin 角色
- 新增 document/chunker.rs:固定大小 + overlap 文本切片器(5 单元测试)
- 前端 routeConfig 添加 /health/ai-knowledge-v2 权限声明
- App.tsx validateRouteCoverage 补充 v2 路径
This commit is contained in:
iven
2026-05-27 00:49:27 +08:00
parent 2324d770bc
commit b0323ec89c
6 changed files with 157 additions and 1 deletions

View File

@@ -0,0 +1,87 @@
/// Splits `text` into fixed-size, optionally overlapping chunks.
///
/// Sizes are measured in Unicode scalar values (`char`s), not bytes, so
/// multi-byte text such as Chinese is never cut in the middle of a character.
///
/// * `chunk_size` - maximum number of characters per chunk.
/// * `overlap` - number of characters shared between consecutive chunks.
///   When `overlap >= chunk_size` no forward progress would be possible, so
///   the overlap is ignored and the chunks are laid out back to back.
///
/// Every chunk is whitespace-trimmed, and chunks that are blank after
/// trimming are dropped. Returns an empty vector when `text` is empty or
/// `chunk_size` is zero (no non-empty chunk can satisfy a zero size limit).
pub fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
    if text.is_empty() || chunk_size == 0 {
        return Vec::new();
    }

    // Byte offset of every char boundary plus the end of the string, so that
    // `bounds[i]..bounds[j]` is the byte range of chars `i..j`.
    let bounds: Vec<usize> = text
        .char_indices()
        .map(|(idx, _)| idx)
        .chain(std::iter::once(text.len()))
        .collect();
    let total = bounds.len() - 1;

    // The stride must be strictly positive, otherwise the loop never advances.
    let step = if overlap < chunk_size {
        chunk_size - overlap
    } else {
        chunk_size
    };

    let mut chunks = Vec::new();
    let mut start = 0;
    while start < total {
        let end = start.saturating_add(chunk_size).min(total);
        let piece = text[bounds[start]..bounds[end]].trim();
        if !piece.is_empty() {
            chunks.push(piece.to_string());
        }
        // The chunk that reaches the end of the text is always the last one.
        if end == total {
            break;
        }
        start += step;
    }
    chunks
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Number of Unicode scalar values in `s` (chunk sizes are char-based).
    fn chars_count(s: &str) -> usize {
        s.chars().count()
    }

    /// Empty input produces no chunks.
    #[test]
    fn test_chunk_empty() {
        assert_eq!(chunk_text("", 100, 20), Vec::<String>::new());
    }

    /// Text shorter than the chunk size comes back as a single chunk.
    #[test]
    fn test_chunk_small_text() {
        let text = "hello world";
        let chunks = chunk_text(text, 100, 20);
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0], "hello world");
    }

    /// Long text is split, and no chunk exceeds the requested size.
    #[test]
    fn test_chunk_long_text() {
        let text = "abcdefghij".repeat(100); // 1000 chars
        let chunks = chunk_text(&text, 200, 50);
        assert!(chunks.len() > 1);
        // First chunk should be 200 chars
        assert_eq!(chars_count(&chunks[0]), 200);
        for chunk in &chunks {
            assert!(chars_count(chunk) <= 200);
        }
    }

    /// Consecutive chunks share exactly `overlap` characters.
    #[test]
    fn test_chunk_with_overlap() {
        let text = "abcdefghijklmnopqrstuvwxyz".repeat(20); // 520 chars
        let chunks = chunk_text(&text, 100, 20);
        assert!(chunks.len() > 1);
        // The last 20 chars of each chunk must reappear at the start of the next one.
        for pair in chunks.windows(2) {
            let tail: String = pair[0]
                .chars()
                .skip(chars_count(&pair[0]) - 20)
                .collect();
            let head: String = pair[1].chars().take(20).collect();
            assert_eq!(tail, head);
        }
    }

    /// Multi-byte (Chinese) text is split on character boundaries.
    #[test]
    fn test_chunk_chinese() {
        let text = "你好世界这是一段中文测试文本。".repeat(30);
        let chunks = chunk_text(&text, 100, 20);
        assert!(chunks.len() > 1);
        // Make sure Chinese text is cut by characters, not bytes: the first
        // chunk is exactly the first 100 characters of the input.
        let expected_first: String = text.chars().take(100).collect();
        assert_eq!(chunks[0], expected_first);
        for chunk in &chunks {
            assert!(!chunk.is_empty());
            assert!(chars_count(chunk) <= 100);
        }
    }
}

View File

@@ -7,6 +7,7 @@ pub mod chat_session;
pub mod comparison;
pub mod cost;
pub mod dialysis_risk_scorer;
pub mod document;
pub mod embedding;
pub mod feature_flag_service;
pub mod insight_service;