feat(knowledge): Phase A 知识库可见性隔离 + 结构化数据源 + 蒸馏Worker
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

- knowledge_items 增加 visibility(public/private) + account_id 字段
- 新建 structured_sources + structured_rows 表 (Excel JSONB 行级存储)
- 结构化数据源 CRUD API (5 路由: list/get/rows/delete/query)
- 安全查询: JSONB GIN 索引 + 可见性过滤 + 行数限制
- 蒸馏 Worker: 复用 Provider Key Pool 调 DeepSeek/Qwen API
- L0 质量过滤: 长度/隐私检测
- create_item 增加 is_admin 参数控制可见性默认值
- generate_embedding: extract_keywords_from_text 改为 pub 复用

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-12 18:36:05 +08:00
parent b8fb76375c
commit c3593d3438
10 changed files with 846 additions and 20 deletions

View File

@@ -190,7 +190,8 @@ pub async fn create_item(
return Err(SaasError::InvalidInput("内容不能超过 100KB".into()));
}
let item = service::create_item(&state.db, &ctx.account_id, &req).await?;
let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
let item = service::create_item(&state.db, &ctx.account_id, &req, is_admin).await?;
// 异步触发 embedding 生成
if let Err(e) = state.worker_dispatcher.dispatch(
@@ -219,6 +220,7 @@ pub async fn batch_create_items(
return Err(SaasError::InvalidInput("单次批量创建不能超过 50 条".into()));
}
let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
let mut created = Vec::new();
for req in &items {
if req.title.trim().is_empty() || req.content.trim().is_empty() {
@@ -229,7 +231,7 @@ pub async fn batch_create_items(
tracing::warn!("Batch create: skipping item '{}' (content too long)", req.title);
continue;
}
match service::create_item(&state.db, &ctx.account_id, req).await {
match service::create_item(&state.db, &ctx.account_id, req, is_admin).await {
Ok(item) => {
if let Err(e) = state.worker_dispatcher.dispatch(
"generate_embedding",
@@ -534,6 +536,7 @@ pub async fn import_items(
return Err(SaasError::InvalidInput("单次导入不能超过 20 个文件".into()));
}
let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
let mut created = Vec::new();
for file in &req.files {
// 内容长度检查（数据库限制 100KB）
@@ -561,9 +564,10 @@ pub async fn import_items(
related_questions: None,
priority: None,
tags: file.tags.clone(),
visibility: None,
};
match service::create_item(&state.db, &ctx.account_id, &item_req).await {
match service::create_item(&state.db, &ctx.account_id, &item_req, is_admin).await {
Ok(item) => {
if let Err(e) = state.worker_dispatcher.dispatch(
"generate_embedding",
@@ -590,3 +594,94 @@ pub async fn import_items(
/// Module-local shorthand for the permission check in `crate::auth::handlers`.
///
/// Forwards `ctx` and `permission` unchanged and returns that function's
/// result as-is.
fn check_permission(ctx: &AuthContext, permission: &str) -> SaasResult<()> {
    use crate::auth::handlers::check_permission as shared_check;

    shared_check(ctx, permission)
}
/// Reports whether the caller holds one of the administrative roles.
///
/// Returns `true` when `ctx.role` equals `"admin"` or `"super_admin"`,
/// and `false` for every other role value.
fn is_admin(ctx: &AuthContext) -> bool {
    // Roles that are treated as administrators.
    const ADMIN_ROLES: [&str; 2] = ["admin", "super_admin"];

    ADMIN_ROLES.iter().any(|candidate| ctx.role == *candidate)
}
// === 结构化数据源管理 ===
/// GET /api/v1/structured/sources
///
/// Lists structured data sources one page at a time.
///
/// Requires the `knowledge:read` permission. The caller's account id is handed
/// to the service layer together with the optional `industry_id` and `status`
/// filters taken from the query string.
///
/// Pagination: `page` defaults to 1 and is raised to at least 1; `page_size`
/// defaults to 20 and is clamped to the range 1 to 100 inclusive.
///
/// Responds with a JSON object containing `items`, `total`, `page` and
/// `page_size`.
///
/// # Errors
///
/// Propagates any error returned by the permission check or by the service call.
pub async fn list_structured_sources(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Query(query): Query<ListStructuredSourcesQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).max(1);
    // `clamp` states the allowed range directly instead of chaining `max`/`min`.
    let page_size = query.page_size.unwrap_or(20).clamp(1, 100);

    let (sources, total) = service::list_structured_sources(
        &state.db,
        Some(&ctx.account_id),
        query.industry_id.as_deref(),
        query.status.as_deref(),
        page,
        page_size,
    )
    .await?;

    Ok(Json(serde_json::json!({
        "items": sources,
        "total": total,
        "page": page,
        "page_size": page_size,
    })))
}
/// GET /api/v1/structured/sources/:id
pub async fn get_structured_source(
State(state): State<AppState>,
Extension(ctx): Extension<AuthContext>,
Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
check_permission(&ctx, "knowledge:read")?;
let source = service::get_structured_source(&state.db, &id, Some(&ctx.account_id)).await?
.ok_or_else(|| SaasError::NotFound("数据源不存在".into()))?;
Ok(Json(serde_json::to_value(source).unwrap_or_default()))
}
/// GET /api/v1/structured/sources/:id/rows
///
/// Lists the rows of one structured data source, one page at a time.
///
/// Requires the `knowledge:read` permission. The source id from the path, the
/// caller's account id and the optional `sheet_name` filter are passed to the
/// service layer.
///
/// Pagination: `page` defaults to 1 and is raised to at least 1; `page_size`
/// defaults to 50 and is clamped to the range 1 to 200 inclusive.
///
/// Responds with a JSON object containing `rows`, `total`, `page` and
/// `page_size`.
///
/// # Errors
///
/// Propagates any error returned by the permission check or by the service call.
pub async fn list_structured_source_rows(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
    Query(query): Query<ListStructuredRowsQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).max(1);
    // `clamp` states the allowed range directly instead of chaining `max`/`min`.
    let page_size = query.page_size.unwrap_or(50).clamp(1, 200);

    let (rows, total) = service::list_structured_rows(
        &state.db,
        &id,
        Some(&ctx.account_id),
        query.sheet_name.as_deref(),
        page,
        page_size,
    )
    .await?;

    Ok(Json(serde_json::json!({
        "rows": rows,
        "total": total,
        "page": page,
        "page_size": page_size,
    })))
}
/// DELETE /api/v1/structured/sources/:id
///
/// Deletes the structured data source identified by the path id.
///
/// Requires the `knowledge:admin` permission. When the service call succeeds
/// the handler responds with the JSON object `{"deleted": true}`.
///
/// NOTE(review): unlike the read handlers in this file, no account id is passed
/// to the service here — confirm that ownership/visibility is enforced
/// elsewhere or that admins are meant to delete any source.
///
/// # Errors
///
/// Propagates any error returned by the permission check or by the service call.
pub async fn delete_structured_source(
    State(app): State<AppState>,
    Extension(auth): Extension<AuthContext>,
    Path(source_id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&auth, "knowledge:admin")?;

    service::delete_structured_source(&app.db, &source_id).await?;

    let ack = serde_json::json!({ "deleted": true });
    Ok(Json(ack))
}
/// POST /api/v1/structured/query
///
/// Runs a structured-data query described by the JSON request body.
///
/// Requires the `knowledge:search` permission. The request and the caller's
/// account id are handed to the service layer, and its results are returned
/// as a JSON array.
///
/// # Errors
///
/// Propagates any error returned by the permission check or by the service call.
pub async fn query_structured(
    State(app): State<AppState>,
    Extension(auth): Extension<AuthContext>,
    Json(payload): Json<StructuredQueryRequest>,
) -> SaasResult<Json<Vec<StructuredQueryResult>>> {
    check_permission(&auth, "knowledge:search")?;

    let matches =
        service::query_structured(&app.db, &payload, Some(&auth.account_id)).await?;

    Ok(Json(matches))
}