Files
zclaw_openfang/crates/zclaw-saas/src/knowledge/handlers.rs
iven 640df9937f
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
feat(knowledge): Phase D 统一搜索 + 种子知识冷启动
- search/recommend API 返回 UnifiedSearchResult (文档+结构化双通道)
- POST /api/v1/knowledge/seed 种子知识冷启动 (幂等, admin权限)
- seed_knowledge service: 按标题+行业查重, source=distillation
- SearchRequest 扩展: search_structured/search_documents/industry_id
2026-04-12 20:46:43 +08:00

915 lines
30 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! 知识库 HTTP 处理器
use axum::{
extract::{Extension, Multipart, Path, Query, State},
Json,
};
use crate::auth::types::AuthContext;
use crate::error::{SaasError, SaasResult};
use crate::state::AppState;
use super::service;
use super::types::*;
use super::extractors;
// === 分类管理 ===
/// GET /api/v1/knowledge/categories — list categories as a tree.
///
/// Requires the `knowledge:read` permission. The response body is the value
/// produced by `service::list_categories_tree`, serialized as JSON.
pub async fn list_categories(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<Vec<CategoryResponse>>> {
    check_permission(&ctx, "knowledge:read")?;

    let categories = service::list_categories_tree(&state.db).await?;
    Ok(Json(categories))
}
/// POST /api/v1/knowledge/categories — create a category.
///
/// Requires the `knowledge:write` permission. The name is trimmed before it
/// is handed to the service; a name that is empty after trimming is rejected
/// with `SaasError::InvalidInput`.
///
/// Responds with the new category's `id` and `name`.
pub async fn create_category(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<CreateCategoryRequest>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:write")?;

    let name = req.name.trim();
    if name.is_empty() {
        return Err(SaasError::InvalidInput("分类名称不能为空".into()));
    }

    let category = service::create_category(
        &state.db,
        name,
        req.description.as_deref(),
        req.parent_id.as_deref(),
        req.icon.as_deref(),
    )
    .await?;

    let body = serde_json::json!({
        "id": category.id,
        "name": category.name,
    });
    Ok(Json(body))
}
/// PUT /api/v1/knowledge/categories/:id — 更新分类
pub async fn update_category(
State(state): State<AppState>,
Extension(ctx): Extension<AuthContext>,
Path(id): Path<String>,
Json(req): Json<UpdateCategoryRequest>,
) -> SaasResult<Json<serde_json::Value>> {
check_permission(&ctx, "knowledge:write")?;
if let Some(ref name) = req.name {
if name.trim().is_empty() {
return Err(SaasError::InvalidInput("分类名称不能为空".into()));
}
}
let cat = service::update_category(
&state.db,
&id,
req.name.as_deref().map(|n| n.trim()),
req.description.as_deref(),
req.parent_id.as_deref(),
req.icon.as_deref(),
).await?;
Ok(Json(serde_json::json!({
"id": cat.id,
"name": cat.name,
"updated": true,
})))
}
/// DELETE /api/v1/knowledge/categories/:id — delete a category.
///
/// Requires the `knowledge:admin` permission. Any error from
/// `service::delete_category` is propagated; on success the response is
/// `{"deleted": true}`.
pub async fn delete_category(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:admin")?;

    service::delete_category(&state.db, &id).await?;

    let body = serde_json::json!({ "deleted": true });
    Ok(Json(body))
}
/// GET /api/v1/knowledge/categories/:id/items — items of one category.
///
/// Requires the `knowledge:read` permission.
///
/// Query parameters:
/// - `page`: defaults to 1, never lower than 1.
/// - `page_size`: defaults to 20, limited to the range 1..=100.
/// - `status`: defaults to `"active"`.
///
/// Responds with `items`, `total`, `page` and `page_size`.
pub async fn list_category_items(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
    Query(query): Query<ListItemsQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).max(1);
    let page_size = query.page_size.unwrap_or(20).clamp(1, 100);
    let status = query.status.as_deref().unwrap_or("active");

    let (items, total) =
        service::list_items_by_category(&state.db, &id, status, page, page_size).await?;

    let body = serde_json::json!({
        "items": items,
        "total": total,
        "page": page,
        "page_size": page_size,
    });
    Ok(Json(body))
}
// === 知识条目 CRUD ===
/// GET /api/v1/knowledge/items — paginated item list.
///
/// Requires the `knowledge:read` permission.
///
/// Query parameters:
/// - `page`: defaults to 1, limited to 1..=10000.
/// - `page_size`: defaults to 20, limited to 1..=100.
/// - `category_id`, `status`: optional exact-match filters.
/// - `keyword`: optional, matched case-insensitively as a substring of the
///   item title.
///
/// Rows are ordered by priority (descending), then by `updated_at`
/// (descending). Responds with `items`, `total`, `page` and `page_size`.
///
/// The count query applies the same category join as the page query, so
/// `total` always describes exactly the rows the listing can return.
pub async fn list_items(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Query(query): Query<ListItemsQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).clamp(1, 10000);
    let page_size = query.page_size.unwrap_or(20).clamp(1, 100);
    let offset = (page - 1) * page_size;

    // Escape the ILIKE wildcards so that `%` and `_` typed by the user are
    // matched literally. The backslash itself is escaped first so the escapes
    // added afterwards are not doubled.
    let keyword = query.keyword.as_ref().map(|k| {
        k.replace('\\', "\\\\")
            .replace('%', "\\%")
            .replace('_', "\\_")
    });

    let items: Vec<KnowledgeItem> = sqlx::query_as(
        "SELECT ki.* FROM knowledge_items ki \
         JOIN knowledge_categories kc ON ki.category_id = kc.id \
         WHERE ($1::text IS NULL OR ki.category_id = $1) \
         AND ($2::text IS NULL OR ki.status = $2) \
         AND ($3::text IS NULL OR ki.title ILIKE '%' || $3 || '%') \
         ORDER BY ki.priority DESC, ki.updated_at DESC \
         LIMIT $4 OFFSET $5",
    )
    .bind(&query.category_id)
    .bind(&query.status)
    .bind(&keyword)
    .bind(page_size)
    .bind(offset)
    .fetch_all(&state.db)
    .await?;

    // Same FROM/JOIN/WHERE as the page query above; only the projection,
    // ordering and paging differ.
    let total: (i64,) = sqlx::query_as(
        "SELECT COUNT(*) FROM knowledge_items ki \
         JOIN knowledge_categories kc ON ki.category_id = kc.id \
         WHERE ($1::text IS NULL OR ki.category_id = $1) \
         AND ($2::text IS NULL OR ki.status = $2) \
         AND ($3::text IS NULL OR ki.title ILIKE '%' || $3 || '%')",
    )
    .bind(&query.category_id)
    .bind(&query.status)
    .bind(&keyword)
    .fetch_one(&state.db)
    .await?;

    Ok(Json(serde_json::json!({
        "items": items,
        "total": total.0,
        "page": page,
        "page_size": page_size,
    })))
}
/// POST /api/v1/knowledge/items — create a knowledge item.
///
/// Requires the `knowledge:write` permission.
///
/// Validation (each failure yields `SaasError::InvalidInput`):
/// - title and content must be non-blank;
/// - content must not exceed 100 000 bytes.
///
/// After the item is created an embedding-generation job is dispatched.
/// A dispatch failure is only logged; the request still succeeds.
///
/// Responds with the new item's `id`, `title` and `version`.
pub async fn create_item(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<CreateItemRequest>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:write")?;

    let has_blank_field = req.title.trim().is_empty() || req.content.trim().is_empty();
    if has_blank_field {
        return Err(SaasError::InvalidInput("标题和内容不能为空".into()));
    }
    if req.content.len() > 100_000 {
        return Err(SaasError::InvalidInput("内容不能超过 100KB".into()));
    }

    let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
    let item = service::create_item(&state.db, &ctx.account_id, &req, is_admin).await?;

    // Best effort: the item already exists, so a failed dispatch is not fatal.
    let job = serde_json::json!({ "item_id": item.id });
    if let Err(e) = state
        .worker_dispatcher
        .dispatch("generate_embedding", job)
        .await
    {
        tracing::warn!("Failed to dispatch embedding generation: {}", e);
    }

    let body = serde_json::json!({
        "id": item.id,
        "title": item.title,
        "version": item.version,
    });
    Ok(Json(body))
}
/// POST /api/v1/knowledge/items/batch — create several items at once.
///
/// Requires the `knowledge:write` permission. A request with more than 50
/// entries is rejected with `SaasError::InvalidInput`.
///
/// Entries are processed one by one and independently of each other:
/// - an entry with a blank title or content is skipped;
/// - an entry whose content exceeds 100 000 bytes is skipped;
/// - an entry whose creation fails is logged and skipped.
///
/// For every created item an embedding job is dispatched (failure is only
/// logged). Responds with `created_count` and the `ids` of created items.
pub async fn batch_create_items(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(items): Json<Vec<CreateItemRequest>>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:write")?;
    if items.len() > 50 {
        return Err(SaasError::InvalidInput("单次批量创建不能超过 50 条".into()));
    }

    let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
    let mut created = Vec::new();

    for req in &items {
        if req.title.trim().is_empty() || req.content.trim().is_empty() {
            tracing::warn!("Batch create: skipping item with empty title or content");
            continue;
        }
        if req.content.len() > 100_000 {
            tracing::warn!("Batch create: skipping item '{}' (content too long)", req.title);
            continue;
        }

        let item = match service::create_item(&state.db, &ctx.account_id, req, is_admin).await {
            Ok(item) => item,
            Err(e) => {
                tracing::warn!("Batch create item failed: {}", e);
                continue;
            }
        };

        let job = serde_json::json!({ "item_id": item.id });
        if let Err(e) = state
            .worker_dispatcher
            .dispatch("generate_embedding", job)
            .await
        {
            tracing::warn!("[Knowledge] Failed to dispatch embedding for item {}: {}", item.id, e);
        }
        created.push(item.id);
    }

    let body = serde_json::json!({
        "created_count": created.len(),
        "ids": created,
    });
    Ok(Json(body))
}
/// GET /api/v1/knowledge/items/:id — item details.
///
/// Requires the `knowledge:read` permission. Returns `SaasError::NotFound`
/// when the service finds no item for `id`.
///
/// If the item cannot be converted to JSON, the response body falls back to
/// the default JSON value instead of an error.
pub async fn get_item(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let item = match service::get_item(&state.db, &id).await? {
        Some(item) => item,
        None => return Err(SaasError::NotFound("知识条目不存在".into())),
    };

    let body = serde_json::to_value(item).unwrap_or_default();
    Ok(Json(body))
}
/// PUT /api/v1/knowledge/items/:id — 更新条目
pub async fn update_item(
State(state): State<AppState>,
Extension(ctx): Extension<AuthContext>,
Path(id): Path<String>,
Json(req): Json<UpdateItemRequest>,
) -> SaasResult<Json<serde_json::Value>> {
check_permission(&ctx, "knowledge:write")?;
if let Some(ref content) = req.content {
if content.len() > 100_000 {
return Err(SaasError::InvalidInput("内容不能超过 100KB".into()));
}
}
let updated = service::update_item(&state.db, &id, &ctx.account_id, &req).await?;
// 触发 re-embedding
if let Err(e) = state.worker_dispatcher.dispatch(
"generate_embedding",
serde_json::json!({ "item_id": id }),
).await {
tracing::warn!("[Knowledge] Failed to dispatch re-embedding for item {}: {}", id, e);
}
Ok(Json(serde_json::json!({
"id": updated.id,
"version": updated.version,
})))
}
/// DELETE /api/v1/knowledge/items/:id — delete an item.
///
/// Requires the `knowledge:admin` permission. Any error from
/// `service::delete_item` is propagated; on success the response is
/// `{"deleted": true}`.
pub async fn delete_item(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:admin")?;

    service::delete_item(&state.db, &id).await?;

    let body = serde_json::json!({ "deleted": true });
    Ok(Json(body))
}
// === 版本控制 ===
/// GET /api/v1/knowledge/items/:id/versions — version history of an item.
///
/// Requires the `knowledge:read` permission. Reads every row of
/// `knowledge_versions` for the item, newest version first, and responds with
/// `{"versions": [...]}`. An item without versions yields an empty list.
// @reserved - no frontend caller
pub async fn list_versions(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    const SQL: &str =
        "SELECT * FROM knowledge_versions WHERE item_id = $1 ORDER BY version DESC";
    let history: Vec<KnowledgeVersion> = sqlx::query_as(SQL)
        .bind(&id)
        .fetch_all(&state.db)
        .await?;

    let body = serde_json::json!({ "versions": history });
    Ok(Json(body))
}
/// GET /api/v1/knowledge/items/:id/versions/:v — one specific version.
///
/// Requires the `knowledge:read` permission. Returns `SaasError::NotFound`
/// when `knowledge_versions` has no row for this item and version number.
///
/// If the row cannot be converted to JSON, the response body falls back to
/// the default JSON value instead of an error.
// @reserved - no frontend caller
pub async fn get_version(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path((id, version_no)): Path<(String, i32)>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    const SQL: &str = "SELECT * FROM knowledge_versions WHERE item_id = $1 AND version = $2";
    let found: Option<KnowledgeVersion> = sqlx::query_as(SQL)
        .bind(&id)
        .bind(version_no)
        .fetch_optional(&state.db)
        .await?;

    let record = match found {
        Some(record) => record,
        None => return Err(SaasError::NotFound("版本不存在".into())),
    };

    let body = serde_json::to_value(record).unwrap_or_default();
    Ok(Json(body))
}
/// POST /api/v1/knowledge/items/:id/rollback/:v — roll an item back.
///
/// Requires the `knowledge:admin` permission. Delegates the rollback to
/// `service::rollback_version`, then dispatches an embedding job for the item
/// (a dispatch failure is only logged).
///
/// Responds with the item's `id`, its resulting `version`, and the requested
/// target in `rolled_back_to`.
// @reserved - no frontend caller
pub async fn rollback_version(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path((id, target_version)): Path<(String, i32)>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:admin")?;

    let updated =
        service::rollback_version(&state.db, &id, target_version, &ctx.account_id).await?;

    // Re-embed the item; best effort only.
    let job = serde_json::json!({ "item_id": id });
    if let Err(e) = state
        .worker_dispatcher
        .dispatch("generate_embedding", job)
        .await
    {
        tracing::warn!("[Knowledge] Failed to dispatch re-embedding after rollback for item {}: {}", id, e);
    }

    let body = serde_json::json!({
        "id": updated.id,
        "version": updated.version,
        "rolled_back_to": target_version,
    });
    Ok(Json(body))
}
// === 检索 ===
/// POST /api/v1/knowledge/search — unified search (documents + structured data).
///
/// Requires the `knowledge:search` permission. The request is forwarded
/// unchanged to `service::unified_search` together with the caller's account
/// id, and the service result is returned as JSON.
pub async fn search(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<SearchRequest>,
) -> SaasResult<Json<UnifiedSearchResult>> {
    check_permission(&ctx, "knowledge:search")?;

    let account = Some(&ctx.account_id);
    let found = service::unified_search(&state.db, &req, account).await?;
    Ok(Json(found))
}
/// POST /api/v1/knowledge/recommend — related-content recommendation.
///
/// Requires the `knowledge:search` permission. Runs the same unified search
/// as the search endpoint, with two adjustments to the request:
/// - `min_score` is always set to 0.3, replacing any value sent by the caller;
/// - `search_structured` defaults to `true` when the caller left it unset.
pub async fn recommend(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(mut req): Json<SearchRequest>,
) -> SaasResult<Json<UnifiedSearchResult>> {
    check_permission(&ctx, "knowledge:search")?;

    req.min_score = Some(0.3);
    req.search_structured = req.search_structured.or(Some(true));

    let account = Some(&ctx.account_id);
    let found = service::unified_search(&state.db, &req, account).await?;
    Ok(Json(found))
}
// === 分析看板 ===
/// GET /api/v1/knowledge/analytics/overview — dashboard overview.
///
/// Requires the `knowledge:read` permission. Returns the value produced by
/// `service::analytics_overview` as JSON.
pub async fn analytics_overview(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<AnalyticsOverview>> {
    check_permission(&ctx, "knowledge:read")?;

    let summary = service::analytics_overview(&state.db).await?;
    Ok(Json(summary))
}
/// GET /api/v1/knowledge/analytics/trends
pub async fn analytics_trends(
State(state): State<AppState>,
Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<serde_json::Value>> {
check_permission(&ctx, "knowledge:read")?;
// 使用 serde_json::Value 行来避免 PgRow 序列化
let trends: Vec<(serde_json::Value,)> = sqlx::query_as(
"SELECT json_build_object(
'date', DATE(created_at),
'count', COUNT(*),
'injected_count', SUM(CASE WHEN was_injected THEN 1 ELSE 0 END)
) as row \
FROM knowledge_usage \
WHERE created_at >= NOW() - interval '30 days' \
GROUP BY DATE(created_at) ORDER BY DATE(created_at)"
)
.fetch_all(&state.db)
.await
.unwrap_or_default();
let trends: Vec<serde_json::Value> = trends.into_iter().map(|(v,)| v).collect();
Ok(Json(serde_json::json!({"trends": trends})))
}
/// GET /api/v1/knowledge/analytics/top-items
pub async fn analytics_top_items(
State(state): State<AppState>,
Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<serde_json::Value>> {
check_permission(&ctx, "knowledge:read")?;
let items: Vec<(serde_json::Value,)> = sqlx::query_as(
"SELECT json_build_object(
'id', ki.id,
'title', ki.title,
'category', kc.name,
'ref_count', COUNT(ku.id)
) as row \
FROM knowledge_items ki \
JOIN knowledge_categories kc ON ki.category_id = kc.id \
LEFT JOIN knowledge_usage ku ON ku.item_id = ki.id \
WHERE ki.status = 'active' \
GROUP BY ki.id, ki.title, kc.name \
ORDER BY COUNT(ku.id) DESC LIMIT 20"
)
.fetch_all(&state.db)
.await
.unwrap_or_default();
let items: Vec<serde_json::Value> = items.into_iter().map(|(v,)| v).collect();
Ok(Json(serde_json::json!({"items": items})))
}
/// GET /api/v1/knowledge/analytics/quality — quality metrics.
///
/// Requires the `knowledge:read` permission. Returns the JSON value produced
/// by `service::analytics_quality` as is.
pub async fn analytics_quality(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let report = service::analytics_quality(&state.db).await?;
    Ok(Json(report))
}
/// GET /api/v1/knowledge/analytics/gaps — knowledge gap report.
///
/// Requires the `knowledge:read` permission. Returns the JSON value produced
/// by `service::analytics_gaps` as is.
pub async fn analytics_gaps(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let report = service::analytics_gaps(&state.db).await?;
    Ok(Json(report))
}
// === 批量操作 ===
/// PATCH /api/v1/knowledge/categories/reorder — bulk re-ordering.
///
/// Requires the `knowledge:write` permission.
///
/// - An empty list is a no-op answered with `{"reordered": false, "count": 0}`.
/// - More than 100 entries are rejected with `SaasError::InvalidInput`.
/// - Otherwise every entry's `sort_order` is written inside one transaction,
///   so a failing update leaves none of the changes committed.
///
/// Responds with `{"reordered": true, "count": <number of entries>}`.
pub async fn reorder_categories(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(items): Json<Vec<ReorderItem>>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:write")?;

    match items.len() {
        0 => return Ok(Json(serde_json::json!({"reordered": false, "count": 0}))),
        n if n > 100 => {
            return Err(SaasError::InvalidInput("单次排序不能超过 100 个".into()));
        }
        _ => {}
    }

    const UPDATE_SORT_ORDER: &str =
        "UPDATE knowledge_categories SET sort_order = $1, updated_at = NOW() WHERE id = $2";

    // All updates share one transaction so the new order is applied atomically.
    let mut tx = state.db.begin().await?;
    for entry in &items {
        sqlx::query(UPDATE_SORT_ORDER)
            .bind(entry.sort_order)
            .bind(&entry.id)
            .execute(&mut *tx)
            .await?;
    }
    tx.commit().await?;

    let body = serde_json::json!({"reordered": true, "count": items.len()});
    Ok(Json(body))
}
/// POST /api/v1/knowledge/items/import — import Markdown files as items.
///
/// Requires the `knowledge:write` permission. A request with more than 20
/// files is rejected with `SaasError::InvalidInput`.
///
/// Every file becomes one item in `req.category_id`. Files are handled
/// independently of each other:
/// - a file whose content exceeds 100 000 bytes is skipped;
/// - a file whose content is blank is skipped;
/// - a file whose creation fails is logged and skipped.
///
/// The item title is the supplied title when it is non-blank. Otherwise it is
/// derived from the first non-blank line of the content (leading `#` heading
/// markers removed), falling back to a numbered placeholder.
///
/// For every created item an embedding job is dispatched (failure is only
/// logged). Responds with `created_count` and the `ids` of created items.
pub async fn import_items(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<ImportRequest>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:write")?;
    if req.files.len() > 20 {
        return Err(SaasError::InvalidInput("单次导入不能超过 20 个文件".into()));
    }

    let is_admin = ctx.role == "admin" || ctx.role == "super_admin";
    let mut created = Vec::new();

    for file in &req.files {
        // Content length check (the database limit is 100KB).
        if file.content.len() > 100_000 {
            tracing::warn!("跳过文件 '{}': 内容超长 ({} bytes)", file.title.as_deref().unwrap_or("未命名"), file.content.len());
            continue;
        }
        // Blank content check.
        if file.content.trim().is_empty() {
            tracing::warn!("跳过空文件: '{}'", file.title.as_deref().unwrap_or("未命名"));
            continue;
        }

        // A supplied title wins unless it is blank; a blank title would
        // otherwise create an item that cannot be told apart in listings.
        let title = file
            .title
            .as_deref()
            .filter(|t| !t.trim().is_empty())
            .map(String::from)
            .unwrap_or_else(|| derive_import_title(&file.content, created.len() + 1));

        let item_req = CreateItemRequest {
            category_id: req.category_id.clone(),
            title,
            content: file.content.clone(),
            keywords: file.keywords.clone(),
            related_questions: None,
            priority: None,
            tags: file.tags.clone(),
            visibility: None,
        };

        match service::create_item(&state.db, &ctx.account_id, &item_req, is_admin).await {
            Ok(item) => {
                if let Err(e) = state.worker_dispatcher.dispatch(
                    "generate_embedding",
                    serde_json::json!({ "item_id": item.id }),
                ).await {
                    tracing::warn!("[Knowledge] Failed to dispatch embedding for item {}: {}", item.id, e);
                }
                created.push(item.id);
            }
            Err(e) => {
                tracing::warn!("Import item '{}' failed: {}", item_req.title, e);
            }
        }
    }

    Ok(Json(serde_json::json!({
        "created_count": created.len(),
        "ids": created,
    })))
}

/// Derives a title for an imported file that did not supply a usable one.
///
/// Uses the first line that still has text after removing surrounding
/// whitespace and leading `#` characters. When no such line exists the
/// placeholder `导入条目 <ordinal>` is returned.
fn derive_import_title(content: &str, ordinal: usize) -> String {
    content
        .lines()
        .map(|line| line.trim().trim_start_matches('#').trim())
        .find(|candidate| !candidate.is_empty())
        .map(String::from)
        .unwrap_or_else(|| format!("导入条目 {}", ordinal))
}
// === 辅助函数 ===
/// Module-local shorthand for the crate-wide permission check.
///
/// Forwards `ctx` and `permission` to
/// `crate::auth::handlers::check_permission` and returns its result unchanged.
fn check_permission(ctx: &AuthContext, permission: &str) -> SaasResult<()> {
    use crate::auth::handlers::check_permission as authorize;

    authorize(ctx, permission)
}
/// Reports whether the caller's role is `admin` or `super_admin`.
fn is_admin(ctx: &AuthContext) -> bool {
    ["admin", "super_admin"]
        .iter()
        .any(|role| ctx.role == *role)
}
// === 结构化数据源管理 ===
/// GET /api/v1/structured/sources — paginated list of structured sources.
///
/// Requires the `knowledge:read` permission. The caller's account id is
/// passed to the service along with the optional `industry_id` and `status`
/// filters.
///
/// Query parameters:
/// - `page`: defaults to 1, never lower than 1.
/// - `page_size`: defaults to 20, limited to 1..=100.
///
/// Responds with `items`, `total`, `page` and `page_size`.
pub async fn list_structured_sources(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Query(query): Query<ListStructuredSourcesQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).max(1);
    let page_size = query.page_size.unwrap_or(20).clamp(1, 100);
    let industry = query.industry_id.as_deref();
    let status = query.status.as_deref();

    let (sources, total) = service::list_structured_sources(
        &state.db,
        Some(&ctx.account_id),
        industry,
        status,
        page,
        page_size,
    )
    .await?;

    let body = serde_json::json!({
        "items": sources,
        "total": total,
        "page": page,
        "page_size": page_size,
    });
    Ok(Json(body))
}
/// GET /api/v1/structured/sources/:id — details of one structured source.
///
/// Requires the `knowledge:read` permission. The lookup is performed with
/// the caller's account id; `SaasError::NotFound` is returned when the service
/// finds nothing.
///
/// If the source cannot be converted to JSON, the response body falls back to
/// the default JSON value instead of an error.
pub async fn get_structured_source(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let found = service::get_structured_source(&state.db, &id, Some(&ctx.account_id)).await?;
    let source = match found {
        Some(source) => source,
        None => return Err(SaasError::NotFound("数据源不存在".into())),
    };

    let body = serde_json::to_value(source).unwrap_or_default();
    Ok(Json(body))
}
/// GET /api/v1/structured/sources/:id/rows — paginated rows of a source.
///
/// Requires the `knowledge:read` permission. The caller's account id and the
/// optional `sheet_name` filter are passed to the service.
///
/// Query parameters:
/// - `page`: defaults to 1, never lower than 1.
/// - `page_size`: defaults to 50, limited to 1..=200.
///
/// Responds with `rows`, `total`, `page` and `page_size`.
pub async fn list_structured_source_rows(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
    Query(query): Query<ListStructuredRowsQuery>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:read")?;

    let page = query.page.unwrap_or(1).max(1);
    let page_size = query.page_size.unwrap_or(50).clamp(1, 200);
    let sheet = query.sheet_name.as_deref();

    let (rows, total) = service::list_structured_rows(
        &state.db,
        &id,
        Some(&ctx.account_id),
        sheet,
        page,
        page_size,
    )
    .await?;

    let body = serde_json::json!({
        "rows": rows,
        "total": total,
        "page": page,
        "page_size": page_size,
    });
    Ok(Json(body))
}
/// DELETE /api/v1/structured/sources/:id — delete a structured source.
///
/// Requires the `knowledge:admin` permission. Any error from
/// `service::delete_structured_source` is propagated; on success the response
/// is `{"deleted": true}`.
///
/// NOTE(review): unlike the read handlers for structured sources, no account
/// id is passed to the service here — confirm that ownership is checked
/// elsewhere or that this is intended for admins.
pub async fn delete_structured_source(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Path(id): Path<String>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:admin")?;

    service::delete_structured_source(&state.db, &id).await?;

    let body = serde_json::json!({ "deleted": true });
    Ok(Json(body))
}
/// POST /api/v1/structured/query — query structured data.
///
/// Requires the `knowledge:search` permission. The request is forwarded to
/// `service::query_structured` together with the caller's account id, and the
/// service result is returned as JSON.
pub async fn query_structured(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<StructuredQueryRequest>,
) -> SaasResult<Json<Vec<StructuredQueryResult>>> {
    check_permission(&ctx, "knowledge:search")?;

    let account = Some(&ctx.account_id);
    let matches = service::query_structured(&state.db, &req, account).await?;
    Ok(Json(matches))
}
// === 文件上传 ===
/// POST /api/v1/knowledge/upload — multipart file upload.
///
/// Requires the `knowledge:write` permission. Every multipart field is
/// treated as one file and routed by the format detected from its file name:
/// structured formats go to `handle_structured_upload`, everything else to
/// `handle_document_upload`.
///
/// A malformed multipart body aborts the whole request with
/// `SaasError::InvalidInput`. Problems with an individual file (larger than
/// 20 MiB, unsupported format, processing error) do not abort the request;
/// they are reported as an entry with `"status": "error"` for that file.
///
/// Responds with `results` (one entry per field, in upload order) and `count`.
pub async fn upload_file(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    mut multipart: Multipart,
) -> SaasResult<Json<serde_json::Value>> {
    const MAX_UPLOAD_BYTES: usize = 20 * 1024 * 1024;

    check_permission(&ctx, "knowledge:write")?;
    let is_admin = ctx.role == "admin" || ctx.role == "super_admin";

    // Builds the per-file error entry used for every kind of failure below.
    let failure = |file: &str, error: &str| {
        serde_json::json!({
            "file": file,
            "status": "error",
            "error": error
        })
    };

    let mut results = Vec::new();
    while let Some(field) = multipart
        .next_field()
        .await
        .map_err(|e| SaasError::InvalidInput(format!("文件上传解析失败: {}", e)))?
    {
        let file_name = field.file_name().unwrap_or("unknown").to_string();
        let data = field
            .bytes()
            .await
            .map_err(|e| SaasError::InvalidInput(format!("文件读取失败: {}", e)))?;

        // Size limit: 20MB.
        if data.len() > MAX_UPLOAD_BYTES {
            results.push(failure(&file_name, "文件超过 20MB 限制"));
            continue;
        }

        let format = match extractors::detect_format(&file_name) {
            Some(format) => format,
            None => {
                results.push(failure(&file_name, "不支持的文件格式"));
                continue;
            }
        };

        let outcome = if format.is_structured() {
            // Excel -> structured channel.
            handle_structured_upload(&state, &ctx, is_admin, &data, &file_name).await
        } else {
            // PDF/DOCX/MD -> document channel (RAG).
            handle_document_upload(&state, &ctx, is_admin, &data, &file_name, format).await
        };

        results.push(match outcome {
            Ok(entry) => entry,
            Err(e) => failure(&file_name, &e.to_string()),
        });
    }

    Ok(Json(serde_json::json!({
        "results": results,
        "count": results.len(),
    })))
}
/// Handles a document upload (PDF/DOCX/Markdown -> RAG pipeline).
///
/// The file is turned into a normalized document (PDF and DOCX through their
/// extractors, Markdown passed through as a single section), rendered to
/// Markdown and stored as one knowledge item in the `"uploaded"` category with
/// priority 5 and a `source:<format>` tag. An embedding job is then dispatched
/// for the item; a dispatch failure is only logged.
///
/// # Errors
///
/// Returns `SaasError::InvalidInput` for a format this function does not
/// handle and for files whose rendered content is empty or whitespace only.
/// Extractor and item-creation errors are propagated.
///
/// On success returns a JSON entry with `file`, `status`, `item_id`,
/// `sections` and `format`.
async fn handle_document_upload(
    state: &AppState,
    ctx: &AuthContext,
    is_admin: bool,
    data: &[u8],
    file_name: &str,
    format: extractors::DocumentFormat,
) -> SaasResult<serde_json::Value> {
    let doc = match format {
        extractors::DocumentFormat::Pdf => extractors::extract_pdf(data, file_name)?,
        extractors::DocumentFormat::Docx => extractors::extract_docx(data, file_name)?,
        extractors::DocumentFormat::Markdown => {
            // Markdown passes straight through as a single section. Invalid
            // UTF-8 sequences are replaced rather than rejected.
            let text = String::from_utf8_lossy(data).into_owned();
            extractors::NormalizedDocument {
                title: markdown_title(file_name),
                sections: vec![extractors::DocumentSection {
                    heading: None,
                    content: text,
                    level: 1,
                    page_number: None,
                }],
                metadata: extractors::DocumentMetadata {
                    source_format: "markdown".to_string(),
                    file_name: file_name.to_string(),
                    total_pages: None,
                    total_sections: 1,
                },
            }
        }
        _ => return Err(SaasError::InvalidInput("不支持的文档格式".into())),
    };

    // Render to Markdown. Whitespace-only output is treated as empty, the
    // same way the import endpoint treats blank files.
    let content = extractors::normalized_to_markdown(&doc);
    if content.trim().is_empty() {
        return Err(SaasError::InvalidInput("文件内容为空".into()));
    }

    // Create the knowledge item.
    let item_req = CreateItemRequest {
        category_id: "uploaded".to_string(), // TODO: take this from the upload parameters
        title: doc.title.clone(),
        content,
        keywords: None,
        related_questions: None,
        priority: Some(5),
        tags: Some(vec![format!("source:{}", doc.metadata.source_format)]),
        visibility: None,
    };
    let item = service::create_item(&state.db, &ctx.account_id, &item_req, is_admin).await?;

    // Queue embedding generation for the new item; best effort only.
    if let Err(e) = state.worker_dispatcher.dispatch(
        "generate_embedding",
        serde_json::json!({ "item_id": item.id }),
    ).await {
        tracing::warn!("Upload: failed to dispatch embedding for {}: {}", item.id, e);
    }

    Ok(serde_json::json!({
        "file": file_name,
        "status": "ok",
        "item_id": item.id,
        "sections": doc.metadata.total_sections,
        "format": doc.metadata.source_format,
    }))
}

/// Derives an item title from the name of an uploaded text file by removing
/// exactly one trailing `.md` or `.txt` extension.
///
/// Only a single extension is removed, so `notes.txt.md` becomes `notes.txt`
/// and a name without either extension is returned unchanged.
fn markdown_title(file_name: &str) -> String {
    file_name
        .strip_suffix(".md")
        .or_else(|| file_name.strip_suffix(".txt"))
        .unwrap_or(file_name)
        .to_string()
}
/// Handles a structured-data upload (Excel -> structured rows).
///
/// The workbook is parsed by `extractors::extract_excel`. A structured source
/// describing the file (title, original file name, sheet names, column
/// headers) is created first, then the extracted rows are inserted for it.
///
/// # Errors
///
/// Returns `SaasError::InvalidInput` when the workbook contains no data rows
/// or when the extractor yields something other than a structured result.
/// Extractor and service errors are propagated.
///
/// NOTE(review): the source is created before the rows are inserted, as two
/// separate service calls — confirm whether a failed row insert can leave a
/// source without rows behind.
///
/// On success returns a JSON entry with `file`, `status`, `source_id`,
/// `sheets`, `rows_imported` and `columns`.
async fn handle_structured_upload(
    state: &AppState,
    ctx: &AuthContext,
    is_admin: bool,
    data: &[u8],
    file_name: &str,
) -> SaasResult<serde_json::Value> {
    let (title, sheet_names, column_headers, rows) =
        match extractors::extract_excel(data, file_name)? {
            extractors::ProcessedFile::Structured {
                title,
                sheet_names,
                column_headers,
                rows,
            } => (title, sheet_names, column_headers, rows),
            _ => return Err(SaasError::InvalidInput("意外的处理结果".into())),
        };

    if rows.is_empty() {
        return Err(SaasError::InvalidInput("Excel 文件没有数据行".into()));
    }

    // Remember the column count now; the headers move into the request below.
    let column_count = column_headers.len();

    // Create the structured source.
    let source_req = CreateStructuredSourceRequest {
        title,
        description: None,
        original_file_name: Some(file_name.to_string()),
        sheet_names: Some(sheet_names.clone()),
        column_headers: Some(column_headers),
        visibility: None,
        industry_id: None,
    };
    let source =
        service::create_structured_source(&state.db, &ctx.account_id, is_admin, &source_req)
            .await?;

    // Insert the row data in bulk.
    let imported = service::insert_structured_rows(&state.db, &source.id, &rows).await?;

    Ok(serde_json::json!({
        "file": file_name,
        "status": "ok",
        "source_id": source.id,
        "sheets": sheet_names,
        "rows_imported": imported,
        "columns": column_count,
    }))
}
// === 种子知识冷启动 ===
/// POST /api/v1/knowledge/seed — cold-start the knowledge base with seed items.
///
/// Requires the `knowledge:admin` permission. A request with more than 100
/// items is rejected with `SaasError::InvalidInput`. Items are stored under
/// `req.category_id`, or under `"seed"` when no category is given; missing
/// keywords are treated as an empty list.
///
/// NOTE(review): the endpoint is described as idempotent because the service
/// is expected to de-duplicate by title + industry — that logic lives in
/// `service::seed_knowledge` and is not visible here.
///
/// Responds with `industry_id`, `created_count` (as reported by the service)
/// and `total_submitted`.
pub async fn seed_knowledge(
    State(state): State<AppState>,
    Extension(ctx): Extension<AuthContext>,
    Json(req): Json<SeedKnowledgeRequest>,
) -> SaasResult<Json<serde_json::Value>> {
    check_permission(&ctx, "knowledge:admin")?;
    if req.items.len() > 100 {
        return Err(SaasError::InvalidInput("单次种子不能超过 100 条".into()));
    }

    // The service takes plain (title, content, keywords) tuples.
    let seeds: Vec<_> = req
        .items
        .iter()
        .map(|seed| {
            (
                seed.title.clone(),
                seed.content.clone(),
                seed.keywords.clone().unwrap_or_default(),
            )
        })
        .collect();
    let category_id = req.category_id.as_deref().unwrap_or("seed");

    let created = service::seed_knowledge(
        &state.db,
        &req.industry_id,
        category_id,
        &seeds,
        &ctx.account_id,
    )
    .await?;

    let body = serde_json::json!({
        "industry_id": req.industry_id,
        "created_count": created,
        "total_submitted": req.items.len(),
    });
    Ok(Json(body))
}