use sea_orm::{ ColumnTrait, ConnectionTrait, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder, Set, }; use serde::{Deserialize, Serialize}; use uuid::Uuid; use crate::entity::ai_knowledge_bases; use crate::error::{AiError, AiResult}; // ─── DTO ─── #[derive(Debug, Deserialize, Serialize, utoipa::ToSchema)] pub struct CreateKnowledgeBaseReq { pub name: String, pub kb_type: String, pub description: Option, pub icon: Option, pub chunk_strategy: Option, pub intent_keywords: Option, pub embedding_model: Option, pub is_enabled: Option, } #[derive(Debug, Deserialize, Serialize, utoipa::ToSchema)] pub struct UpdateKnowledgeBaseReq { pub name: Option, pub kb_type: Option, pub description: Option, pub icon: Option, pub chunk_strategy: Option, pub intent_keywords: Option, pub embedding_model: Option, pub is_enabled: Option, } #[derive(Debug, Deserialize, utoipa::IntoParams)] pub struct ListKnowledgeBasesQuery { pub kb_type: Option, pub is_enabled: Option, pub page: Option, pub page_size: Option, } // ─── Service ─── pub struct KnowledgeV2Service { db: sea_orm::DatabaseConnection, } impl KnowledgeV2Service { pub fn new(db: sea_orm::DatabaseConnection) -> Self { Self { db } } pub async fn list( &self, tenant_id: Uuid, query: &ListKnowledgeBasesQuery, ) -> AiResult<(Vec, u64)> { let page = query.page.unwrap_or(1); let page_size = query.page_size.unwrap_or(20); let mut find = ai_knowledge_bases::Entity::find() .filter(ai_knowledge_bases::Column::TenantId.eq(tenant_id)) .filter(ai_knowledge_bases::Column::DeletedAt.is_null()); if let Some(ref kb_type) = query.kb_type { find = find.filter(ai_knowledge_bases::Column::KbType.eq(kb_type.as_str())); } if let Some(is_enabled) = query.is_enabled { find = find.filter(ai_knowledge_bases::Column::IsEnabled.eq(is_enabled)); } let paginator = find .order_by_desc(ai_knowledge_bases::Column::CreatedAt) .paginate(&self.db, page_size); let total = paginator.num_items().await?; let items = paginator.fetch_page(page - 1).await?; Ok((items, total)) } pub async fn get_by_id( &self, tenant_id: Uuid, id: Uuid, ) -> AiResult { ai_knowledge_bases::Entity::find_by_id(id) .one(&self.db) .await .map_err(|e| AiError::DbError(e.to_string()))? .filter(|m| m.tenant_id == tenant_id && m.deleted_at.is_none()) .ok_or_else(|| AiError::KnowledgeError("知识库不存在".into())) } pub async fn create( &self, tenant_id: Uuid, user_id: Uuid, req: CreateKnowledgeBaseReq, ) -> AiResult { let id = Uuid::now_v7(); let now = chrono::Utc::now(); let active = ai_knowledge_bases::ActiveModel { id: Set(id), tenant_id: Set(tenant_id), name: Set(req.name), kb_type: Set(req.kb_type), description: Set(req.description), icon: Set(req.icon), chunk_strategy: Set(req.chunk_strategy.unwrap_or( serde_json::json!({"strategy": "auto", "chunk_size": 500, "overlap": 50}), )), intent_keywords: Set(req.intent_keywords.unwrap_or(serde_json::json!([]))), embedding_model: Set(req.embedding_model), is_enabled: Set(req.is_enabled.unwrap_or(true)), document_count: Set(0), chunk_count: Set(0), created_at: Set(now), updated_at: Set(now), created_by: Set(Some(user_id)), updated_by: Set(Some(user_id)), deleted_at: Set(None), version_lock: Set(1), }; ai_knowledge_bases::Entity::insert(active) .exec(&self.db) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(id) } pub async fn update( &self, tenant_id: Uuid, user_id: Uuid, id: Uuid, req: UpdateKnowledgeBaseReq, ) -> AiResult<()> { let existing = self.get_by_id(tenant_id, id).await?; let now = chrono::Utc::now(); let active = ai_knowledge_bases::ActiveModel { id: Set(existing.id), tenant_id: Set(existing.tenant_id), name: Set(req.name.unwrap_or(existing.name)), kb_type: Set(req.kb_type.unwrap_or(existing.kb_type)), description: Set(req.description.or(existing.description)), icon: Set(req.icon.or(existing.icon)), chunk_strategy: Set(req.chunk_strategy.unwrap_or(existing.chunk_strategy)), intent_keywords: Set(req.intent_keywords.unwrap_or(existing.intent_keywords)), embedding_model: Set(req.embedding_model.or(existing.embedding_model)), is_enabled: Set(req.is_enabled.unwrap_or(existing.is_enabled)), document_count: Set(existing.document_count), chunk_count: Set(existing.chunk_count), created_at: Set(existing.created_at), updated_at: Set(now), created_by: Set(existing.created_by), updated_by: Set(Some(user_id)), deleted_at: Set(existing.deleted_at), version_lock: Set(existing.version_lock + 1), }; ai_knowledge_bases::Entity::update(active) .exec(&self.db) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(()) } pub async fn delete(&self, tenant_id: Uuid, id: Uuid) -> AiResult<()> { let existing = self.get_by_id(tenant_id, id).await?; let now = chrono::Utc::now(); let active = ai_knowledge_bases::ActiveModel { id: Set(existing.id), tenant_id: Set(existing.tenant_id), name: Set(existing.name), kb_type: Set(existing.kb_type), description: Set(existing.description), icon: Set(existing.icon), chunk_strategy: Set(existing.chunk_strategy), intent_keywords: Set(existing.intent_keywords), embedding_model: Set(existing.embedding_model), is_enabled: Set(existing.is_enabled), document_count: Set(existing.document_count), chunk_count: Set(existing.chunk_count), created_at: Set(existing.created_at), updated_at: Set(now), created_by: Set(existing.created_by), updated_by: Set(existing.updated_by), deleted_at: Set(Some(now)), version_lock: Set(existing.version_lock + 1), }; ai_knowledge_bases::Entity::update(active) .exec(&self.db) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(()) } /// 原子递增文档计数(用于文档上传成功后) pub async fn increment_document_count(&self, kb_id: Uuid, delta: i32) -> AiResult<()> { let sql = r#" UPDATE ai_knowledge_bases SET document_count = document_count + $2, updated_at = $3, version_lock = version_lock + 1 WHERE id = $1 AND deleted_at IS NULL "#; let stmt = sea_orm::Statement::from_sql_and_values( sea_orm::DatabaseBackend::Postgres, sql, [ sea_orm::Value::from(kb_id), sea_orm::Value::from(delta), sea_orm::Value::from(chrono::Utc::now()), ], ); self.db .execute(stmt) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(()) } /// 原子递增切片计数(用于切片生成后) pub async fn increment_chunk_count(&self, kb_id: Uuid, delta: i32) -> AiResult<()> { let sql = r#" UPDATE ai_knowledge_bases SET chunk_count = chunk_count + $2, updated_at = $3, version_lock = version_lock + 1 WHERE id = $1 AND deleted_at IS NULL "#; let stmt = sea_orm::Statement::from_sql_and_values( sea_orm::DatabaseBackend::Postgres, sql, [ sea_orm::Value::from(kb_id), sea_orm::Value::from(delta), sea_orm::Value::from(chrono::Utc::now()), ], ); self.db .execute(stmt) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(()) } /// 向量相似度搜索:在指定知识库中搜索与 query_embedding 最相似的 top_k 个切片 pub async fn vector_search( &self, tenant_id: Uuid, kb_id: Uuid, query_embedding: &[f32], top_k: i64, ) -> AiResult> { let vector_str = crate::service::embedding::format_vector(query_embedding); let sql = r#" SELECT c.id, c.document_id, c.chunk_index, c.content, c.metadata, d.title AS doc_title, 1 - (c.embedding <=> $3::vector) AS similarity FROM ai_knowledge_chunks c JOIN ai_knowledge_documents d ON d.id = c.document_id WHERE c.tenant_id = $1 AND c.knowledge_base_id = $2 AND c.deleted_at IS NULL AND d.deleted_at IS NULL AND c.embedding IS NOT NULL ORDER BY c.embedding <=> $3::vector LIMIT $4 "#; let stmt = sea_orm::Statement::from_sql_and_values( sea_orm::DatabaseBackend::Postgres, sql, [ sea_orm::Value::from(tenant_id), sea_orm::Value::from(kb_id), sea_orm::Value::String(Some(Box::new(vector_str))), sea_orm::Value::from(top_k), ], ); let rows: Vec = sea_orm::FromQueryResult::find_by_statement(stmt) .all(&self.db) .await .map_err(|e| AiError::DbError(e.to_string()))?; Ok(rows.into_iter().map(SearchHit::from).collect()) } } #[derive(Debug, sea_orm::FromQueryResult)] struct SearchHitRow { id: Uuid, document_id: Uuid, chunk_index: i32, content: String, metadata: serde_json::Value, doc_title: String, similarity: f64, } #[derive(Debug, serde::Serialize)] pub struct SearchHit { pub chunk_id: Uuid, pub document_id: Uuid, pub chunk_index: i32, pub content: String, pub doc_title: String, pub similarity: f64, pub metadata: serde_json::Value, } impl From for SearchHit { fn from(row: SearchHitRow) -> Self { Self { chunk_id: row.id, document_id: row.document_id, chunk_index: row.chunk_index, content: row.content, doc_title: row.doc_title, similarity: row.similarity, metadata: row.metadata, } } }