hms/crates/erp-ai/src/service/knowledge_v2.rs

use sea_orm::{
    ColumnTrait, ConnectionTrait, EntityTrait, PaginatorTrait, QueryFilter, QueryOrder, Set,
};
use serde::{Deserialize, Serialize};
use uuid::Uuid;

use crate::entity::ai_knowledge_bases;
use crate::error::{AiError, AiResult};

// ─── DTO ───

/// Request payload for creating a knowledge base.
///
/// Optional fields left as `None` fall back to the defaults applied by
/// `KnowledgeV2Service::create`.
#[derive(Debug, Deserialize, Serialize, utoipa::ToSchema)]
pub struct CreateKnowledgeBaseReq {
    /// Name stored on the new knowledge base.
    pub name: String,
    /// Value written to the `kb_type` column; it is not validated in this file.
    pub kb_type: String,
    /// Optional description.
    pub description: Option<String>,
    /// Optional icon, stored as given.
    pub icon: Option<String>,
    /// Chunking configuration as free-form JSON. When omitted, `create` stores
    /// `{"strategy": "auto", "chunk_size": 500, "overlap": 50}`.
    pub chunk_strategy: Option<serde_json::Value>,
    /// Free-form JSON. When omitted, `create` stores an empty array.
    pub intent_keywords: Option<serde_json::Value>,
    /// Optional embedding model identifier, stored as given.
    pub embedding_model: Option<String>,
    /// Whether the knowledge base is enabled. When omitted, `create` stores `true`.
    pub is_enabled: Option<bool>,
}

/// Request payload for partially updating a knowledge base.
///
/// Every field is optional: `None` keeps the currently stored value. Because
/// `None` means "unchanged", the nullable columns (`description`, `icon`,
/// `embedding_model`) cannot be reset to NULL through this request.
#[derive(Debug, Deserialize, Serialize, utoipa::ToSchema)]
pub struct UpdateKnowledgeBaseReq {
    /// New name, if it should change.
    pub name: Option<String>,
    /// New `kb_type` value, if it should change.
    pub kb_type: Option<String>,
    /// New description, if it should change.
    pub description: Option<String>,
    /// New icon, if it should change.
    pub icon: Option<String>,
    /// Replacement chunking configuration (free-form JSON), if it should change.
    pub chunk_strategy: Option<serde_json::Value>,
    /// Replacement intent keywords (free-form JSON), if they should change.
    pub intent_keywords: Option<serde_json::Value>,
    /// New embedding model identifier, if it should change.
    pub embedding_model: Option<String>,
    /// New enabled flag, if it should change.
    pub is_enabled: Option<bool>,
}

/// Query parameters accepted by `KnowledgeV2Service::list`.
#[derive(Debug, Deserialize, utoipa::IntoParams)]
pub struct ListKnowledgeBasesQuery {
    /// When present, only knowledge bases whose `kb_type` equals this value are returned.
    pub kb_type: Option<String>,
    /// When present, only knowledge bases with this enabled flag are returned.
    pub is_enabled: Option<bool>,
    /// 1-based page number; defaults to 1 when omitted.
    pub page: Option<u64>,
    /// Number of items per page; defaults to 20 when omitted.
    pub page_size: Option<u64>,
}

// ─── Service ───

/// Service for managing knowledge bases and running vector search over their chunks.
pub struct KnowledgeV2Service {
    /// Database connection used by every query issued from this service.
    db: sea_orm::DatabaseConnection,
}

impl KnowledgeV2Service {
    /// Creates a service that runs all of its queries on `db`.
    pub fn new(db: sea_orm::DatabaseConnection) -> Self {
        Self { db }
    }

    pub async fn list(
        &self,
        tenant_id: Uuid,
        query: &ListKnowledgeBasesQuery,
    ) -> AiResult<(Vec<ai_knowledge_bases::Model>, u64)> {
        let page = query.page.unwrap_or(1);
        let page_size = query.page_size.unwrap_or(20);

        let mut find = ai_knowledge_bases::Entity::find()
            .filter(ai_knowledge_bases::Column::TenantId.eq(tenant_id))
            .filter(ai_knowledge_bases::Column::DeletedAt.is_null());

        if let Some(ref kb_type) = query.kb_type {
            find = find.filter(ai_knowledge_bases::Column::KbType.eq(kb_type.as_str()));
        }
        if let Some(is_enabled) = query.is_enabled {
            find = find.filter(ai_knowledge_bases::Column::IsEnabled.eq(is_enabled));
        }

        let paginator = find
            .order_by_desc(ai_knowledge_bases::Column::CreatedAt)
            .paginate(&self.db, page_size);

        let total = paginator.num_items().await?;
        let items = paginator.fetch_page(page - 1).await?;

        Ok((items, total))
    }

    /// Fetches a single knowledge base that belongs to `tenant_id`.
    ///
    /// The row is loaded by primary key and then checked in memory: a row owned
    /// by another tenant, or one that has been soft-deleted, is reported exactly
    /// like a missing row.
    ///
    /// # Errors
    ///
    /// * `AiError::DbError` when the query fails.
    /// * `AiError::KnowledgeError` when no visible row exists for `id`.
    pub async fn get_by_id(
        &self,
        tenant_id: Uuid,
        id: Uuid,
    ) -> AiResult<ai_knowledge_bases::Model> {
        let found = ai_knowledge_bases::Entity::find_by_id(id)
            .one(&self.db)
            .await
            .map_err(|e| AiError::DbError(e.to_string()))?;

        match found {
            Some(kb) if kb.tenant_id == tenant_id && kb.deleted_at.is_none() => Ok(kb),
            _ => Err(AiError::KnowledgeError("知识库不存在".into())),
        }
    }

    pub async fn create(
        &self,
        tenant_id: Uuid,
        user_id: Uuid,
        req: CreateKnowledgeBaseReq,
    ) -> AiResult<Uuid> {
        let id = Uuid::now_v7();
        let now = chrono::Utc::now();

        let active = ai_knowledge_bases::ActiveModel {
            id: Set(id),
            tenant_id: Set(tenant_id),
            name: Set(req.name),
            kb_type: Set(req.kb_type),
            description: Set(req.description),
            icon: Set(req.icon),
            chunk_strategy: Set(req.chunk_strategy.unwrap_or(
                serde_json::json!({"strategy": "auto", "chunk_size": 500, "overlap": 50}),
            )),
            intent_keywords: Set(req.intent_keywords.unwrap_or(serde_json::json!([]))),
            embedding_model: Set(req.embedding_model),
            is_enabled: Set(req.is_enabled.unwrap_or(true)),
            document_count: Set(0),
            chunk_count: Set(0),
            created_at: Set(now),
            updated_at: Set(now),
            created_by: Set(Some(user_id)),
            updated_by: Set(Some(user_id)),
            deleted_at: Set(None),
            version_lock: Set(1),
        };

        ai_knowledge_bases::Entity::insert(active)
            .exec(&self.db)
            .await
            .map_err(|e| AiError::DbError(e.to_string()))?;

        Ok(id)
    }

    pub async fn update(
        &self,
        tenant_id: Uuid,
        user_id: Uuid,
        id: Uuid,
        req: UpdateKnowledgeBaseReq,
    ) -> AiResult<()> {
        let existing = self.get_by_id(tenant_id, id).await?;
        let now = chrono::Utc::now();

        let active = ai_knowledge_bases::ActiveModel {
            id: Set(existing.id),
            tenant_id: Set(existing.tenant_id),
            name: Set(req.name.unwrap_or(existing.name)),
            kb_type: Set(req.kb_type.unwrap_or(existing.kb_type)),
            description: Set(req.description.or(existing.description)),
            icon: Set(req.icon.or(existing.icon)),
            chunk_strategy: Set(req.chunk_strategy.unwrap_or(existing.chunk_strategy)),
            intent_keywords: Set(req.intent_keywords.unwrap_or(existing.intent_keywords)),
            embedding_model: Set(req.embedding_model.or(existing.embedding_model)),
            is_enabled: Set(req.is_enabled.unwrap_or(existing.is_enabled)),
            document_count: Set(existing.document_count),
            chunk_count: Set(existing.chunk_count),
            created_at: Set(existing.created_at),
            updated_at: Set(now),
            created_by: Set(existing.created_by),
            updated_by: Set(Some(user_id)),
            deleted_at: Set(existing.deleted_at),
            version_lock: Set(existing.version_lock + 1),
        };

        ai_knowledge_bases::Entity::update(active)
            .exec(&self.db)
            .await
            .map_err(|e| AiError::DbError(e.to_string()))?;

        Ok(())
    }

    pub async fn delete(&self, tenant_id: Uuid, id: Uuid) -> AiResult<()> {
        let existing = self.get_by_id(tenant_id, id).await?;
        let now = chrono::Utc::now();

        let active = ai_knowledge_bases::ActiveModel {
            id: Set(existing.id),
            tenant_id: Set(existing.tenant_id),
            name: Set(existing.name),
            kb_type: Set(existing.kb_type),
            description: Set(existing.description),
            icon: Set(existing.icon),
            chunk_strategy: Set(existing.chunk_strategy),
            intent_keywords: Set(existing.intent_keywords),
            embedding_model: Set(existing.embedding_model),
            is_enabled: Set(existing.is_enabled),
            document_count: Set(existing.document_count),
            chunk_count: Set(existing.chunk_count),
            created_at: Set(existing.created_at),
            updated_at: Set(now),
            created_by: Set(existing.created_by),
            updated_by: Set(existing.updated_by),
            deleted_at: Set(Some(now)),
            version_lock: Set(existing.version_lock + 1),
        };

        ai_knowledge_bases::Entity::update(active)
            .exec(&self.db)
            .await
            .map_err(|e| AiError::DbError(e.to_string()))?;

        Ok(())
    }

    /// Adds `delta` to `document_count` in a single UPDATE statement
    /// (intended for use after a document upload succeeds).
    ///
    /// Also refreshes `updated_at` and bumps `version_lock`. Soft-deleted rows
    /// are skipped, and a statement that matches no row is not reported as an
    /// error.
    ///
    /// NOTE(review): unlike the other methods, the statement is keyed by `kb_id`
    /// only and is not scoped by tenant.
    ///
    /// # Errors
    ///
    /// `AiError::DbError` when the statement fails.
    pub async fn increment_document_count(&self, kb_id: Uuid, delta: i32) -> AiResult<()> {
        let sql = r#"
            UPDATE ai_knowledge_bases
            SET document_count = document_count + $2,
                updated_at = $3,
                version_lock = version_lock + 1
            WHERE id = $1 AND deleted_at IS NULL
        "#;
        self.apply_counter_delta(sql, kb_id, delta).await
    }

    /// Adds `delta` to `chunk_count` in a single UPDATE statement
    /// (intended for use after chunks have been generated).
    ///
    /// Also refreshes `updated_at` and bumps `version_lock`. Soft-deleted rows
    /// are skipped, and a statement that matches no row is not reported as an
    /// error.
    ///
    /// NOTE(review): unlike the other methods, the statement is keyed by `kb_id`
    /// only and is not scoped by tenant.
    ///
    /// # Errors
    ///
    /// `AiError::DbError` when the statement fails.
    pub async fn increment_chunk_count(&self, kb_id: Uuid, delta: i32) -> AiResult<()> {
        let sql = r#"
            UPDATE ai_knowledge_bases
            SET chunk_count = chunk_count + $2,
                updated_at = $3,
                version_lock = version_lock + 1
            WHERE id = $1 AND deleted_at IS NULL
        "#;
        self.apply_counter_delta(sql, kb_id, delta).await
    }

    /// Executes one of the counter UPDATE statements above.
    ///
    /// `sql` must use the placeholders `$1` = knowledge-base id, `$2` = delta and
    /// `$3` = new `updated_at` timestamp; the values are bound in that order.
    async fn apply_counter_delta(
        &self,
        sql: &'static str,
        kb_id: Uuid,
        delta: i32,
    ) -> AiResult<()> {
        let stmt = sea_orm::Statement::from_sql_and_values(
            sea_orm::DatabaseBackend::Postgres,
            sql,
            [
                sea_orm::Value::from(kb_id),
                sea_orm::Value::from(delta),
                sea_orm::Value::from(chrono::Utc::now()),
            ],
        );
        self.db
            .execute(stmt)
            .await
            .map_err(|e| AiError::DbError(e.to_string()))?;
        Ok(())
    }

    /// Vector similarity search: returns up to `top_k` chunks of the given
    /// knowledge base that are closest to `query_embedding`.
    ///
    /// Chunks are joined to their documents to obtain the document title.
    /// Soft-deleted chunks, chunks of soft-deleted documents and chunks without
    /// an embedding are excluded. Rows are ordered by ascending
    /// `embedding <=> query` distance and `similarity` is reported as
    /// `1 - distance` (presumably pgvector cosine distance — confirm against
    /// the column/index definition).
    ///
    /// Returns an empty list without querying the database when `top_k <= 0` or
    /// `query_embedding` is empty, since neither can yield a hit.
    ///
    /// # Errors
    ///
    /// `AiError::DbError` when the query fails or a row cannot be decoded.
    pub async fn vector_search(
        &self,
        tenant_id: Uuid,
        kb_id: Uuid,
        query_embedding: &[f32],
        top_k: i64,
    ) -> AiResult<Vec<SearchHit>> {
        // A negative LIMIT is rejected by Postgres and LIMIT 0 returns nothing;
        // an empty embedding presumably cannot be cast to `vector` either.
        // Answer these cases directly instead of surfacing a database error.
        if top_k <= 0 || query_embedding.is_empty() {
            return Ok(Vec::new());
        }

        let vector_str = crate::service::embedding::format_vector(query_embedding);
        let sql = r#"
            SELECT c.id, c.document_id, c.chunk_index, c.content, c.metadata,
                   d.title AS doc_title,
                   1 - (c.embedding <=> $3::vector) AS similarity
            FROM ai_knowledge_chunks c
            JOIN ai_knowledge_documents d ON d.id = c.document_id
            WHERE c.tenant_id = $1
              AND c.knowledge_base_id = $2
              AND c.deleted_at IS NULL
              AND d.deleted_at IS NULL
              AND c.embedding IS NOT NULL
            ORDER BY c.embedding <=> $3::vector
            LIMIT $4
        "#;
        let stmt = sea_orm::Statement::from_sql_and_values(
            sea_orm::DatabaseBackend::Postgres,
            sql,
            [
                sea_orm::Value::from(tenant_id),
                sea_orm::Value::from(kb_id),
                // Bound as text and cast to `vector` inside the statement.
                sea_orm::Value::String(Some(Box::new(vector_str))),
                sea_orm::Value::from(top_k),
            ],
        );

        // Name the row type explicitly rather than relying on inference from
        // the binding's annotation.
        let rows: Vec<SearchHitRow> =
            <SearchHitRow as sea_orm::FromQueryResult>::find_by_statement(stmt)
                .all(&self.db)
                .await
                .map_err(|e| AiError::DbError(e.to_string()))?;

        Ok(rows.into_iter().map(SearchHit::from).collect())
    }
}

/// Raw row shape produced by the `vector_search` query; field names match the
/// selected column names / aliases.
#[derive(Debug, sea_orm::FromQueryResult)]
struct SearchHitRow {
    // Chunk id (`c.id`).
    id: Uuid,
    // Id of the document the chunk belongs to.
    document_id: Uuid,
    // Value of the chunk's `chunk_index` column.
    chunk_index: i32,
    // Chunk text.
    content: String,
    // Chunk metadata as JSON.
    // NOTE(review): decoding presumably fails if the column is NULL — confirm it is NOT NULL.
    metadata: serde_json::Value,
    // Title of the owning document (`d.title`).
    doc_title: String,
    // `1 - distance` between the chunk embedding and the query embedding.
    similarity: f64,
}

/// A single result of `KnowledgeV2Service::vector_search`.
#[derive(Debug, serde::Serialize)]
pub struct SearchHit {
    /// Id of the matching chunk.
    pub chunk_id: Uuid,
    /// Id of the document the chunk belongs to.
    pub document_id: Uuid,
    /// Value of the chunk's `chunk_index` column.
    pub chunk_index: i32,
    /// Chunk text.
    pub content: String,
    /// Title of the owning document.
    pub doc_title: String,
    /// `1 - distance` between the chunk embedding and the query embedding;
    /// results are returned closest first.
    pub similarity: f64,
    /// Chunk metadata as JSON.
    pub metadata: serde_json::Value,
}

impl From<SearchHitRow> for SearchHit {
    fn from(row: SearchHitRow) -> Self {
        Self {
            chunk_id: row.id,
            document_id: row.document_id,
            chunk_index: row.chunk_index,
            content: row.content,
            doc_title: row.doc_title,
            similarity: row.similarity,
            metadata: row.metadata,
        }
    }
}