From 0edb47563818d5bc836287ad4d3c28eef2039864 Mon Sep 17 00:00:00 2001 From: iven Date: Tue, 26 May 2026 23:10:57 +0800 Subject: [PATCH] =?UTF-8?q?feat(db):=20=E5=88=9B=E5=BB=BA=20ai=5Fknowledge?= =?UTF-8?q?=5Fdocuments=20+=20ai=5Fknowledge=5Fchunks=20=E8=A1=A8=E8=BF=81?= =?UTF-8?q?=E7=A7=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 文档表支持多种来源(上传/URL/手动)、处理状态追踪、嵌入进度计数。 切片表含 vector(1536) 列 + HNSW 索引用于向量相似搜索。 --- crates/erp-server/migration/src/lib.rs | 2 + ...26_000167_create_ai_knowledge_documents.rs | 297 ++++++++++++++++++ 2 files changed, 299 insertions(+) create mode 100644 crates/erp-server/migration/src/m20260526_000167_create_ai_knowledge_documents.rs diff --git a/crates/erp-server/migration/src/lib.rs b/crates/erp-server/migration/src/lib.rs index 667a169..57ae836 100644 --- a/crates/erp-server/migration/src/lib.rs +++ b/crates/erp-server/migration/src/lib.rs @@ -173,6 +173,7 @@ mod m20260526_000163_points_rule_unique_event_type; mod m20260526_000164_ai_prompt_add_analysis_type; mod m20260526_000165_ai_prompt_fix_analysis_type; mod m20260526_000166_create_ai_knowledge_bases; +mod m20260526_000167_create_ai_knowledge_documents; pub struct Migrator; @@ -353,6 +354,7 @@ impl MigratorTrait for Migrator { Box::new(m20260526_000164_ai_prompt_add_analysis_type::Migration), Box::new(m20260526_000165_ai_prompt_fix_analysis_type::Migration), Box::new(m20260526_000166_create_ai_knowledge_bases::Migration), + Box::new(m20260526_000167_create_ai_knowledge_documents::Migration), ] } } diff --git a/crates/erp-server/migration/src/m20260526_000167_create_ai_knowledge_documents.rs b/crates/erp-server/migration/src/m20260526_000167_create_ai_knowledge_documents.rs new file mode 100644 index 0000000..b4c50c5 --- /dev/null +++ b/crates/erp-server/migration/src/m20260526_000167_create_ai_knowledge_documents.rs @@ -0,0 +1,297 @@ +use sea_orm_migration::prelude::*; + +#[derive(DeriveMigrationName)] +pub struct Migration; + 
+#[derive(Iden)] // identifiers for the ai_knowledge_documents table (`Table`) and its columns
+enum AiKnowledgeDocuments {
+    Table,
+    Id,
+    TenantId,
+    KnowledgeBaseId,
+    Title,
+    DocType,
+    SourceType,
+    SourceUrl,
+    FileName,
+    FileSize,
+    FileMimeType,
+    Content,
+    Status, // column defaults to "pending" in `up`
+    ChunkCount,
+    EmbeddedCount,
+    ErrorMessage,
+    ProcessingStartedAt,
+    ProcessingCompletedAt,
+    CreatedAt,
+    UpdatedAt,
+    CreatedBy,
+    UpdatedBy,
+    DeletedAt,
+    VersionLock, // column defaults to 1 in `up`
+}
+
+#[derive(Iden)] // identifiers for ai_knowledge_chunks; `embedding` has no variant because `up` adds it via raw SQL
+enum AiKnowledgeChunks {
+    Table,
+    Id,
+    TenantId,
+    KnowledgeBaseId,
+    DocumentId,
+    ChunkIndex,
+    Content,
+    TokenCount,
+    StartOffset,
+    EndOffset,
+    PageNumber,
+    Metadata,
+    HitCount,
+    LastHitAt,
+    CreatedAt,
+    UpdatedAt,
+    CreatedBy,
+    UpdatedBy,
+    DeletedAt,
+}
+
+#[async_trait::async_trait]
+impl MigrationTrait for Migration { // creates/drops the knowledge document and chunk tables
+    async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> { // every step is guarded by IF NOT EXISTS; errors propagate via `?`
+        // ai_knowledge_documents: document metadata, source info, processing status and chunk/embedding counters
+        manager
+            .create_table(
+                Table::create()
+                    .table(AiKnowledgeDocuments::Table)
+                    .if_not_exists()
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::Id)
+                            .uuid()
+                            .not_null()
+                            .primary_key(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::TenantId)
+                            .uuid()
+                            .not_null(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::KnowledgeBaseId)
+                            .uuid()
+                            .not_null(), // plain UUID column; no foreign key is declared in this migration
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::Title)
+                            .string_len(500)
+                            .not_null(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::DocType)
+                            .string_len(30)
+                            .not_null()
+                            .default("manual"),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::SourceType)
+                            .string_len(30)
+                            .not_null()
+                            .default("manual"),
+                    )
+                    .col(ColumnDef::new(AiKnowledgeDocuments::SourceUrl).text())
+                    .col(ColumnDef::new(AiKnowledgeDocuments::FileName).string_len(500))
+                    .col(ColumnDef::new(AiKnowledgeDocuments::FileSize).big_integer())
+                    .col(ColumnDef::new(AiKnowledgeDocuments::FileMimeType).string_len(100))
+                    .col(ColumnDef::new(AiKnowledgeDocuments::Content).text())
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::Status)
+                            .string_len(20)
+                            .not_null()
+                            .default("pending"),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::ChunkCount)
+                            .integer()
+                            .not_null()
+                            .default(0),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::EmbeddedCount)
+                            .integer()
+                            .not_null()
+                            .default(0),
+                    )
+                    .col(ColumnDef::new(AiKnowledgeDocuments::ErrorMessage).text())
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::ProcessingStartedAt)
+                            .timestamp_with_time_zone(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::ProcessingCompletedAt)
+                            .timestamp_with_time_zone(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::CreatedAt)
+                            .timestamp_with_time_zone()
+                            .not_null()
+                            .default(Expr::current_timestamp()),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::UpdatedAt)
+                            .timestamp_with_time_zone()
+                            .not_null()
+                            .default(Expr::current_timestamp()), // default applies on insert only; no update trigger is created here
+                    )
+                    .col(ColumnDef::new(AiKnowledgeDocuments::CreatedBy).uuid())
+                    .col(ColumnDef::new(AiKnowledgeDocuments::UpdatedBy).uuid())
+                    .col(ColumnDef::new(AiKnowledgeDocuments::DeletedAt).timestamp_with_time_zone())
+                    .col(
+                        ColumnDef::new(AiKnowledgeDocuments::VersionLock)
+                            .integer()
+                            .not_null()
+                            .default(1),
+                    )
+                    .to_owned(),
+            )
+            .await?;
+
+        manager
+            .create_index(
+                Index::create()
+                    .if_not_exists()
+                    .name("idx_doc_kb") // NOTE(review): idx_doc_status also leads with knowledge_base_id, so this index may be redundant — confirm
+                    .table(AiKnowledgeDocuments::Table)
+                    .col(AiKnowledgeDocuments::KnowledgeBaseId)
+                    .to_owned(),
+            )
+            .await?;
+
+        manager
+            .create_index(
+                Index::create()
+                    .if_not_exists()
+                    .name("idx_doc_status") // composite index on (knowledge_base_id, status)
+                    .table(AiKnowledgeDocuments::Table)
+                    .col(AiKnowledgeDocuments::KnowledgeBaseId)
+                    .col(AiKnowledgeDocuments::Status)
+                    .to_owned(),
+            )
+            .await?;
+
+        // ai_knowledge_chunks: chunked document content plus offsets, hit statistics and (added below) the embedding
+        manager
+            .create_table(
+                Table::create()
+                    .table(AiKnowledgeChunks::Table)
+                    .if_not_exists()
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::Id)
+                            .uuid()
+                            .not_null()
+                            .primary_key(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::TenantId)
+                            .uuid()
+                            .not_null(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::KnowledgeBaseId)
+                            .uuid()
+                            .not_null(),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::DocumentId)
+                            .uuid()
+                            .not_null(), // plain UUID column; no foreign key to ai_knowledge_documents is declared here
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::ChunkIndex)
+                            .integer()
+                            .not_null(),
+                    )
+                    .col(ColumnDef::new(AiKnowledgeChunks::Content).text().not_null())
+                    .col(ColumnDef::new(AiKnowledgeChunks::TokenCount).integer())
+                    // embedding vector(1536) is not declared here — it is added via raw SQL once the table exists
+                    .col(ColumnDef::new(AiKnowledgeChunks::StartOffset).integer())
+                    .col(ColumnDef::new(AiKnowledgeChunks::EndOffset).integer())
+                    .col(ColumnDef::new(AiKnowledgeChunks::PageNumber).integer())
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::Metadata)
+                            .json_binary()
+                            .not_null()
+                            .default(Expr::cust("'{}'")), // raw SQL literal '{}' used as the column default
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::HitCount)
+                            .integer()
+                            .not_null()
+                            .default(0),
+                    )
+                    .col(ColumnDef::new(AiKnowledgeChunks::LastHitAt).timestamp_with_time_zone())
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::CreatedAt)
+                            .timestamp_with_time_zone()
+                            .not_null()
+                            .default(Expr::current_timestamp()),
+                    )
+                    .col(
+                        ColumnDef::new(AiKnowledgeChunks::UpdatedAt)
+                            .timestamp_with_time_zone()
+                            .not_null()
+                            .default(Expr::current_timestamp()),
+                    )
+                    .col(ColumnDef::new(AiKnowledgeChunks::CreatedBy).uuid())
+                    .col(ColumnDef::new(AiKnowledgeChunks::UpdatedBy).uuid())
+                    .col(ColumnDef::new(AiKnowledgeChunks::DeletedAt).timestamp_with_time_zone())
+                    .to_owned(),
+            )
+            .await?;
+
+        // Add the embedding column with raw SQL, since SeaORM has no native vector column type
+        manager // NOTE(review): the vector type presumably needs the pgvector extension, which is not created here — confirm it is enabled earlier
+            .get_connection()
+            .execute_unprepared(
+                "ALTER TABLE ai_knowledge_chunks ADD COLUMN IF NOT EXISTS embedding vector(1536)",
+            )
+            .await?;
+
+        // HNSW index on embedding with the vector_cosine_ops operator class, for vector similarity search
+        manager
+            .get_connection()
+            .execute_unprepared(
+                "CREATE INDEX IF NOT EXISTS idx_chunk_embedding ON ai_knowledge_chunks USING hnsw (embedding vector_cosine_ops)",
+            )
+            .await?;
+
+        manager
+            .create_index(
+                Index::create()
+                    .if_not_exists()
+                    .name("idx_chunk_document") // lookup of chunks by document_id
+                    .table(AiKnowledgeChunks::Table)
+                    .col(AiKnowledgeChunks::DocumentId)
+                    .to_owned(),
+            )
+            .await?;
+
+        manager
+            .create_index(
+                Index::create()
+                    .if_not_exists()
+                    .name("idx_chunk_kb") // lookup of chunks by knowledge_base_id
+                    .table(AiKnowledgeChunks::Table)
+                    .col(AiKnowledgeChunks::KnowledgeBaseId)
+                    .to_owned(),
+            )
+            .await // final step: its Result is returned as the result of `up`
+    }
+
+    async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> { // rollback: drops chunks first, then documents (reverse of creation order)
+        manager
+            .drop_table(Table::drop().table(AiKnowledgeChunks::Table).to_owned()) // NOTE(review): no if_exists() guard, unlike the idempotent steps in `up` — confirm intended
+            .await?;
+
+        manager
+            .drop_table(Table::drop().table(AiKnowledgeDocuments::Table).to_owned()) // indexes are not dropped explicitly; presumably removed with their tables
+            .await
+    }
+}