feat(db): 创建 ai_knowledge_documents + ai_knowledge_chunks 表迁移

文档表支持多种来源(上传/URL/手动)、处理状态追踪、嵌入进度计数。
切片表含 vector(1536) 列 + HNSW 索引用于向量相似搜索。
This commit is contained in:
iven
2026-05-26 23:10:57 +08:00
parent a7526455b4
commit 0edb475638
2 changed files with 299 additions and 0 deletions

View File

@@ -173,6 +173,7 @@ mod m20260526_000163_points_rule_unique_event_type;
mod m20260526_000164_ai_prompt_add_analysis_type;
mod m20260526_000165_ai_prompt_fix_analysis_type;
mod m20260526_000166_create_ai_knowledge_bases;
mod m20260526_000167_create_ai_knowledge_documents;
pub struct Migrator;
@@ -353,6 +354,7 @@ impl MigratorTrait for Migrator {
Box::new(m20260526_000164_ai_prompt_add_analysis_type::Migration),
Box::new(m20260526_000165_ai_prompt_fix_analysis_type::Migration),
Box::new(m20260526_000166_create_ai_knowledge_bases::Migration),
Box::new(m20260526_000167_create_ai_knowledge_documents::Migration),
]
}
}

View File

@@ -0,0 +1,297 @@
use sea_orm_migration::prelude::*;
/// Migration that creates the `ai_knowledge_documents` and `ai_knowledge_chunks` tables.
///
/// The migration name is supplied by the `DeriveMigrationName` derive.
#[derive(DeriveMigrationName)]
pub struct Migration;
/// Identifiers for the `ai_knowledge_documents` table and its columns.
///
/// Only names are declared here; column types, nullability and defaults are
/// assigned where the table is built in `Migration::up`.
#[derive(Iden)]
enum AiKnowledgeDocuments {
// The table itself.
Table,
// UUID primary key.
Id,
// Owning tenant (UUID, required).
TenantId,
// Knowledge base the document belongs to (UUID, required; no FK constraint is declared).
KnowledgeBaseId,
// Document title, up to 500 characters.
Title,
// Document kind; defaults to "manual".
DocType,
// Where the document came from; defaults to "manual".
// NOTE(review): the commit message mentions upload / URL / manual sources — confirm the allowed values.
SourceType,
// Optional source URL.
SourceUrl,
// Optional file descriptors: name, size (big integer) and MIME type.
FileName,
FileSize,
FileMimeType,
// Optional text content of the document.
Content,
// Processing status; defaults to "pending".
Status,
// Counters, both defaulting to 0.
// NOTE(review): presumably total chunks vs. chunks already embedded — confirm with the processing code.
ChunkCount,
EmbeddedCount,
// Optional error text.
ErrorMessage,
// Optional processing start / completion timestamps (with time zone).
ProcessingStartedAt,
ProcessingCompletedAt,
// Audit timestamps; both default to the current timestamp.
CreatedAt,
UpdatedAt,
// Optional UUIDs of the creating / last-updating user.
CreatedBy,
UpdatedBy,
// Optional timestamp; NOTE(review): looks like a soft-delete marker — confirm.
DeletedAt,
// Integer defaulting to 1; NOTE(review): presumably an optimistic-lock version — confirm.
VersionLock,
}
/// Identifiers for the `ai_knowledge_chunks` table and its columns.
///
/// The `embedding` column has no variant here: the migration adds it with raw
/// SQL as `vector(1536)` instead of going through the query builder.
#[derive(Iden)]
enum AiKnowledgeChunks {
// The table itself.
Table,
// UUID primary key.
Id,
// Owning tenant (UUID, required).
TenantId,
// Knowledge base and document the chunk belongs to (UUIDs, required; no FK constraints are declared).
KnowledgeBaseId,
DocumentId,
// Required integer; NOTE(review): presumably the chunk's position within its document — confirm.
ChunkIndex,
// Chunk text (required).
Content,
// Optional integer token count.
TokenCount,
// Optional integer offsets; NOTE(review): unit (bytes vs. characters) is not visible here — confirm.
StartOffset,
EndOffset,
// Optional integer page number.
PageNumber,
// JSONB metadata, required, defaulting to '{}'.
Metadata,
// Integer defaulting to 0, and an optional timestamp.
// NOTE(review): presumably retrieval-hit statistics — confirm with the search code.
HitCount,
LastHitAt,
// Audit timestamps; both default to the current timestamp.
CreatedAt,
UpdatedAt,
// Optional UUIDs of the creating / last-updating user.
CreatedBy,
UpdatedBy,
// Optional timestamp; NOTE(review): looks like a soft-delete marker — confirm.
DeletedAt,
}
#[async_trait::async_trait]
impl MigrationTrait for Migration {
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
// ai_knowledge_documents
manager
.create_table(
Table::create()
.table(AiKnowledgeDocuments::Table)
.if_not_exists()
.col(
ColumnDef::new(AiKnowledgeDocuments::Id)
.uuid()
.not_null()
.primary_key(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::TenantId)
.uuid()
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::KnowledgeBaseId)
.uuid()
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::Title)
.string_len(500)
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::DocType)
.string_len(30)
.not_null()
.default("manual"),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::SourceType)
.string_len(30)
.not_null()
.default("manual"),
)
.col(ColumnDef::new(AiKnowledgeDocuments::SourceUrl).text())
.col(ColumnDef::new(AiKnowledgeDocuments::FileName).string_len(500))
.col(ColumnDef::new(AiKnowledgeDocuments::FileSize).big_integer())
.col(ColumnDef::new(AiKnowledgeDocuments::FileMimeType).string_len(100))
.col(ColumnDef::new(AiKnowledgeDocuments::Content).text())
.col(
ColumnDef::new(AiKnowledgeDocuments::Status)
.string_len(20)
.not_null()
.default("pending"),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::ChunkCount)
.integer()
.not_null()
.default(0),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::EmbeddedCount)
.integer()
.not_null()
.default(0),
)
.col(ColumnDef::new(AiKnowledgeDocuments::ErrorMessage).text())
.col(
ColumnDef::new(AiKnowledgeDocuments::ProcessingStartedAt)
.timestamp_with_time_zone(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::ProcessingCompletedAt)
.timestamp_with_time_zone(),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::CreatedAt)
.timestamp_with_time_zone()
.not_null()
.default(Expr::current_timestamp()),
)
.col(
ColumnDef::new(AiKnowledgeDocuments::UpdatedAt)
.timestamp_with_time_zone()
.not_null()
.default(Expr::current_timestamp()),
)
.col(ColumnDef::new(AiKnowledgeDocuments::CreatedBy).uuid())
.col(ColumnDef::new(AiKnowledgeDocuments::UpdatedBy).uuid())
.col(ColumnDef::new(AiKnowledgeDocuments::DeletedAt).timestamp_with_time_zone())
.col(
ColumnDef::new(AiKnowledgeDocuments::VersionLock)
.integer()
.not_null()
.default(1),
)
.to_owned(),
)
.await?;
manager
.create_index(
Index::create()
.if_not_exists()
.name("idx_doc_kb")
.table(AiKnowledgeDocuments::Table)
.col(AiKnowledgeDocuments::KnowledgeBaseId)
.to_owned(),
)
.await?;
manager
.create_index(
Index::create()
.if_not_exists()
.name("idx_doc_status")
.table(AiKnowledgeDocuments::Table)
.col(AiKnowledgeDocuments::KnowledgeBaseId)
.col(AiKnowledgeDocuments::Status)
.to_owned(),
)
.await?;
// ai_knowledge_chunks
manager
.create_table(
Table::create()
.table(AiKnowledgeChunks::Table)
.if_not_exists()
.col(
ColumnDef::new(AiKnowledgeChunks::Id)
.uuid()
.not_null()
.primary_key(),
)
.col(
ColumnDef::new(AiKnowledgeChunks::TenantId)
.uuid()
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeChunks::KnowledgeBaseId)
.uuid()
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeChunks::DocumentId)
.uuid()
.not_null(),
)
.col(
ColumnDef::new(AiKnowledgeChunks::ChunkIndex)
.integer()
.not_null(),
)
.col(ColumnDef::new(AiKnowledgeChunks::Content).text().not_null())
.col(ColumnDef::new(AiKnowledgeChunks::TokenCount).integer())
// embedding is vector(1536) — added via raw SQL below
.col(ColumnDef::new(AiKnowledgeChunks::StartOffset).integer())
.col(ColumnDef::new(AiKnowledgeChunks::EndOffset).integer())
.col(ColumnDef::new(AiKnowledgeChunks::PageNumber).integer())
.col(
ColumnDef::new(AiKnowledgeChunks::Metadata)
.json_binary()
.not_null()
.default(Expr::cust("'{}'")),
)
.col(
ColumnDef::new(AiKnowledgeChunks::HitCount)
.integer()
.not_null()
.default(0),
)
.col(ColumnDef::new(AiKnowledgeChunks::LastHitAt).timestamp_with_time_zone())
.col(
ColumnDef::new(AiKnowledgeChunks::CreatedAt)
.timestamp_with_time_zone()
.not_null()
.default(Expr::current_timestamp()),
)
.col(
ColumnDef::new(AiKnowledgeChunks::UpdatedAt)
.timestamp_with_time_zone()
.not_null()
.default(Expr::current_timestamp()),
)
.col(ColumnDef::new(AiKnowledgeChunks::CreatedBy).uuid())
.col(ColumnDef::new(AiKnowledgeChunks::UpdatedBy).uuid())
.col(ColumnDef::new(AiKnowledgeChunks::DeletedAt).timestamp_with_time_zone())
.to_owned(),
)
.await?;
// Add embedding column as vector type (SeaORM doesn't support this natively)
manager
.get_connection()
.execute_unprepared(
"ALTER TABLE ai_knowledge_chunks ADD COLUMN IF NOT EXISTS embedding vector(1536)",
)
.await?;
// HNSW index for vector similarity search
manager
.get_connection()
.execute_unprepared(
"CREATE INDEX IF NOT EXISTS idx_chunk_embedding ON ai_knowledge_chunks USING hnsw (embedding vector_cosine_ops)",
)
.await?;
manager
.create_index(
Index::create()
.if_not_exists()
.name("idx_chunk_document")
.table(AiKnowledgeChunks::Table)
.col(AiKnowledgeChunks::DocumentId)
.to_owned(),
)
.await?;
manager
.create_index(
Index::create()
.if_not_exists()
.name("idx_chunk_kb")
.table(AiKnowledgeChunks::Table)
.col(AiKnowledgeChunks::KnowledgeBaseId)
.to_owned(),
)
.await
}
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
manager
.drop_table(Table::drop().table(AiKnowledgeChunks::Table).to_owned())
.await?;
manager
.drop_table(Table::drop().table(AiKnowledgeDocuments::Table).to_owned())
.await
}
}