feat(db): 创建 ai_knowledge_documents + ai_knowledge_chunks 表迁移
文档表支持多种来源(上传/URL/手动)、处理状态追踪、嵌入进度计数。切片表含 vector(1536) 列 + HNSW 索引,用于向量相似搜索。
This commit is contained in:
@@ -173,6 +173,7 @@ mod m20260526_000163_points_rule_unique_event_type;
|
||||
mod m20260526_000164_ai_prompt_add_analysis_type;
|
||||
mod m20260526_000165_ai_prompt_fix_analysis_type;
|
||||
mod m20260526_000166_create_ai_knowledge_bases;
|
||||
mod m20260526_000167_create_ai_knowledge_documents;
|
||||
|
||||
pub struct Migrator;
|
||||
|
||||
@@ -353,6 +354,7 @@ impl MigratorTrait for Migrator {
|
||||
Box::new(m20260526_000164_ai_prompt_add_analysis_type::Migration),
|
||||
Box::new(m20260526_000165_ai_prompt_fix_analysis_type::Migration),
|
||||
Box::new(m20260526_000166_create_ai_knowledge_bases::Migration),
|
||||
Box::new(m20260526_000167_create_ai_knowledge_documents::Migration),
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,297 @@
|
||||
use sea_orm_migration::prelude::*;
|
||||
|
||||
#[derive(DeriveMigrationName)]
|
||||
pub struct Migration;
|
||||
|
||||
#[derive(Iden)]
|
||||
enum AiKnowledgeDocuments {
|
||||
Table,
|
||||
Id,
|
||||
TenantId,
|
||||
KnowledgeBaseId,
|
||||
Title,
|
||||
DocType,
|
||||
SourceType,
|
||||
SourceUrl,
|
||||
FileName,
|
||||
FileSize,
|
||||
FileMimeType,
|
||||
Content,
|
||||
Status,
|
||||
ChunkCount,
|
||||
EmbeddedCount,
|
||||
ErrorMessage,
|
||||
ProcessingStartedAt,
|
||||
ProcessingCompletedAt,
|
||||
CreatedAt,
|
||||
UpdatedAt,
|
||||
CreatedBy,
|
||||
UpdatedBy,
|
||||
DeletedAt,
|
||||
VersionLock,
|
||||
}
|
||||
|
||||
#[derive(Iden)]
|
||||
enum AiKnowledgeChunks {
|
||||
Table,
|
||||
Id,
|
||||
TenantId,
|
||||
KnowledgeBaseId,
|
||||
DocumentId,
|
||||
ChunkIndex,
|
||||
Content,
|
||||
TokenCount,
|
||||
StartOffset,
|
||||
EndOffset,
|
||||
PageNumber,
|
||||
Metadata,
|
||||
HitCount,
|
||||
LastHitAt,
|
||||
CreatedAt,
|
||||
UpdatedAt,
|
||||
CreatedBy,
|
||||
UpdatedBy,
|
||||
DeletedAt,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MigrationTrait for Migration {
|
||||
async fn up(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
// ai_knowledge_documents
|
||||
manager
|
||||
.create_table(
|
||||
Table::create()
|
||||
.table(AiKnowledgeDocuments::Table)
|
||||
.if_not_exists()
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::Id)
|
||||
.uuid()
|
||||
.not_null()
|
||||
.primary_key(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::TenantId)
|
||||
.uuid()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::KnowledgeBaseId)
|
||||
.uuid()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::Title)
|
||||
.string_len(500)
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::DocType)
|
||||
.string_len(30)
|
||||
.not_null()
|
||||
.default("manual"),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::SourceType)
|
||||
.string_len(30)
|
||||
.not_null()
|
||||
.default("manual"),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::SourceUrl).text())
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::FileName).string_len(500))
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::FileSize).big_integer())
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::FileMimeType).string_len(100))
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::Content).text())
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::Status)
|
||||
.string_len(20)
|
||||
.not_null()
|
||||
.default("pending"),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::ChunkCount)
|
||||
.integer()
|
||||
.not_null()
|
||||
.default(0),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::EmbeddedCount)
|
||||
.integer()
|
||||
.not_null()
|
||||
.default(0),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::ErrorMessage).text())
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::ProcessingStartedAt)
|
||||
.timestamp_with_time_zone(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::ProcessingCompletedAt)
|
||||
.timestamp_with_time_zone(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::CreatedAt)
|
||||
.timestamp_with_time_zone()
|
||||
.not_null()
|
||||
.default(Expr::current_timestamp()),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::UpdatedAt)
|
||||
.timestamp_with_time_zone()
|
||||
.not_null()
|
||||
.default(Expr::current_timestamp()),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::CreatedBy).uuid())
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::UpdatedBy).uuid())
|
||||
.col(ColumnDef::new(AiKnowledgeDocuments::DeletedAt).timestamp_with_time_zone())
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeDocuments::VersionLock)
|
||||
.integer()
|
||||
.not_null()
|
||||
.default(1),
|
||||
)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.create_index(
|
||||
Index::create()
|
||||
.if_not_exists()
|
||||
.name("idx_doc_kb")
|
||||
.table(AiKnowledgeDocuments::Table)
|
||||
.col(AiKnowledgeDocuments::KnowledgeBaseId)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.create_index(
|
||||
Index::create()
|
||||
.if_not_exists()
|
||||
.name("idx_doc_status")
|
||||
.table(AiKnowledgeDocuments::Table)
|
||||
.col(AiKnowledgeDocuments::KnowledgeBaseId)
|
||||
.col(AiKnowledgeDocuments::Status)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// ai_knowledge_chunks
|
||||
manager
|
||||
.create_table(
|
||||
Table::create()
|
||||
.table(AiKnowledgeChunks::Table)
|
||||
.if_not_exists()
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::Id)
|
||||
.uuid()
|
||||
.not_null()
|
||||
.primary_key(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::TenantId)
|
||||
.uuid()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::KnowledgeBaseId)
|
||||
.uuid()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::DocumentId)
|
||||
.uuid()
|
||||
.not_null(),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::ChunkIndex)
|
||||
.integer()
|
||||
.not_null(),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::Content).text().not_null())
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::TokenCount).integer())
|
||||
// embedding is vector(1536) — added via raw SQL below
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::StartOffset).integer())
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::EndOffset).integer())
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::PageNumber).integer())
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::Metadata)
|
||||
.json_binary()
|
||||
.not_null()
|
||||
.default(Expr::cust("'{}'")),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::HitCount)
|
||||
.integer()
|
||||
.not_null()
|
||||
.default(0),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::LastHitAt).timestamp_with_time_zone())
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::CreatedAt)
|
||||
.timestamp_with_time_zone()
|
||||
.not_null()
|
||||
.default(Expr::current_timestamp()),
|
||||
)
|
||||
.col(
|
||||
ColumnDef::new(AiKnowledgeChunks::UpdatedAt)
|
||||
.timestamp_with_time_zone()
|
||||
.not_null()
|
||||
.default(Expr::current_timestamp()),
|
||||
)
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::CreatedBy).uuid())
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::UpdatedBy).uuid())
|
||||
.col(ColumnDef::new(AiKnowledgeChunks::DeletedAt).timestamp_with_time_zone())
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
// Add embedding column as vector type (SeaORM doesn't support this natively)
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
"ALTER TABLE ai_knowledge_chunks ADD COLUMN IF NOT EXISTS embedding vector(1536)",
|
||||
)
|
||||
.await?;
|
||||
|
||||
// HNSW index for vector similarity search
|
||||
manager
|
||||
.get_connection()
|
||||
.execute_unprepared(
|
||||
"CREATE INDEX IF NOT EXISTS idx_chunk_embedding ON ai_knowledge_chunks USING hnsw (embedding vector_cosine_ops)",
|
||||
)
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.create_index(
|
||||
Index::create()
|
||||
.if_not_exists()
|
||||
.name("idx_chunk_document")
|
||||
.table(AiKnowledgeChunks::Table)
|
||||
.col(AiKnowledgeChunks::DocumentId)
|
||||
.to_owned(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.create_index(
|
||||
Index::create()
|
||||
.if_not_exists()
|
||||
.name("idx_chunk_kb")
|
||||
.table(AiKnowledgeChunks::Table)
|
||||
.col(AiKnowledgeChunks::KnowledgeBaseId)
|
||||
.to_owned(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
async fn down(&self, manager: &SchemaManager) -> Result<(), DbErr> {
|
||||
manager
|
||||
.drop_table(Table::drop().table(AiKnowledgeChunks::Table).to_owned())
|
||||
.await?;
|
||||
|
||||
manager
|
||||
.drop_table(Table::drop().table(AiKnowledgeDocuments::Table).to_owned())
|
||||
.await
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user