feat(ai): 向量搜索 + hit test API
- KnowledgeV2Service.vector_search: pgvector 余弦相似度搜索
- SearchHit DTO: chunk_id/document_id/similarity/metadata
- hit_test handler: POST /ai/documents/hit-test (embed query → 搜索)
- AiState 添加 embedding 字段,共享 EmbeddingService 实例
- top_k 限制最大 20

Phase 2 Task 11
This commit is contained in:
@@ -255,4 +255,82 @@ impl KnowledgeV2Service {
|
||||
.map_err(|e| AiError::DbError(e.to_string()))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// 向量相似度搜索:在指定知识库中搜索与 query_embedding 最相似的 top_k 个切片
|
||||
pub async fn vector_search(
|
||||
&self,
|
||||
tenant_id: Uuid,
|
||||
kb_id: Uuid,
|
||||
query_embedding: &[f32],
|
||||
top_k: i64,
|
||||
) -> AiResult<Vec<SearchHit>> {
|
||||
let vector_str = crate::service::embedding::format_vector(query_embedding);
|
||||
let sql = r#"
|
||||
SELECT c.id, c.document_id, c.chunk_index, c.content, c.metadata,
|
||||
d.title AS doc_title,
|
||||
1 - (c.embedding <=> $3::vector) AS similarity
|
||||
FROM ai_knowledge_chunks c
|
||||
JOIN ai_knowledge_documents d ON d.id = c.document_id
|
||||
WHERE c.tenant_id = $1
|
||||
AND c.knowledge_base_id = $2
|
||||
AND c.deleted_at IS NULL
|
||||
AND d.deleted_at IS NULL
|
||||
AND c.embedding IS NOT NULL
|
||||
ORDER BY c.embedding <=> $3::vector
|
||||
LIMIT $4
|
||||
"#;
|
||||
let stmt = sea_orm::Statement::from_sql_and_values(
|
||||
sea_orm::DatabaseBackend::Postgres,
|
||||
sql,
|
||||
[
|
||||
sea_orm::Value::from(tenant_id),
|
||||
sea_orm::Value::from(kb_id),
|
||||
sea_orm::Value::String(Some(Box::new(vector_str))),
|
||||
sea_orm::Value::from(top_k),
|
||||
],
|
||||
);
|
||||
|
||||
let rows: Vec<SearchHitRow> = sea_orm::FromQueryResult::find_by_statement(stmt)
|
||||
.all(&self.db)
|
||||
.await
|
||||
.map_err(|e| AiError::DbError(e.to_string()))?;
|
||||
|
||||
Ok(rows.into_iter().map(SearchHit::from).collect())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, sea_orm::FromQueryResult)]
|
||||
struct SearchHitRow {
|
||||
id: Uuid,
|
||||
document_id: Uuid,
|
||||
chunk_index: i32,
|
||||
content: String,
|
||||
metadata: serde_json::Value,
|
||||
doc_title: String,
|
||||
similarity: f64,
|
||||
}
|
||||
|
||||
#[derive(Debug, serde::Serialize)]
|
||||
pub struct SearchHit {
|
||||
pub chunk_id: Uuid,
|
||||
pub document_id: Uuid,
|
||||
pub chunk_index: i32,
|
||||
pub content: String,
|
||||
pub doc_title: String,
|
||||
pub similarity: f64,
|
||||
pub metadata: serde_json::Value,
|
||||
}
|
||||
|
||||
impl From<SearchHitRow> for SearchHit {
|
||||
fn from(row: SearchHitRow) -> Self {
|
||||
Self {
|
||||
chunk_id: row.id,
|
||||
document_id: row.document_id,
|
||||
chunk_index: row.chunk_index,
|
||||
content: row.content,
|
||||
doc_title: row.doc_title,
|
||||
similarity: row.similarity,
|
||||
metadata: row.metadata,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user