feat(saas): add GenerateEmbedding worker for knowledge chunking

- Markdown-aware content splitting (512-token chunks with a 64-token overlap)
- CJK keyword extraction from chunk content with stop-word filtering
- Full refresh strategy (delete old chunks → re-insert on update)
- Phase 2 placeholder for vector embedding API integration

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
iven
2026-04-02 00:23:38 +08:00
parent ef60f9a183
commit 830e9fa301
3 changed files with 157 additions and 1 deletion

View File

@@ -12,6 +12,7 @@ use zclaw_saas::workers::cleanup_rate_limit::CleanupRateLimitWorker;
use zclaw_saas::workers::record_usage::RecordUsageWorker;
use zclaw_saas::workers::update_last_used::UpdateLastUsedWorker;
use zclaw_saas::workers::aggregate_usage::AggregateUsageWorker;
use zclaw_saas::workers::generate_embedding::GenerateEmbeddingWorker;
#[tokio::main]
async fn main() -> anyhow::Result<()> {
@@ -46,7 +47,8 @@ async fn main() -> anyhow::Result<()> {
dispatcher.register(RecordUsageWorker);
dispatcher.register(UpdateLastUsedWorker);
dispatcher.register(AggregateUsageWorker);
info!("Worker dispatcher initialized (6 workers registered)");
dispatcher.register(GenerateEmbeddingWorker);
info!("Worker dispatcher initialized (7 workers registered)");
// Graceful-shutdown token — once cancelled, all SSE streams and long-lived connections terminate immediately
let shutdown_token = CancellationToken::new();