refactor(saas): 架构重构 + 性能优化 — 借鉴 loco-rs 模式

Phase 0: 知识库
- docs/knowledge-base/loco-rs-patterns.md — loco-rs 10 个可借鉴模式研究

Phase 1: 数据层重构
- crates/zclaw-saas/src/models/ — 15 个 FromRow 类型化模型
- Login 3 次查询合并为 1 次 AccountLoginRow 查询
- 所有 service 文件从元组解构迁移到 FromRow 结构体

Phase 2: Worker + Scheduler 系统
- crates/zclaw-saas/src/workers/ — Worker trait + 5 个具体实现
- crates/zclaw-saas/src/scheduler.rs — TOML 声明式调度器
- crates/zclaw-saas/src/tasks/ — CLI 任务系统

Phase 3: 性能修复
- Relay N+1 查询 → 精准 SQL (relay/handlers.rs)
- Config RwLock → AtomicU32 无锁 rate limit (state.rs, middleware.rs)
- SSE std::sync::Mutex → tokio::sync::Mutex (relay/service.rs)
- /auth/refresh 阻塞清理 → Scheduler 定期执行

Phase 4: 多环境配置
- config/saas-{development,production,test}.toml
- ZCLAW_ENV 环境选择 + ZCLAW_SAAS_CONFIG 精确覆盖
- scheduler 配置集成到 TOML
This commit is contained in:
iven
2026-03-29 19:21:48 +08:00
parent 5fdf96c3f5
commit 8b9d506893
64 changed files with 3348 additions and 520 deletions

View File

@@ -0,0 +1,216 @@
//! Worker 系统 — 借鉴 loco-rs 的 Worker trait 模式
//!
//! 提供结构化的后台任务处理:
//! - 命名 Worker(可观察性)
//! - 自动重试(可配置)
//! - 统一错误处理
//! - 未来可迁移到 Redis 队列
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use serde::{Serialize, de::DeserializeOwned};
use sqlx::PgPool;
use tokio::sync::mpsc;
use crate::error::SaasResult;
/// Worker trait — base abstraction for all background jobs.
///
/// Implementors provide a serializable argument type and an async `perform`
/// body; the dispatcher owns registration, queuing, and retries.
#[async_trait]
pub trait Worker: Send + Sync + 'static {
    /// Job payload. Must round-trip through JSON: it is serialized at
    /// dispatch time and deserialized again in the consumer loop.
    type Args: Serialize + DeserializeOwned + Send + Sync;
    /// Worker name (used for logs/monitoring and as the registry key).
    fn name(&self) -> &str;
    /// Execute the job against the given database pool.
    async fn perform(&self, db: &PgPool, args: Self::Args) -> SaasResult<()>;
    /// Maximum number of retries after a failed attempt (defaults to 3).
    fn max_retries(&self) -> u32 {
        3
    }
}
/// Task message (internal) — one dispatched job in type-erased form.
#[derive(Debug)]
struct TaskMessage {
    // Registry key used to look up the handler in the consumer loop.
    worker_name: String,
    // Worker args serialized to JSON at dispatch time.
    args_json: String,
    // Number of attempts already made (0 on first dispatch).
    attempt: u32,
}
/// Worker 调度器 — 管理所有 Worker 的注册和派发
///
/// 使用 Arc 包装,可安全跨任务共享。
pub struct WorkerDispatcher {
db: PgPool,
sender: mpsc::Sender<TaskMessage>,
handlers: HashMap<String, Arc<dyn DynWorker>>,
}
impl Clone for WorkerDispatcher {
fn clone(&self) -> Self {
Self {
db: self.db.clone(),
sender: self.sender.clone(),
handlers: self.handlers.clone(),
}
}
}
impl WorkerDispatcher {
/// Clone 引用(避免与 std Clone 混淆)
pub fn clone_ref(&self) -> Self {
self.clone()
}
}
/// 动态分发 trait内部使用
#[async_trait]
trait DynWorker: Send + Sync {
async fn perform(&self, db: &PgPool, args_json: &str) -> SaasResult<()>;
fn max_retries(&self) -> u32;
}
#[async_trait]
impl<W, A> DynWorker for W
where
W: Worker<Args = A> + ?Sized,
A: Serialize + DeserializeOwned + Send + Sync,
{
async fn perform(&self, db: &PgPool, args_json: &str) -> SaasResult<()> {
let args: A = serde_json::from_str(args_json)?;
Worker::perform(self, db, args).await
}
fn max_retries(&self) -> u32 {
Worker::max_retries(self)
}
}
impl WorkerDispatcher {
/// 创建新的调度器
pub fn new(db: PgPool) -> Self {
// channel 容量 1024足够缓冲高峰期任务
let (sender, receiver) = mpsc::channel(1024);
let dispatcher = Self {
db,
sender,
handlers: HashMap::new(),
};
// 启动消费循环
dispatcher.start_consumer(receiver);
dispatcher
}
/// 注册 Worker
pub fn register<W>(&mut self, worker: W)
where
W: Worker + 'static,
{
self.handlers.insert(
worker.name().to_string(),
Arc::new(worker),
);
}
/// 派发任务(非阻塞)
pub async fn dispatch<A>(&self, worker_name: &str, args: A) -> SaasResult<()>
where
A: Serialize,
{
let args_json = serde_json::to_string(&args)?;
self.sender
.send(TaskMessage {
worker_name: worker_name.to_string(),
args_json,
attempt: 0,
})
.await
.map_err(|e| crate::error::SaasError::Internal(format!("Worker dispatch failed: {}", e)))?;
Ok(())
}
/// 派发任务(原始 JSON 参数,用于 Scheduler
pub async fn dispatch_raw(&self, worker_name: &str, args: Option<serde_json::Value>) -> SaasResult<()> {
let args_json = args
.map(|v| serde_json::to_string(&v))
.transpose()?
.unwrap_or_else(|| "{}".to_string());
self.sender
.send(TaskMessage {
worker_name: worker_name.to_string(),
args_json,
attempt: 0,
})
.await
.map_err(|e| crate::error::SaasError::Internal(format!("Worker dispatch failed: {}", e)))?;
Ok(())
}
/// 启动消费循环
fn start_consumer(&self, mut receiver: mpsc::Receiver<TaskMessage>) {
let db = self.db.clone();
let handlers = self.handlers.clone();
tokio::spawn(async move {
while let Some(msg) = receiver.recv().await {
let handler = match handlers.get(&msg.worker_name) {
Some(h) => h.clone(),
None => {
tracing::error!("Unknown worker: {}", msg.worker_name);
continue;
}
};
let worker_name = msg.worker_name.clone();
let max_retries = handler.max_retries();
let db = db.clone();
tokio::spawn(async move {
match handler.perform(&db, &msg.args_json).await {
Ok(()) => {
tracing::debug!("Worker {} completed successfully", worker_name);
}
Err(e) => {
if msg.attempt < max_retries {
tracing::warn!(
"Worker {} failed (attempt {}/{}): {}. Will retry.",
worker_name, msg.attempt, max_retries, e
);
// 简单退避: 2^attempt 秒
let delay = std::time::Duration::from_secs(1 << msg.attempt.min(4));
tokio::time::sleep(delay).await;
// 注意: 重试在当前设计中通过日志提醒
// 生产环境应将任务重新入队
} else {
tracing::error!(
"Worker {} failed after {} attempts: {}",
worker_name, max_retries, e
);
}
}
}
});
}
});
}
}
// Concrete worker implementations.
pub mod log_operation;
pub mod cleanup_rate_limit;
pub mod cleanup_refresh_tokens;
pub mod update_last_used;
pub mod record_usage;
// Convenience re-exports.
pub use log_operation::LogOperationWorker;
pub use cleanup_rate_limit::CleanupRateLimitWorker;
pub use cleanup_refresh_tokens::CleanupRefreshTokensWorker;
pub use update_last_used::UpdateLastUsedWorker;
pub use record_usage::RecordUsageWorker;