refactor(saas): 架构重构 + 性能优化 — 借鉴 loco-rs 模式

Phase 0: 知识库
- docs/knowledge-base/loco-rs-patterns.md — loco-rs 10 个可借鉴模式研究

Phase 1: 数据层重构
- crates/zclaw-saas/src/models/ — 15 个 FromRow 类型化模型
- Login 3 次查询合并为 1 次 AccountLoginRow 查询
- 所有 service 文件从元组解构迁移到 FromRow 结构体

Phase 2: Worker + Scheduler 系统
- crates/zclaw-saas/src/workers/ — Worker trait + 5 个具体实现
- crates/zclaw-saas/src/scheduler.rs — TOML 声明式调度器
- crates/zclaw-saas/src/tasks/ — CLI 任务系统

Phase 3: 性能修复
- Relay N+1 查询 → 精准 SQL (relay/handlers.rs)
- Config RwLock → AtomicU32 无锁 rate limit (state.rs, middleware.rs)
- SSE std::sync::Mutex → tokio::sync::Mutex (relay/service.rs)
- /auth/refresh 阻塞清理 → Scheduler 定期执行

Phase 4: 多环境配置
- config/saas-{development,production,test}.toml
- ZCLAW_ENV 环境选择 + ZCLAW_SAAS_CONFIG 精确覆盖
- scheduler 配置集成到 TOML
This commit is contained in:
iven
2026-03-29 19:21:48 +08:00
parent 5fdf96c3f5
commit 8b9d506893
64 changed files with 3348 additions and 520 deletions

View File

@@ -0,0 +1,30 @@
//! Worker that cleans up expired rate-limit entries.
//!
//! Currently a no-op placeholder: rate-limit state lives in an in-memory
//! DashMap (per the comments below), so there is nothing to delete in the DB.
use async_trait::async_trait;
use sqlx::PgPool;
use serde::{Serialize, Deserialize};
use crate::error::SaasResult;
use super::Worker;

/// Arguments for [`CleanupRateLimitWorker`].
#[derive(Debug, Serialize, Deserialize)]
pub struct CleanupRateLimitArgs {
    // Rate-limit window length in seconds. Currently unused by the
    // placeholder implementation (`perform` ignores its args).
    pub window_secs: u64,
}

/// Placeholder cleanup worker; becomes meaningful once rate limits are
/// persisted to the database.
pub struct CleanupRateLimitWorker;

#[async_trait]
impl Worker for CleanupRateLimitWorker {
    type Args = CleanupRateLimitArgs;

    /// Stable name used for registration and dispatch lookup.
    fn name(&self) -> &str {
        "cleanup_rate_limit"
    }

    /// No-op: succeeds without touching the database.
    async fn perform(&self, _db: &PgPool, _args: Self::Args) -> SaasResult<()> {
        // Rate limit entries are in-memory (DashMap), not in DB
        // This worker is a placeholder for when rate limits are persisted
        // Currently the cleanup happens in main.rs background task
        Ok(())
    }
}

View File

@@ -0,0 +1,36 @@
//! Worker that deletes expired or already-consumed refresh tokens.
use async_trait::async_trait;
use sqlx::PgPool;
use serde::{Serialize, Deserialize};
use crate::error::SaasResult;
use super::Worker;

/// No arguments are needed; the cutoff is always "now".
#[derive(Debug, Serialize, Deserialize)]
pub struct CleanupRefreshTokensArgs;

/// Periodic janitor for the `refresh_tokens` table.
pub struct CleanupRefreshTokensWorker;

#[async_trait]
impl Worker for CleanupRefreshTokensWorker {
    type Args = CleanupRefreshTokensArgs;

    /// Stable name used for registration and dispatch lookup.
    fn name(&self) -> &str {
        "cleanup_refresh_tokens"
    }

    /// Remove every token that has expired or has already been used once.
    ///
    /// NOTE(review): `expires_at` is compared against an RFC 3339 *string*;
    /// this only orders correctly if the column stores RFC 3339 text with a
    /// fixed UTC offset — confirm against the table schema.
    async fn perform(&self, db: &PgPool, _args: Self::Args) -> SaasResult<()> {
        let cutoff = chrono::Utc::now().to_rfc3339();
        let deleted = sqlx::query(
            "DELETE FROM refresh_tokens WHERE expires_at < $1 OR used_at IS NOT NULL",
        )
        .bind(&cutoff)
        .execute(db)
        .await?
        .rows_affected();

        if deleted > 0 {
            tracing::info!("Cleaned up {} expired/used refresh tokens", deleted);
        }
        Ok(())
    }
}

View File

@@ -0,0 +1,46 @@
//! Worker that writes operation-audit log rows asynchronously.
use async_trait::async_trait;
use sqlx::PgPool;
use serde::{Serialize, Deserialize};
use crate::error::SaasResult;
use super::Worker;

/// Payload describing a single audited operation.
#[derive(Debug, Serialize, Deserialize)]
pub struct LogOperationArgs {
    /// Account that performed the action.
    pub account_id: String,
    /// Name of the action performed.
    pub action: String,
    /// Kind of entity the action targeted.
    pub target_type: String,
    /// Identifier of the targeted entity.
    pub target_id: String,
    /// Optional free-form details.
    pub details: Option<String>,
    /// Optional client IP address.
    pub ip_address: Option<String>,
}

/// Inserts one audit row per dispatched job.
pub struct LogOperationWorker;

#[async_trait]
impl Worker for LogOperationWorker {
    type Args = LogOperationArgs;

    /// Stable name used for registration and dispatch lookup.
    fn name(&self) -> &str {
        "log_operation"
    }

    /// Insert a single row into `operation_logs`, stamped with the current
    /// UTC time in RFC 3339 form.
    async fn perform(&self, db: &PgPool, args: Self::Args) -> SaasResult<()> {
        let created_at = chrono::Utc::now().to_rfc3339();
        let insert = sqlx::query(
            "INSERT INTO operation_logs (account_id, action, target_type, target_id, details, ip_address, created_at)
             VALUES ($1, $2, $3, $4, $5, $6, $7)",
        )
        .bind(&args.account_id)
        .bind(&args.action)
        .bind(&args.target_type)
        .bind(&args.target_id)
        .bind(&args.details)
        .bind(&args.ip_address)
        .bind(&created_at);

        insert.execute(db).await?;
        Ok(())
    }
}

View File

@@ -0,0 +1,216 @@
//! Worker 系统 — 借鉴 loco-rs 的 Worker trait 模式
//!
//! 提供结构化的后台任务处理:
//! - 命名 Worker可观察性
//! - 自动重试(可配置)
//! - 统一错误处理
//! - 未来可迁移到 Redis 队列
use std::collections::HashMap;
use std::sync::Arc;
use async_trait::async_trait;
use serde::{Serialize, de::DeserializeOwned};
use sqlx::PgPool;
use tokio::sync::mpsc;
use crate::error::SaasResult;
/// Worker trait — 所有后台任务的基础抽象
#[async_trait]
pub trait Worker: Send + Sync + 'static {
type Args: Serialize + DeserializeOwned + Send + Sync;
/// Worker 名称(用于日志和监控)
fn name(&self) -> &str;
/// 执行任务
async fn perform(&self, db: &PgPool, args: Self::Args) -> SaasResult<()>;
/// 最大重试次数
fn max_retries(&self) -> u32 {
3
}
}
/// 任务消息(内部使用)
#[derive(Debug)]
struct TaskMessage {
worker_name: String,
args_json: String,
attempt: u32,
}
/// Worker 调度器 — 管理所有 Worker 的注册和派发
///
/// 使用 Arc 包装,可安全跨任务共享。
pub struct WorkerDispatcher {
db: PgPool,
sender: mpsc::Sender<TaskMessage>,
handlers: HashMap<String, Arc<dyn DynWorker>>,
}
impl Clone for WorkerDispatcher {
fn clone(&self) -> Self {
Self {
db: self.db.clone(),
sender: self.sender.clone(),
handlers: self.handlers.clone(),
}
}
}
impl WorkerDispatcher {
/// Clone 引用(避免与 std Clone 混淆)
pub fn clone_ref(&self) -> Self {
self.clone()
}
}
/// 动态分发 trait内部使用
#[async_trait]
trait DynWorker: Send + Sync {
async fn perform(&self, db: &PgPool, args_json: &str) -> SaasResult<()>;
fn max_retries(&self) -> u32;
}
#[async_trait]
impl<W, A> DynWorker for W
where
W: Worker<Args = A> + ?Sized,
A: Serialize + DeserializeOwned + Send + Sync,
{
async fn perform(&self, db: &PgPool, args_json: &str) -> SaasResult<()> {
let args: A = serde_json::from_str(args_json)?;
Worker::perform(self, db, args).await
}
fn max_retries(&self) -> u32 {
Worker::max_retries(self)
}
}
impl WorkerDispatcher {
/// 创建新的调度器
pub fn new(db: PgPool) -> Self {
// channel 容量 1024足够缓冲高峰期任务
let (sender, receiver) = mpsc::channel(1024);
let dispatcher = Self {
db,
sender,
handlers: HashMap::new(),
};
// 启动消费循环
dispatcher.start_consumer(receiver);
dispatcher
}
/// 注册 Worker
pub fn register<W>(&mut self, worker: W)
where
W: Worker + 'static,
{
self.handlers.insert(
worker.name().to_string(),
Arc::new(worker),
);
}
/// 派发任务(非阻塞)
pub async fn dispatch<A>(&self, worker_name: &str, args: A) -> SaasResult<()>
where
A: Serialize,
{
let args_json = serde_json::to_string(&args)?;
self.sender
.send(TaskMessage {
worker_name: worker_name.to_string(),
args_json,
attempt: 0,
})
.await
.map_err(|e| crate::error::SaasError::Internal(format!("Worker dispatch failed: {}", e)))?;
Ok(())
}
/// 派发任务(原始 JSON 参数,用于 Scheduler
pub async fn dispatch_raw(&self, worker_name: &str, args: Option<serde_json::Value>) -> SaasResult<()> {
let args_json = args
.map(|v| serde_json::to_string(&v))
.transpose()?
.unwrap_or_else(|| "{}".to_string());
self.sender
.send(TaskMessage {
worker_name: worker_name.to_string(),
args_json,
attempt: 0,
})
.await
.map_err(|e| crate::error::SaasError::Internal(format!("Worker dispatch failed: {}", e)))?;
Ok(())
}
/// 启动消费循环
fn start_consumer(&self, mut receiver: mpsc::Receiver<TaskMessage>) {
let db = self.db.clone();
let handlers = self.handlers.clone();
tokio::spawn(async move {
while let Some(msg) = receiver.recv().await {
let handler = match handlers.get(&msg.worker_name) {
Some(h) => h.clone(),
None => {
tracing::error!("Unknown worker: {}", msg.worker_name);
continue;
}
};
let worker_name = msg.worker_name.clone();
let max_retries = handler.max_retries();
let db = db.clone();
tokio::spawn(async move {
match handler.perform(&db, &msg.args_json).await {
Ok(()) => {
tracing::debug!("Worker {} completed successfully", worker_name);
}
Err(e) => {
if msg.attempt < max_retries {
tracing::warn!(
"Worker {} failed (attempt {}/{}): {}. Will retry.",
worker_name, msg.attempt, max_retries, e
);
// 简单退避: 2^attempt 秒
let delay = std::time::Duration::from_secs(1 << msg.attempt.min(4));
tokio::time::sleep(delay).await;
// 注意: 重试在当前设计中通过日志提醒
// 生产环境应将任务重新入队
} else {
tracing::error!(
"Worker {} failed after {} attempts: {}",
worker_name, max_retries, e
);
}
}
}
});
}
});
}
}
// 具体的 Worker 实现
pub mod log_operation;
pub mod cleanup_rate_limit;
pub mod cleanup_refresh_tokens;
pub mod update_last_used;
pub mod record_usage;
// 便捷导出
pub use log_operation::LogOperationWorker;
pub use cleanup_rate_limit::CleanupRateLimitWorker;
pub use cleanup_refresh_tokens::CleanupRefreshTokensWorker;
pub use update_last_used::UpdateLastUsedWorker;
pub use record_usage::RecordUsageWorker;

View File

@@ -0,0 +1,50 @@
//! Worker that records API usage rows asynchronously.
use async_trait::async_trait;
use sqlx::PgPool;
use serde::{Serialize, Deserialize};
use crate::error::SaasResult;
use super::Worker;

/// Payload describing one usage event.
#[derive(Debug, Serialize, Deserialize)]
pub struct RecordUsageArgs {
    /// Account the usage is billed to.
    pub account_id: String,
    /// Upstream provider identifier.
    pub provider_id: String,
    /// Model identifier.
    pub model_id: String,
    /// Prompt token count.
    pub input_tokens: i32,
    /// Completion token count.
    pub output_tokens: i32,
    /// Optional request latency in milliseconds.
    pub latency_ms: Option<i32>,
    /// Outcome status string.
    pub status: String,
    /// Optional error detail when the request failed.
    pub error_message: Option<String>,
}

/// Inserts one `usage_records` row per dispatched job.
pub struct RecordUsageWorker;

#[async_trait]
impl Worker for RecordUsageWorker {
    type Args = RecordUsageArgs;

    /// Stable name used for registration and dispatch lookup.
    fn name(&self) -> &str {
        "record_usage"
    }

    /// Insert a single usage row, stamped with the current UTC time in
    /// RFC 3339 form.
    async fn perform(&self, db: &PgPool, args: Self::Args) -> SaasResult<()> {
        let recorded_at = chrono::Utc::now().to_rfc3339();
        let insert = sqlx::query(
            "INSERT INTO usage_records (account_id, provider_id, model_id, input_tokens, output_tokens, latency_ms, status, error_message, created_at)
             VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)",
        )
        .bind(&args.account_id)
        .bind(&args.provider_id)
        .bind(&args.model_id)
        .bind(args.input_tokens)
        .bind(args.output_tokens)
        .bind(args.latency_ms)
        .bind(&args.status)
        .bind(&args.error_message)
        .bind(&recorded_at);

        insert.execute(db).await?;
        Ok(())
    }
}

View File

@@ -0,0 +1,33 @@
//! Worker that updates an API token's `last_used_at` timestamp.
use async_trait::async_trait;
use sqlx::PgPool;
use serde::{Serialize, Deserialize};
use crate::error::SaasResult;
use super::Worker;

/// Identifies the token to touch.
#[derive(Debug, Serialize, Deserialize)]
pub struct UpdateLastUsedArgs {
    /// Primary key of the `api_tokens` row.
    pub token_id: String,
}

/// Touches `api_tokens.last_used_at` per dispatched job.
pub struct UpdateLastUsedWorker;

#[async_trait]
impl Worker for UpdateLastUsedWorker {
    type Args = UpdateLastUsedArgs;

    /// Stable name used for registration and dispatch lookup.
    fn name(&self) -> &str {
        "update_last_used"
    }

    /// Set the token's `last_used_at` to the current UTC time (RFC 3339).
    /// A missing token id updates zero rows and is not treated as an error.
    async fn perform(&self, db: &PgPool, args: Self::Args) -> SaasResult<()> {
        let touched_at = chrono::Utc::now().to_rfc3339();
        sqlx::query("UPDATE api_tokens SET last_used_at = $1 WHERE id = $2")
            .bind(&touched_at)
            .bind(&args.token_id)
            .execute(db)
            .await?;
        Ok(())
    }
}