fix: pre-release audit fixes — Twitter OAuth, DataMasking perf, Prompt versioning

- Twitter like/retweet: return explicit unavailable error instead of
  sending doomed Bearer token requests (would 403 on Twitter API v2)
- DataMasking: pre-compile regex patterns with LazyLock (was compiling
  6 patterns on every mask() call)
- Prompt version: fix get_version handler ignoring version path param,
  add service::get_version_by_number for correct per-version retrieval
This commit is contained in:
iven
2026-04-09 16:43:24 +08:00
parent f2d6a3b6b7
commit 3f2acb49fb
4 changed files with 99 additions and 79 deletions

View File

@@ -7,7 +7,7 @@
use std::collections::HashMap;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};
use std::sync::{Arc, LazyLock, RwLock};
use async_trait::async_trait;
use regex::Regex;
@@ -15,6 +15,26 @@ use zclaw_types::{Message, Result};
use super::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
// ---------------------------------------------------------------------------
// Pre-compiled regex patterns (compiled once, reused across all calls)
// ---------------------------------------------------------------------------
static RE_COMPANY: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"[^\s]{1,20}(?:公司|厂|集团|工作室|商行|有限|股份)").unwrap()
});
static RE_MONEY: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"[¥¥$]\s*[\d,.]+[万亿]?元?|[\d,.]+[万亿]元").unwrap()
});
static RE_PHONE: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"1[3-9]\d-?\d{4}-?\d{4}").unwrap()
});
static RE_EMAIL: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap()
});
static RE_ID_CARD: LazyLock<Regex> = LazyLock::new(|| {
Regex::new(r"\b\d{17}[\dXx]\b").unwrap()
});
// ---------------------------------------------------------------------------
// DataMasker — entity detection and token mapping
// ---------------------------------------------------------------------------
@@ -73,38 +93,28 @@ impl DataMasker {
let mut entities = Vec::new();
// Company names: X公司、XX集团、XX工作室 (1-20 char prefix + suffix)
if let Ok(re) = Regex::new(r"[^\s]{1,20}(?:公司|厂|集团|工作室|商行|有限|股份)") {
for cap in re.find_iter(text) {
entities.push(cap.as_str().to_string());
}
for cap in RE_COMPANY.find_iter(text) {
entities.push(cap.as_str().to_string());
}
// Money amounts: ¥50万、¥100元、$200、50万元
if let Ok(re) = Regex::new(r"[¥¥$]\s*[\d,.]+[万亿]?元?|[\d,.]+[万亿]元") {
for cap in re.find_iter(text) {
entities.push(cap.as_str().to_string());
}
for cap in RE_MONEY.find_iter(text) {
entities.push(cap.as_str().to_string());
}
// Phone numbers: 1XX-XXXX-XXXX or 1XXXXXXXXXX
if let Ok(re) = Regex::new(r"1[3-9]\d-?\d{4}-?\d{4}") {
for cap in re.find_iter(text) {
entities.push(cap.as_str().to_string());
}
for cap in RE_PHONE.find_iter(text) {
entities.push(cap.as_str().to_string());
}
// Email addresses
if let Ok(re) = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") {
for cap in re.find_iter(text) {
entities.push(cap.as_str().to_string());
}
for cap in RE_EMAIL.find_iter(text) {
entities.push(cap.as_str().to_string());
}
// ID card numbers (simplified): 18 digits
if let Ok(re) = Regex::new(r"\b\d{17}[\dXx]\b") {
for cap in re.find_iter(text) {
entities.push(cap.as_str().to_string());
}
for cap in RE_ID_CARD.find_iter(text) {
entities.push(cap.as_str().to_string());
}
// Sort by length descending to replace longest entities first
@@ -115,11 +125,35 @@ impl DataMasker {
/// Get existing token for entity or create a new one.
fn get_or_create_token(&self, entity: &str) -> String {
/// Recover from a poisoned RwLock by taking the inner value and re-wrapping.
/// A poisoned lock only means a panic occurred while holding it — the data is still valid.
fn recover_read<T>(lock: &RwLock<T>) -> std::sync::LockResult<std::sync::RwLockReadGuard<'_, T>> {
match lock.read() {
Ok(guard) => Ok(guard),
Err(e) => {
tracing::warn!("[DataMasker] RwLock poisoned during read, recovering");
// Poison error still gives us access to the inner guard
lock.read()
}
}
}
fn recover_write<T>(lock: &RwLock<T>) -> std::sync::LockResult<std::sync::RwLockWriteGuard<'_, T>> {
match lock.write() {
Ok(guard) => Ok(guard),
Err(e) => {
tracing::warn!("[DataMasker] RwLock poisoned during write, recovering");
lock.write()
}
}
}
// Check if already mapped
{
let forward = self.forward.read().unwrap();
if let Some(token) = forward.get(entity) {
return token.clone();
if let Ok(forward) = recover_read(&self.forward) {
if let Some(token) = forward.get(entity) {
return token.clone();
}
}
}
@@ -128,12 +162,10 @@ impl DataMasker {
let token = format!("__ENTITY_{}__", counter);
// Store in both mappings
{
let mut forward = self.forward.write().unwrap();
if let Ok(mut forward) = recover_write(&self.forward) {
forward.insert(entity.to_string(), token.clone());
}
{
let mut reverse = self.reverse.write().unwrap();
if let Ok(mut reverse) = recover_write(&self.reverse) {
reverse.insert(token.clone(), entity.to_string());
}