fix: pre-release audit fixes — Twitter OAuth, DataMasking perf, Prompt versioning
- Twitter like/retweet: return an explicit "unavailable" error instead of sending doomed Bearer token requests (they would 403 on Twitter API v2)
- DataMasking: pre-compile regex patterns with LazyLock (previously it was compiling 6 patterns on every mask() call)
- Prompt version: fix the get_version handler ignoring the version path param; add service::get_version_by_number for correct per-version retrieval
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
use std::sync::{Arc, LazyLock, RwLock};
|
||||
|
||||
use async_trait::async_trait;
|
||||
use regex::Regex;
|
||||
@@ -15,6 +15,26 @@ use zclaw_types::{Message, Result};
|
||||
|
||||
use super::{AgentMiddleware, MiddlewareContext, MiddlewareDecision};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Pre-compiled regex patterns (compiled once, reused across all calls)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
static RE_COMPANY: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"[^\s]{1,20}(?:公司|厂|集团|工作室|商行|有限|股份)").unwrap()
|
||||
});
|
||||
static RE_MONEY: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"[¥¥$]\s*[\d,.]+[万亿]?元?|[\d,.]+[万亿]元").unwrap()
|
||||
});
|
||||
static RE_PHONE: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"1[3-9]\d-?\d{4}-?\d{4}").unwrap()
|
||||
});
|
||||
static RE_EMAIL: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}").unwrap()
|
||||
});
|
||||
static RE_ID_CARD: LazyLock<Regex> = LazyLock::new(|| {
|
||||
Regex::new(r"\b\d{17}[\dXx]\b").unwrap()
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// DataMasker — entity detection and token mapping
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -73,38 +93,28 @@ impl DataMasker {
|
||||
let mut entities = Vec::new();
|
||||
|
||||
// Company names: X公司、XX集团、XX工作室 (1-20 char prefix + suffix)
|
||||
if let Ok(re) = Regex::new(r"[^\s]{1,20}(?:公司|厂|集团|工作室|商行|有限|股份)") {
|
||||
for cap in re.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
for cap in RE_COMPANY.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
|
||||
// Money amounts: ¥50万、¥100元、$200、50万元
|
||||
if let Ok(re) = Regex::new(r"[¥¥$]\s*[\d,.]+[万亿]?元?|[\d,.]+[万亿]元") {
|
||||
for cap in re.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
for cap in RE_MONEY.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
|
||||
// Phone numbers: 1XX-XXXX-XXXX or 1XXXXXXXXXX
|
||||
if let Ok(re) = Regex::new(r"1[3-9]\d-?\d{4}-?\d{4}") {
|
||||
for cap in re.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
for cap in RE_PHONE.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
|
||||
// Email addresses
|
||||
if let Ok(re) = Regex::new(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}") {
|
||||
for cap in re.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
for cap in RE_EMAIL.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
|
||||
// ID card numbers (simplified): 18 digits
|
||||
if let Ok(re) = Regex::new(r"\b\d{17}[\dXx]\b") {
|
||||
for cap in re.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
for cap in RE_ID_CARD.find_iter(text) {
|
||||
entities.push(cap.as_str().to_string());
|
||||
}
|
||||
|
||||
// Sort by length descending to replace longest entities first
|
||||
@@ -115,11 +125,35 @@ impl DataMasker {
|
||||
|
||||
/// Get existing token for entity or create a new one.
|
||||
fn get_or_create_token(&self, entity: &str) -> String {
|
||||
/// Recover from a poisoned RwLock by taking the inner value and re-wrapping.
|
||||
/// A poisoned lock only means a panic occurred while holding it — the data is still valid.
|
||||
fn recover_read<T>(lock: &RwLock<T>) -> std::sync::LockResult<std::sync::RwLockReadGuard<'_, T>> {
|
||||
match lock.read() {
|
||||
Ok(guard) => Ok(guard),
|
||||
Err(e) => {
|
||||
tracing::warn!("[DataMasker] RwLock poisoned during read, recovering");
|
||||
// Poison error still gives us access to the inner guard
|
||||
lock.read()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn recover_write<T>(lock: &RwLock<T>) -> std::sync::LockResult<std::sync::RwLockWriteGuard<'_, T>> {
|
||||
match lock.write() {
|
||||
Ok(guard) => Ok(guard),
|
||||
Err(e) => {
|
||||
tracing::warn!("[DataMasker] RwLock poisoned during write, recovering");
|
||||
lock.write()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if already mapped
|
||||
{
|
||||
let forward = self.forward.read().unwrap();
|
||||
if let Some(token) = forward.get(entity) {
|
||||
return token.clone();
|
||||
if let Ok(forward) = recover_read(&self.forward) {
|
||||
if let Some(token) = forward.get(entity) {
|
||||
return token.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -128,12 +162,10 @@ impl DataMasker {
|
||||
let token = format!("__ENTITY_{}__", counter);
|
||||
|
||||
// Store in both mappings
|
||||
{
|
||||
let mut forward = self.forward.write().unwrap();
|
||||
if let Ok(mut forward) = recover_write(&self.forward) {
|
||||
forward.insert(entity.to_string(), token.clone());
|
||||
}
|
||||
{
|
||||
let mut reverse = self.reverse.write().unwrap();
|
||||
if let Ok(mut reverse) = recover_write(&self.reverse) {
|
||||
reverse.insert(token.clone(), entity.to_string());
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user