docs(ai): 实施计划 Chunk 3 (AI Provider + Claude SSE + 脱敏 + Prompt 引擎)

This commit is contained in:
iven
2026-04-25 12:46:55 +08:00
parent ed8548563d
commit 956089bcc8

View File

@@ -785,3 +785,558 @@ git commit -m "feat(ai): 添加 SeaORM Entity (ai_prompt/ai_analysis/ai_usage)"
```
---
## Chunk 3: AI Provider 抽象 + Claude SSE + 数据脱敏
### Task 6: AiProvider trait + Claude 实现
**Files:**
- Create: `crates/erp-ai/src/provider/mod.rs`
- Create: `crates/erp-ai/src/provider/claude.rs`
- Create: `crates/erp-ai/src/dto.rs`
- [ ] **Step 1: 创建 DTO 定义**
```rust
// crates/erp-ai/src/dto.rs
use serde::{Deserialize, Serialize};
// === 分析请求 ===
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzeRequest {
pub analysis_type: AnalysisType,
pub source_ref: String, // report_id 或 patient_id + metrics
pub options: AnalyzeOptions,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
#[serde(rename_all = "snake_case")]
pub enum AnalysisType {
LabReport,
Trends,
CheckupPlan,
ReportSummary,
}
impl AnalysisType {
pub fn as_str(&self) -> &str {
match self {
Self::LabReport => "lab_report",
Self::Trends => "trend",
Self::CheckupPlan => "checkup_plan",
Self::ReportSummary => "report_summary",
}
}
pub fn prompt_name(&self) -> &str {
match self {
Self::LabReport => "lab_report_interpretation",
Self::Trends => "health_trend_analysis",
Self::CheckupPlan => "personalized_checkup_plan",
Self::ReportSummary => "report_summary_generation",
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AnalyzeOptions {
pub detail_level: Option<String>, // patient_friendly | professional
pub language: Option<String>, // zh-CN
}
impl Default for AnalyzeOptions {
fn default() -> Self {
Self {
detail_level: Some("patient_friendly".into()),
language: Some("zh-CN".into()),
}
}
}
// === AI Provider 请求/响应 ===
#[derive(Debug, Clone)]
pub struct GenerateRequest {
pub system_prompt: String,
pub user_prompt: String,
pub model: String,
pub temperature: f32,
pub max_tokens: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerateResponse {
pub content: String,
pub model: String,
pub input_tokens: u32,
pub output_tokens: u32,
pub duration_ms: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamChunk {
pub content: String,
pub index: u32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamDone {
pub analysis_id: uuid::Uuid,
pub status: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StreamMetadata {
pub model: String,
pub tokens: TokenUsage,
pub duration_ms: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TokenUsage {
pub input: u32,
pub output: u32,
}
// === SSE 事件 ===
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
pub enum AnalysisSseEvent {
#[serde(rename = "chunk")]
Chunk { content: String, index: u32 },
#[serde(rename = "metadata")]
Metadata { model: String, tokens: TokenUsage, duration_ms: u64 },
#[serde(rename = "done")]
Done { analysis_id: uuid::Uuid, status: String },
#[serde(rename = "error")]
Error { message: String },
}
```
- [ ] **Step 2: 创建 provider/mod.rs**
```rust
// crates/erp-ai/src/provider/mod.rs
pub mod claude;
use async_trait::async_trait;
use futures::Stream;
use pin_project_lite::pin_project;
use std::pin::Pin;
use crate::dto::GenerateRequest;
use crate::error::AiResult;
/// AI 提供商 trait
#[async_trait]
pub trait AiProvider: Send + Sync {
/// 流式生成 — 返回 Pin<Box<dyn Stream>> 避免 async_trait + impl Trait 不兼容
async fn stream_generate(
&self,
req: GenerateRequest,
) -> AiResult<Pin<Box<dyn Stream<Item = AiResult<String>> + Send>>>;
/// 非流式生成
async fn generate(&self, req: GenerateRequest) -> AiResult<crate::dto::GenerateResponse>;
/// 提供商名称
fn name(&self) -> &str;
/// 健康检查
async fn health_check(&self) -> AiResult<bool>;
}
```
> 注意: 需要在 Cargo.toml 中添加 `pin-project-lite` 或使用 `futures` 的 `pin_mut!` 宏。检查 `futures` 是否已导出 `Stream` trait。
- [ ] **Step 3: 创建 provider/claude.rs (Claude API SSE 实现)**
```rust
// crates/erp-ai/src/provider/claude.rs
use async_trait::async_trait;
use async_stream::stream;
use futures::{Stream, StreamExt};
use reqwest::Client;
use serde::{Deserialize, Serialize};
use std::pin::Pin;
use super::AiProvider;
use crate::dto::GenerateRequest;
use crate::error::{AiError, AiResult};
#[derive(Debug, Clone)]
pub struct ClaudeProvider {
client: Client,
api_key: String,
base_url: String,
}
impl ClaudeProvider {
pub fn new(api_key: String) -> Self {
Self {
client: Client::new(),
api_key,
base_url: "https://api.anthropic.com".into(),
}
}
pub fn with_base_url(mut self, url: String) -> Self {
self.base_url = url;
self
}
}
// Claude API 请求/响应结构
#[derive(Serialize)]
struct ClaudeRequest {
model: String,
max_tokens: u32,
temperature: f32,
system: String,
messages: Vec<ClaudeMessage>,
stream: bool,
}
#[derive(Serialize)]
struct ClaudeMessage {
role: String,
content: String,
}
#[derive(Deserialize)]
struct ClaudeStreamEvent {
#[serde(rename = "type")]
event_type: String,
delta: Option<ClaudeDelta>,
message: Option<ClaudeMessageResp>,
}
#[derive(Deserialize)]
struct ClaudeDelta {
text: Option<String>,
}
#[derive(Deserialize)]
struct ClaudeMessageResp {
usage: Option<ClaudeUsage>,
}
#[derive(Deserialize)]
struct ClaudeUsage {
input_tokens: u32,
output_tokens: u32,
}
#[async_trait]
impl AiProvider for ClaudeProvider {
async fn stream_generate(
&self,
req: GenerateRequest,
) -> AiResult<Pin<Box<dyn Stream<Item = AiResult<String>> + Send>>> {
let claude_req = ClaudeRequest {
model: req.model,
max_tokens: req.max_tokens,
temperature: req.temperature,
system: req.system_prompt,
messages: vec![ClaudeMessage {
role: "user".into(),
content: req.user_prompt,
}],
stream: true,
};
let response = self
.client
.post(format!("{}/v1/messages", self.base_url))
.header("x-api-key", &self.api_key)
.header("anthropic-version", "2023-06-01")
.header("content-type", "application/json")
.json(&claude_req)
.send()
.await
.map_err(|e| AiError::ProviderError(format!("Claude API 请求失败: {e}")))?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err(AiError::ProviderError(format!("Claude API 错误 {status}: {body}")));
}
let stream = Box::pin(stream! {
let mut stream = response.bytes_stream();
while let Some(chunk_result) = stream.next().await {
let bytes = match chunk_result {
Ok(b) => b,
Err(e) => {
yield Err(AiError::ProviderError(format!("流读取错误: {e}")));
break;
}
};
let text = String::from_utf8_lossy(&bytes);
for line in text.lines() {
if let Some(data) = line.strip_prefix("data: ") {
if data == "[DONE]" {
return;
}
if let Ok(event) = serde_json::from_str::<ClaudeStreamEvent>(data) {
if event.event_type == "content_block_delta" {
if let Some(delta) = event.delta {
if let Some(text) = delta.text {
yield Ok(text);
}
}
}
}
}
}
}
});
Ok(stream)
}
async fn generate(&self, req: GenerateRequest) -> AiResult<crate::dto::GenerateResponse> {
let start = std::time::Instant::now();
let claude_req = ClaudeRequest {
model: req.model.clone(),
max_tokens: req.max_tokens,
temperature: req.temperature,
system: req.system_prompt,
messages: vec![ClaudeMessage {
role: "user".into(),
content: req.user_prompt,
}],
stream: false,
};
let resp = self
.client
.post(format!("{}/v1/messages", self.base_url))
.header("x-api-key", &self.api_key)
.header("anthropic-version", "2023-06-01")
.header("content-type", "application/json")
.json(&claude_req)
.send()
.await
.map_err(|e| AiError::ProviderError(e.to_string()))?;
let status = resp.status();
let body = resp.text().await.map_err(|e| AiError::ProviderError(e.to_string()))?;
if !status.is_success() {
return Err(AiError::ProviderError(format!("Claude {status}: {body}")));
}
let start_time = start;
// 解析非流式响应
let parsed: serde_json::Value = serde_json::from_str(&body)
.map_err(|e| AiError::ProviderError(format!("解析响应失败: {e}")))?;
let content = parsed["content"][0]["text"]
.as_str()
.unwrap_or("")
.to_string();
let input_tokens = parsed["usage"]["input_tokens"].as_u64().unwrap_or(0) as u32;
let output_tokens = parsed["usage"]["output_tokens"].as_u64().unwrap_or(0) as u32;
Ok(crate::dto::GenerateResponse {
content,
model: req.model,
input_tokens,
output_tokens,
duration_ms: start_time.elapsed().as_millis() as u64,
})
}
fn name(&self) -> &str {
"claude"
}
async fn health_check(&self) -> AiResult<bool> {
// 简单检查: 发一个最小请求验证 API key 有效
let resp = self
.client
.post(format!("{}/v1/messages", self.base_url))
.header("x-api-key", &self.api_key)
.header("anthropic-version", "2023-06-01")
.header("content-type", "application/json")
.json(&serde_json::json!({
"model": "claude-sonnet-4-6",
"max_tokens": 1,
"messages": [{"role": "user", "content": "hi"}]
}))
.send()
.await;
match resp {
Ok(r) => Ok(r.status().is_success() || r.status().as_u16() == 400),
Err(_) => Ok(false),
}
}
}
```
- [ ] **Step 4: 更新 lib.rs 添加新模块**
```rust
// crates/erp-ai/src/lib.rs
pub mod dto;
pub mod entity;
pub mod error;
pub mod provider;
pub use error::{AiError, AiResult};
```
- [ ] **Step 5: 验证编译**
```bash
cargo check -p erp-ai
```
> 如果 `pin-project-lite` 缺失,在 Cargo.toml 中添加,或直接使用 `std::pin::Pin<Box<...>>` 无需 pin-project。
- [ ] **Step 6: 提交**
```bash
git add crates/erp-ai/src/
git commit -m "feat(ai): AiProvider trait + Claude SSE 流式实现 + DTO 定义"
```
---
### Task 7: 数据脱敏服务 + Prompt 模板引擎
**Files:**
- Create: `crates/erp-ai/src/sanitization/mod.rs`
- Create: `crates/erp-ai/src/prompt/mod.rs`
- [ ] **Step 1: 创建 sanitization/mod.rs**
```rust
// crates/erp-ai/src/sanitization/mod.rs
use erp_core::{
HealthReportDto, LabReportDto, PatientSummaryDto, VitalSignDto,
};
use serde_json::Value;
use crate::error::{AiError, AiResult};
/// 数据脱敏服务 — 确保发送给 AI 的数据不含 PII
/// HealthDataProvider 返回的 DTO 已经是脱敏的(只有年龄/性别/医疗数据)
/// 此服务做二次检查和安全约束注入
pub struct SanitizationService;
impl SanitizationService {
pub fn new() -> Self {
Self
}
/// 验证 DTO 中不包含意外泄漏的 PII 字段
/// 并生成安全的 JSON 数据供 Prompt 模板使用
pub fn sanitize_lab_report(&self, report: &LabReportDto) -> AiResult<Value> {
// LabReportDto 已由 HealthDataProvider 脱敏
// 此处做二次验证 + 转换为模板友好的 JSON
let sanitized = serde_json::to_value(report)
.map_err(|e| AiError::SanitizationError(format!("序列化失败: {e}")))?;
self.verify_no_pii(&sanitized)?;
Ok(sanitized)
}
pub fn sanitize_vital_signs(&self, signs: &[VitalSignDto]) -> AiResult<Value> {
let sanitized = serde_json::to_value(signs)
.map_err(|e| AiError::SanitizationError(format!("序列化失败: {e}")))?;
self.verify_no_pii(&sanitized)?;
Ok(sanitized)
}
pub fn sanitize_patient_summary(&self, summary: &PatientSummaryDto) -> AiResult<Value> {
let sanitized = serde_json::to_value(summary)
.map_err(|e| AiError::SanitizationError(format!("序列化失败: {e}")))?;
self.verify_no_pii(&sanitized)?;
Ok(sanitized)
}
pub fn sanitize_health_report(&self, report: &HealthReportDto) -> AiResult<Value> {
let sanitized = serde_json::to_value(report)
.map_err(|e| AiError::SanitizationError(format!("序列化失败: {e}")))?;
self.verify_no_pii(&sanitized)?;
Ok(sanitized)
}
/// 二次验证: 确保没有意外泄漏的 PII
fn verify_no_pii(&self, value: &Value) -> AiResult<()> {
let pii_keys = ["name", "phone", "id_number", "address", "birth_date", "email"];
if let Value::Object(map) = value {
for key in pii_keys {
if map.contains_key(key) {
return Err(AiError::SanitizationError(
format!("检测到疑似 PII 字段: {key}"),
));
}
}
}
Ok(())
}
}
```
- [ ] **Step 2: 创建 prompt/mod.rs**
```rust
// crates/erp-ai/src/prompt/mod.rs
use handlebars::Handlebars;
use serde_json::Value;
use crate::error::{AiError, AiResult};
/// Prompt 模板渲染引擎
pub struct PromptRenderer {
registry: Handlebars<'static>,
}
impl PromptRenderer {
pub fn new() -> Self {
let mut registry = Handlebars::new();
registry.set_strict_mode(true);
Self { registry }
}
/// 渲染 Prompt 模板 — 使用 Handlebars {{variable}} 语法
/// JSON 序列化注入,不做字符串拼接,防止 Prompt 注入
pub fn render(&self, template: &str, data: &Value) -> AiResult<String> {
self.registry
.render_template(template, data)
.map_err(|e| AiError::TemplateError(format!("模板渲染失败: {e}")))
}
}
```
- [ ] **Step 3: 更新 lib.rs**
```rust
pub mod prompt;
pub mod sanitization;
```
- [ ] **Step 4: 验证编译**
```bash
cargo check -p erp-ai
```
- [ ] **Step 5: 提交**
```bash
git add crates/erp-ai/src/
git commit -m "feat(ai): 数据脱敏服务 + Prompt 模板渲染引擎"
```
---