Files
zclaw_openfang/crates/zclaw-skills/src/wasm_runner.rs
iven 450569dc88
Some checks failed
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Rust Check (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled
fix: 审计后续 3 项修复 — 残留清理 + FTS5 CJK + HTTP 大小限制
1. Shell Hands 残留清理 (3处):
   - message.rs: 移除过时的 zclaw_hands::slideshow 注释
   - user_profiler.rs: slideshow 偏好改为 RecentTopic
   - handStore.test.ts: 移除 speech mock 数据 (3→2)

2. zclaw-growth FTS5 CJK 查询修复:
   - sanitize_fts_query CJK 路径从精确短语改为 token OR 组合
   - "Rust 编程" → "rust" OR "编程" (之前是 "rust 编程" 精确匹配)
   - 修复 test_memory_lifecycle + test_semantic_search_ranking

3. WASM HTTP 响应大小限制:
   - Content-Length 预检 + 读取后截断 (1MB 上限)
   - read_to_string 改为显式错误处理

651 测试全通过,0 失败。
2026-04-18 09:23:58 +08:00

511 lines
19 KiB
Rust

//! WASM skill runner — executes WASM modules in a wasmtime sandbox.
//!
//! **Status**: Active module — fully implemented with real wasmtime integration.
//! Unlike Director/A2A (feature-gated off), this module is compiled by default
//! but only invoked when a `.wasm` skill is loaded. No feature gate needed.
//!
//! Guest modules target `wasm32-wasi` and communicate via stdin/stdout JSON.
//! Host provides optional functions: `zclaw_log`, `zclaw_http_fetch`, `zclaw_file_read`.
use async_trait::async_trait;
use serde_json::Value;
use std::io::Read as IoRead;
use std::path::PathBuf;
use tracing::{debug, warn};
use wasmtime::*;
use wasmtime_wasi::p1::{self, WasiP1Ctx};
use wasmtime_wasi::DirPerms;
use wasmtime_wasi::FilePerms;
use wasmtime_wasi::WasiCtxBuilder;
use zclaw_types::Result;
use crate::{Skill, SkillContext, SkillManifest, SkillResult};
/// Maximum WASM binary size (10 MB).
///
/// `WasmSkill::new` rejects any `.wasm` file whose on-disk size exceeds this.
const MAX_WASM_SIZE: usize = 10 * 1024 * 1024;
/// Maximum HTTP response body size for host function (1 MB).
///
/// Applied by the `zclaw_http_fetch` host function to responses fetched on
/// behalf of a guest module.
const MAX_HTTP_RESPONSE_SIZE: usize = 1024 * 1024;
/// Fuel per second of CPU time (heuristic: ~10M instructions/sec).
///
/// Multiplied by the skill's `timeout_secs` to derive the fuel budget that is
/// installed on the wasmtime store before the guest runs.
const FUEL_PER_SEC: u64 = 10_000_000;
/// WASM skill that runs in a wasmtime sandbox.
#[derive(Debug)]
pub struct WasmSkill {
    /// Manifest describing the skill (returned by `Skill::manifest`).
    manifest: SkillManifest,
    /// Raw module bytes, validated once at load time and compiled per execution.
    wasm_bytes: Vec<u8>,
}

impl WasmSkill {
    /// Load and validate a WASM skill from the given `.wasm` file.
    ///
    /// # Errors
    ///
    /// * `ToolError` if the file cannot be stat'ed or read, or the engine
    ///   cannot be created.
    /// * `InvalidInput` if the file is larger than `MAX_WASM_SIZE` or does not
    ///   validate as a WASM module.
    pub fn new(manifest: SkillManifest, wasm_path: PathBuf) -> Result<Self> {
        let metadata = std::fs::metadata(&wasm_path).map_err(|e| {
            zclaw_types::ZclawError::ToolError(format!(
                "Cannot read WASM file {}: {}",
                wasm_path.display(),
                e
            ))
        })?;

        // Compare in u64: casting the length down to usize would truncate on
        // 32-bit hosts and could let a multi-gigabyte file pass the check.
        let file_size = metadata.len();
        if file_size > MAX_WASM_SIZE as u64 {
            return Err(zclaw_types::ZclawError::InvalidInput(format!(
                "WASM file too large: {} bytes (max {} bytes)",
                file_size, MAX_WASM_SIZE
            )));
        }

        let wasm_bytes = std::fs::read(&wasm_path).map_err(|e| {
            zclaw_types::ZclawError::ToolError(format!(
                "Failed to read WASM file {}: {}",
                wasm_path.display(),
                e
            ))
        })?;

        // The file may have changed between the metadata call and the read;
        // enforce the limit on what was actually loaded as well.
        if wasm_bytes.len() > MAX_WASM_SIZE {
            return Err(zclaw_types::ZclawError::InvalidInput(format!(
                "WASM file too large: {} bytes (max {} bytes)",
                wasm_bytes.len(),
                MAX_WASM_SIZE
            )));
        }

        // Validate the module before accepting it.
        let engine = Engine::new(&create_engine_config())
            .map_err(|e| zclaw_types::ZclawError::ToolError(format!("Engine init failed: {}", e)))?;
        Module::validate(&engine, &wasm_bytes).map_err(|e| {
            zclaw_types::ZclawError::InvalidInput(format!("Invalid WASM module: {}", e))
        })?;

        Ok(Self {
            manifest,
            wasm_bytes,
        })
    }
}
#[async_trait]
impl Skill for WasmSkill {
    /// Borrow the manifest this skill was constructed with.
    fn manifest(&self) -> &SkillManifest {
        &self.manifest
    }

    /// Execute the module with `input` serialized as JSON.
    ///
    /// The wasmtime work is synchronous, so it is moved onto a blocking
    /// thread. A failure inside the sandbox is reported as an unsuccessful
    /// `SkillResult`; only a failed join of the blocking task is returned as
    /// an `Err`.
    async fn execute(&self, context: &SkillContext, input: Value) -> Result<SkillResult> {
        let started = std::time::Instant::now();

        // Everything the blocking closure needs must be owned by it.
        let module_bytes = self.wasm_bytes.clone();
        let secs = context.timeout_secs;
        let allow_net = context.network_allowed;
        let allow_fs = context.file_access_allowed;
        let cwd = context.working_dir.clone();
        let environment = context.env.clone();
        let payload = serde_json::to_string(&input).unwrap_or_default();

        let task = tokio::task::spawn_blocking(move || -> Result<SkillResult> {
            run_wasm(
                &module_bytes,
                &payload,
                secs,
                allow_net,
                allow_fs,
                cwd.as_deref(),
                &environment,
            )
        });
        let outcome = task
            .await
            .map_err(|e| zclaw_types::ZclawError::ToolError(format!("WASM task panicked: {}", e)))?;

        let elapsed_ms = started.elapsed().as_millis() as u64;
        let report = match outcome {
            Ok(mut finished) => {
                finished.duration_ms = Some(elapsed_ms);
                finished
            }
            Err(failure) => SkillResult {
                success: false,
                output: Value::Null,
                error: Some(failure.to_string()),
                duration_ms: Some(elapsed_ms),
                tokens_used: None,
            },
        };
        Ok(report)
    }
}
/// Core WASM execution logic (blocking).
fn run_wasm(
wasm_bytes: &[u8],
input_json: &str,
timeout_secs: u64,
network_allowed: bool,
file_access_allowed: bool,
working_dir: Option<&std::path::Path>,
env_vars: &std::collections::HashMap<String, String>,
) -> Result<SkillResult> {
let config = create_engine_config();
let engine = Engine::new(&config)
.map_err(|e| zclaw_types::ZclawError::ToolError(format!("Engine creation failed: {}", e)))?;
let module = Module::from_binary(&engine, wasm_bytes)
.map_err(|e| zclaw_types::ZclawError::ToolError(format!("Module compilation failed: {}", e)))?;
// Set up WASI context with piped stdin/stdout.
let stdout_pipe = wasmtime_wasi::p2::pipe::MemoryOutputPipe::new(1024 * 1024); // 1 MB capacity
let mut wasi_builder = WasiCtxBuilder::new();
wasi_builder
.stdin(Box::new(wasmtime_wasi::p2::pipe::MemoryInputPipe::new(
input_json.as_bytes().to_vec(),
)))
.stdout(Box::new(stdout_pipe.clone()))
.stderr(Box::new(wasmtime_wasi::p2::pipe::SinkOutputStream));
// Pass skill context as environment variables.
let env_pairs: Vec<(String, String)> = env_vars
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
if !env_pairs.is_empty() {
wasi_builder.envs(&env_pairs);
}
// Optionally preopen working directory (read-only).
if file_access_allowed {
if let Some(dir) = working_dir {
wasi_builder
.preopened_dir(dir, "/workspace", DirPerms::READ, FilePerms::READ)
.map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("Failed to preopen dir: {}", e))
})?;
}
}
let wasi_ctx: WasiP1Ctx = wasi_builder.build_p1();
let mut linker: Linker<WasiP1Ctx> = Linker::new(&engine);
p1::add_to_linker_sync(&mut linker, |t| t)
.map_err(|e| zclaw_types::ZclawError::ToolError(format!("WASI linker setup failed: {}", e)))?;
// Add host functions.
add_host_functions(&mut linker, network_allowed)?;
let fuel = timeout_secs * FUEL_PER_SEC;
let mut store = Store::new(&engine, wasi_ctx);
store.set_fuel(fuel).map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("Failed to set fuel: {}", e))
})?;
let instance = linker
.instantiate(&mut store, &module)
.map_err(|e| zclaw_types::ZclawError::ToolError(format!("WASM instantiation failed: {}", e)))?;
// Run the `_start` function.
let start_fn = instance
.get_typed_func::<(), ()>(&mut store, "_start")
.map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("WASM module has no _start: {}", e))
})?;
start_fn
.call(&mut store, ())
.map_err(|e| zclaw_types::ZclawError::ToolError(format!("WASM execution failed: {}", e)))?;
// Read captured stdout.
let stdout_data = stdout_pipe.contents();
let stdout_str = String::from_utf8_lossy(&stdout_data);
debug!("[WasmSkill] stdout length: {} bytes", stdout_str.len());
// Try to parse as JSON.
let output = if stdout_str.trim().is_empty() {
Value::Null
} else {
serde_json::from_str::<Value>(stdout_str.trim())
.unwrap_or_else(|_| Value::String(stdout_str.trim().to_string()))
};
Ok(SkillResult::success(output))
}
/// Configure wasmtime engine with sandbox settings.
fn create_engine_config() -> Config {
let mut config = Config::new();
config
.consume_fuel(true)
.max_wasm_stack(2 << 20) // 2 MB stack
.wasm_memory64(false);
config
}
/// Add ZCLAW host functions to the wasmtime linker.
fn add_host_functions(linker: &mut Linker<WasiP1Ctx>, network_allowed: bool) -> Result<()> {
linker
.func_wrap(
"env",
"zclaw_log",
|mut caller: Caller<'_, WasiP1Ctx>, ptr: u32, len: u32| {
let msg = read_guest_string(&mut caller, ptr, len);
debug!("[WasmSkill] guest log: {}", msg);
},
)
.map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("Failed to add zclaw_log: {}", e))
})?;
// zclaw_http_fetch(url_ptr, url_len, out_ptr, out_cap) -> bytes_written (-1 = error)
// Performs a synchronous GET request. Result is written to guest memory as JSON string.
let net = network_allowed;
linker
.func_wrap(
"env",
"zclaw_http_fetch",
move |mut caller: Caller<'_, WasiP1Ctx>,
url_ptr: u32,
url_len: u32,
out_ptr: u32,
out_cap: u32|
-> i32 {
if !net {
warn!("[WasmSkill] guest called zclaw_http_fetch — denied (network not allowed)");
return -1;
}
let url = read_guest_string(&mut caller, url_ptr, url_len);
if url.is_empty() {
return -1;
}
// Security: validate URL scheme to prevent SSRF.
// Only http:// and https:// are allowed.
let parsed = match url::Url::parse(&url) {
Ok(u) => u,
Err(_) => {
warn!("[WasmSkill] http_fetch denied — invalid URL: {}", url);
return -1;
}
};
let scheme = parsed.scheme();
if scheme != "http" && scheme != "https" {
warn!("[WasmSkill] http_fetch denied — unsupported scheme: {}", scheme);
return -1;
}
// Block private/loopback hosts to prevent SSRF
if let Some(host) = parsed.host_str() {
let lower = host.to_lowercase();
if lower == "localhost"
|| lower.starts_with("127.")
|| lower.starts_with("10.")
|| lower.starts_with("192.168.")
|| lower.starts_with("169.254.")
|| lower.starts_with("0.")
|| lower.ends_with(".internal")
|| lower.ends_with(".local")
{
warn!("[WasmSkill] http_fetch denied — private/loopback host: {}", host);
return -1;
}
// Also block 172.16.0.0/12 range
if lower.starts_with("172.") {
if let Ok(second) = lower.split('.').nth(1).unwrap_or("0").parse::<u8>() {
if (16..=31).contains(&second) {
warn!("[WasmSkill] http_fetch denied — private host (172.16-31.x.x): {}", host);
return -1;
}
}
}
}
debug!("[WasmSkill] guest http_fetch: {}", url);
// Synchronous HTTP GET (we're already on a blocking thread)
let agent = ureq::Agent::config_builder()
.timeout_global(Some(std::time::Duration::from_secs(10)))
.build()
.new_agent();
let response = agent.get(&url).call();
match response {
Ok(mut resp) => {
// Enforce response size limit before reading body
let content_length = resp.header("content-length")
.and_then(|v| v.to_str().ok())
.and_then(|v| v.parse::<usize>().ok());
if let Some(len) = content_length {
if len > MAX_HTTP_RESPONSE_SIZE {
warn!("[WasmSkill] http_fetch denied — response too large: {} bytes (max {})", len, MAX_HTTP_RESPONSE_SIZE);
return -1;
}
}
let mut body = String::new();
match resp.body_mut().read_to_string(&mut body) {
Ok(_) => {
if body.len() > MAX_HTTP_RESPONSE_SIZE {
warn!("[WasmSkill] http_fetch — response exceeded limit after read, truncating");
body.truncate(MAX_HTTP_RESPONSE_SIZE);
}
write_guest_bytes(&mut caller, out_ptr, out_cap, body.as_bytes())
}
Err(e) => {
warn!("[WasmSkill] http_fetch body read error: {}", e);
-1
}
}
}
}
Err(e) => {
warn!("[WasmSkill] http_fetch error for {}: {}", url, e);
-1
}
}
},
)
.map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("Failed to add zclaw_http_fetch: {}", e))
})?;
// zclaw_file_read(path_ptr, path_len, out_ptr, out_cap) -> bytes_written (-1 = error)
// Reads a file from the preopened /workspace directory. Paths must be relative.
linker
.func_wrap(
"env",
"zclaw_file_read",
|mut caller: Caller<'_, WasiP1Ctx>,
path_ptr: u32,
path_len: u32,
out_ptr: u32,
out_cap: u32|
-> i32 {
let path = read_guest_string(&mut caller, path_ptr, path_len);
if path.is_empty() {
return -1;
}
// Security: validate path stays within /workspace sandbox.
// Reject absolute paths, and filter any path component that
// is ".." (e.g. "foo/../../etc/passwd").
let joined = std::path::Path::new("/workspace").join(&path);
let mut safe = true;
for comp in joined.components() {
match comp {
std::path::Component::ParentDir => {
safe = false;
break;
}
std::path::Component::RootDir | std::path::Component::Prefix(_) => {
safe = false;
break;
}
_ => {} // Normal, CurDir — ok
}
}
if !safe {
warn!("[WasmSkill] guest file_read denied — path escapes sandbox: {}", path);
return -1;
}
match std::fs::read(&joined) {
Ok(data) => write_guest_bytes(&mut caller, out_ptr, out_cap, &data),
Err(e) => {
debug!("[WasmSkill] file_read error for {}: {}", path, e);
-1
}
}
},
)
.map_err(|e| {
zclaw_types::ZclawError::ToolError(format!("Failed to add zclaw_file_read: {}", e))
})?;
Ok(())
}
/// Read a string from WASM guest memory.
///
/// Copies `len` bytes starting at `ptr` out of the guest's exported `memory`
/// and decodes them lossily as UTF-8. Returns an empty string when the guest
/// exports no memory or the requested range is not fully inside it.
fn read_guest_string(caller: &mut Caller<'_, WasiP1Ctx>, ptr: u32, len: u32) -> String {
    let mem = match caller.get_export("memory") {
        Some(Extern::Memory(m)) => m,
        _ => return String::new(),
    };
    let start = ptr as usize;
    // `ptr` and `len` are guest-controlled: use a checked add so the range
    // computation cannot wrap on hosts where usize is 32 bits.
    let end = match start.checked_add(len as usize) {
        Some(end) => end,
        None => return String::new(),
    };
    match mem.data(&caller).get(start..end) {
        Some(bytes) => String::from_utf8_lossy(bytes).into_owned(),
        None => String::new(),
    }
}
/// Write bytes to WASM guest memory.
///
/// Copies at most `cap` bytes of `data` to guest address `ptr`; longer input
/// is truncated to `cap`. Returns the number of bytes written, or -1 when the
/// guest exports no memory or the destination range is not fully inside it.
fn write_guest_bytes(caller: &mut Caller<'_, WasiP1Ctx>, ptr: u32, cap: u32, data: &[u8]) -> i32 {
    let mem = match caller.get_export("memory") {
        Some(Extern::Memory(m)) => m,
        _ => return -1,
    };
    // Also clamp to i32::MAX so the returned count can never wrap into the
    // negative range that callers interpret as an error.
    let write_len = data.len().min(cap as usize).min(i32::MAX as usize);
    let start = ptr as usize;
    // Guest-controlled offset: checked add so the range cannot wrap on hosts
    // where usize is 32 bits.
    let end = match start.checked_add(write_len) {
        Some(end) => end,
        None => return -1,
    };
    match mem.data_mut(&mut *caller).get_mut(start..end) {
        Some(dest) => {
            dest.copy_from_slice(&data[..write_len]);
            write_len as i32
        }
        None => -1,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal WASM-mode manifest with the given id.
    fn test_manifest(id: &str) -> SkillManifest {
        SkillManifest {
            id: zclaw_types::SkillId::new(id),
            name: "Test".into(),
            description: "Test skill".into(),
            version: "1.0".into(),
            author: None,
            mode: crate::SkillMode::Wasm,
            capabilities: vec![],
            input_schema: None,
            output_schema: None,
            tags: vec![],
            category: None,
            triggers: vec![],
            tools: vec![],
            enabled: true,
        }
    }

    /// Write `contents` to `<tmp>/<dir_name>/<file_name>`, try to load it as a
    /// skill, remove the file again, and hand back the load result.
    fn load_from_temp(
        dir_name: &str,
        file_name: &str,
        contents: &[u8],
        skill_id: &str,
    ) -> zclaw_types::Result<WasmSkill> {
        let dir = std::env::temp_dir().join(dir_name);
        let _ = std::fs::create_dir(&dir);
        let wasm_path = dir.join(file_name);
        std::fs::write(&wasm_path, contents).unwrap();
        let outcome = WasmSkill::new(test_manifest(skill_id), wasm_path.clone());
        let _ = std::fs::remove_file(&wasm_path);
        outcome
    }

    /// A file one byte over the size limit must be refused.
    #[test]
    fn test_oversized_rejection() {
        let big_data = vec![0u8; MAX_WASM_SIZE + 1];
        let result = load_from_temp("zclaw_test_oversized", "big.wasm", &big_data, "test-oversized");
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("too large"),
            "Expected 'too large', got: {}",
            err_msg
        );
    }

    /// Bytes that are not a WASM module must fail validation.
    #[test]
    fn test_invalid_binary_rejection() {
        let result = load_from_temp(
            "zclaw_test_invalid",
            "bad.wasm",
            b"not a real wasm module",
            "test-invalid",
        );
        assert!(result.is_err());
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Invalid WASM module"),
            "Expected 'Invalid WASM module', got: {}",
            err_msg
        );
    }
}