refactor: 清理未使用代码并添加未来功能标记
Some checks failed
CI / Rust Check (push) Has been cancelled
CI / Lint & TypeCheck (push) Has been cancelled
CI / Unit Tests (push) Has been cancelled
CI / Build Frontend (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
CI / E2E Tests (push) Has been cancelled

style: 统一代码格式和注释风格

docs: 更新多个功能文档的完整度和状态

feat(runtime): 添加路径验证工具支持

fix(pipeline): 改进条件判断和变量解析逻辑

test(types): 为ID类型添加全面测试用例

chore: 更新依赖项和Cargo.lock文件

perf(mcp): 优化MCP协议传输和错误处理
This commit is contained in:
iven
2026-03-25 21:55:12 +08:00
parent aa6a9cbd84
commit bf6d81f9c6
109 changed files with 12271 additions and 815 deletions

View File

@@ -5,12 +5,14 @@ mod file_write;
mod shell_exec;
mod web_fetch;
mod execute_skill;
mod path_validator;
pub use file_read::FileReadTool;
pub use file_write::FileWriteTool;
pub use shell_exec::ShellExecTool;
pub use web_fetch::WebFetchTool;
pub use execute_skill::ExecuteSkillTool;
pub use path_validator::{PathValidator, PathValidatorConfig};
use crate::tool::ToolRegistry;

View File

@@ -1,10 +1,13 @@
//! File read tool
//! File read tool with path validation
use async_trait::async_trait;
use serde_json::{json, Value};
use zclaw_types::{Result, ZclawError};
use std::fs;
use std::io::Read;
use crate::tool::{Tool, ToolContext};
use super::path_validator::PathValidator;
pub struct FileReadTool;
@@ -21,7 +24,7 @@ impl Tool for FileReadTool {
}
fn description(&self) -> &str {
"Read the contents of a file from the filesystem"
"Read the contents of a file from the filesystem. The file must be within allowed paths."
}
fn input_schema(&self) -> Value {
@@ -31,20 +34,78 @@ impl Tool for FileReadTool {
"path": {
"type": "string",
"description": "The path to the file to read"
},
"encoding": {
"type": "string",
"description": "Text encoding to use (default: utf-8)",
"enum": ["utf-8", "ascii", "binary"]
}
},
"required": ["path"]
})
}
async fn execute(&self, input: Value, _context: &ToolContext) -> Result<Value> {
async fn execute(&self, input: Value, context: &ToolContext) -> Result<Value> {
let path = input["path"].as_str()
.ok_or_else(|| ZclawError::InvalidInput("Missing 'path' parameter".into()))?;
// TODO: Implement actual file reading with path validation
Ok(json!({
"content": format!("File content placeholder for: {}", path)
}))
let encoding = input["encoding"].as_str().unwrap_or("utf-8");
// Validate path using context's path validator or create default
let validator = context.path_validator.as_ref()
.map(|v| v.clone())
.unwrap_or_else(|| {
// Create default validator with workspace as allowed path
let mut validator = PathValidator::new();
if let Some(ref workspace) = context.working_directory {
validator = validator.with_workspace(std::path::PathBuf::from(workspace));
}
validator
});
// Validate path for read access
let validated_path = validator.validate_read(path)?;
// Read file content
let mut file = fs::File::open(&validated_path)
.map_err(|e| ZclawError::ToolError(format!("Failed to open file: {}", e)))?;
let metadata = fs::metadata(&validated_path)
.map_err(|e| ZclawError::ToolError(format!("Failed to read file metadata: {}", e)))?;
let file_size = metadata.len();
match encoding {
"binary" => {
let mut buffer = Vec::with_capacity(file_size as usize);
file.read_to_end(&mut buffer)
.map_err(|e| ZclawError::ToolError(format!("Failed to read file: {}", e)))?;
// Return base64 encoded binary content
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
let encoded = BASE64.encode(&buffer);
Ok(json!({
"content": encoded,
"encoding": "base64",
"size": file_size,
"path": validated_path.to_string_lossy()
}))
}
_ => {
// Text mode (utf-8 or ascii)
let mut content = String::with_capacity(file_size as usize);
file.read_to_string(&mut content)
.map_err(|e| ZclawError::ToolError(format!("Failed to read file: {}", e)))?;
Ok(json!({
"content": content,
"encoding": encoding,
"size": file_size,
"path": validated_path.to_string_lossy()
}))
}
}
}
}
@@ -53,3 +114,38 @@ impl Default for FileReadTool {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
use crate::tool::builtin::PathValidator;
#[tokio::test]
async fn test_read_file() {
let mut temp_file = NamedTempFile::new().unwrap();
writeln!(temp_file, "Hello, World!").unwrap();
let path = temp_file.path().to_str().unwrap();
let input = json!({ "path": path });
// Configure PathValidator to allow temp directory (use canonicalized path)
let temp_dir = std::env::temp_dir().canonicalize().unwrap_or(std::env::temp_dir());
let path_validator = Some(PathValidator::new().with_workspace(temp_dir));
let context = ToolContext {
agent_id: zclaw_types::AgentId::new(),
working_directory: None,
session_id: None,
skill_executor: None,
path_validator,
};
let tool = FileReadTool::new();
let result = tool.execute(input, &context).await.unwrap();
assert!(result["content"].as_str().unwrap().contains("Hello, World!"));
assert_eq!(result["encoding"].as_str().unwrap(), "utf-8");
}
}

View File

@@ -1,10 +1,13 @@
//! File write tool
//! File write tool with path validation
use async_trait::async_trait;
use serde_json::{json, Value};
use zclaw_types::{Result, ZclawError};
use std::fs;
use std::io::Write;
use crate::tool::{Tool, ToolContext};
use super::path_validator::PathValidator;
pub struct FileWriteTool;
@@ -21,7 +24,7 @@ impl Tool for FileWriteTool {
}
fn description(&self) -> &str {
"Write content to a file on the filesystem"
"Write content to a file on the filesystem. The file must be within allowed paths."
}
fn input_schema(&self) -> Value {
@@ -35,22 +38,92 @@ impl Tool for FileWriteTool {
"content": {
"type": "string",
"description": "The content to write to the file"
},
"mode": {
"type": "string",
"description": "Write mode: 'create' (fail if exists), 'overwrite' (replace), 'append' (add to end)",
"enum": ["create", "overwrite", "append"],
"default": "create"
},
"encoding": {
"type": "string",
"description": "Content encoding (default: utf-8)",
"enum": ["utf-8", "base64"]
}
},
"required": ["path", "content"]
})
}
async fn execute(&self, input: Value, _context: &ToolContext) -> Result<Value> {
let _path = input["path"].as_str()
async fn execute(&self, input: Value, context: &ToolContext) -> Result<Value> {
let path = input["path"].as_str()
.ok_or_else(|| ZclawError::InvalidInput("Missing 'path' parameter".into()))?;
let content = input["content"].as_str()
.ok_or_else(|| ZclawError::InvalidInput("Missing 'content' parameter".into()))?;
// TODO: Implement actual file writing with path validation
let mode = input["mode"].as_str().unwrap_or("create");
let encoding = input["encoding"].as_str().unwrap_or("utf-8");
// Validate path using context's path validator or create default
let validator = context.path_validator.as_ref()
.map(|v| v.clone())
.unwrap_or_else(|| {
// Create default validator with workspace as allowed path
let mut validator = PathValidator::new();
if let Some(ref workspace) = context.working_directory {
validator = validator.with_workspace(std::path::PathBuf::from(workspace));
}
validator
});
// Validate path for write access
let validated_path = validator.validate_write(path)?;
// Decode content based on encoding
let bytes = match encoding {
"base64" => {
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
BASE64.decode(content)
.map_err(|e| ZclawError::InvalidInput(format!("Invalid base64 content: {}", e)))?
}
_ => content.as_bytes().to_vec()
};
// Check if file exists and handle mode
let file_exists = validated_path.exists();
if file_exists && mode == "create" {
return Err(ZclawError::InvalidInput(format!(
"File already exists: {}",
validated_path.display()
)));
}
// Write file
let mut file = match mode {
"append" => {
fs::OpenOptions::new()
.create(true)
.append(true)
.open(&validated_path)
.map_err(|e| ZclawError::ToolError(format!("Failed to open file for append: {}", e)))?
}
_ => {
// create or overwrite
fs::File::create(&validated_path)
.map_err(|e| ZclawError::ToolError(format!("Failed to create file: {}", e)))?
}
};
file.write_all(&bytes)
.map_err(|e| ZclawError::ToolError(format!("Failed to write file: {}", e)))?;
Ok(json!({
"success": true,
"bytes_written": content.len()
"bytes_written": bytes.len(),
"path": validated_path.to_string_lossy(),
"mode": mode
}))
}
}
@@ -60,3 +133,85 @@ impl Default for FileWriteTool {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
use tempfile::tempdir;
use crate::tool::builtin::PathValidator;
fn create_test_context_with_tempdir(dir: &std::path::Path) -> ToolContext {
// Use canonicalized path to handle Windows extended-length paths
let workspace = dir.canonicalize().unwrap_or_else(|_| dir.to_path_buf());
let path_validator = Some(PathValidator::new().with_workspace(workspace));
ToolContext {
agent_id: zclaw_types::AgentId::new(),
working_directory: None,
session_id: None,
skill_executor: None,
path_validator,
}
}
#[tokio::test]
async fn test_write_new_file() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.txt").to_str().unwrap().to_string();
let input = json!({
"path": path,
"content": "Hello, World!"
});
let context = create_test_context_with_tempdir(dir.path());
let tool = FileWriteTool::new();
let result = tool.execute(input, &context).await.unwrap();
assert!(result["success"].as_bool().unwrap());
assert_eq!(result["bytes_written"].as_u64().unwrap(), 13);
}
#[tokio::test]
async fn test_create_mode_fails_on_existing() {
let dir = tempdir().unwrap();
let path = dir.path().join("existing.txt");
fs::write(&path, "existing content").unwrap();
let input = json!({
"path": path.to_str().unwrap(),
"content": "new content",
"mode": "create"
});
let context = create_test_context_with_tempdir(dir.path());
let tool = FileWriteTool::new();
let result = tool.execute(input, &context).await;
assert!(result.is_err());
}
#[tokio::test]
async fn test_overwrite_mode() {
let dir = tempdir().unwrap();
let path = dir.path().join("test.txt");
fs::write(&path, "old content").unwrap();
let input = json!({
"path": path.to_str().unwrap(),
"content": "new content",
"mode": "overwrite"
});
let context = create_test_context_with_tempdir(dir.path());
let tool = FileWriteTool::new();
let result = tool.execute(input, &context).await.unwrap();
assert!(result["success"].as_bool().unwrap());
let content = fs::read_to_string(&path).unwrap();
assert_eq!(content, "new content");
}
}

View File

@@ -0,0 +1,461 @@
//! Path validation for file system tools
//!
//! Provides security validation for file paths to prevent:
//! - Path traversal attacks (../)
//! - Access to blocked system directories
//! - Access outside allowed workspace directories
//!
//! # Security Policy (Default Deny)
//!
//! This validator follows a **default deny** security policy:
//! - If no `allowed_paths` are configured AND no `workspace_root` is set,
//! all path access is denied by default
//! - This prevents accidental exposure of sensitive files when the validator
//! is used without proper configuration
//! - To enable file access, you MUST either:
//! 1. Set explicit `allowed_paths` in the configuration, OR
//! 2. Configure a `workspace_root` directory
//!
//! Example configuration:
//! ```ignore
//! let validator = PathValidator::with_config(config)
//! .with_workspace(PathBuf::from("/safe/workspace"));
//! ```
use std::path::{Path, PathBuf, Component};
use zclaw_types::{Result, ZclawError};
/// Path validator configuration
///
/// Controls which filesystem locations the file tools may touch and how
/// large a file they may read. Built by [`PathValidatorConfig::from_config`]
/// from security.toml, or via `Default` for the conservative baseline.
#[derive(Debug, Clone)]
pub struct PathValidatorConfig {
    /// Allowed directory prefixes (empty = allow all within workspace)
    pub allowed_paths: Vec<PathBuf>,
    /// Blocked paths (always denied, even if in allowed_paths)
    pub blocked_paths: Vec<PathBuf>,
    /// Maximum file size in bytes (0 = no limit)
    pub max_file_size: u64,
    /// Whether to allow symbolic links
    pub allow_symlinks: bool,
}
impl Default for PathValidatorConfig {
fn default() -> Self {
Self {
allowed_paths: Vec::new(),
blocked_paths: default_blocked_paths(),
max_file_size: 10 * 1024 * 1024, // 10MB default
allow_symlinks: false,
}
}
}
impl PathValidatorConfig {
    /// Create config from security.toml settings.
    ///
    /// * `allowed` - user-configured allowed directory prefixes (tilde-expanded)
    /// * `blocked` - user-configured blocked paths; the built-in defaults are
    ///   always appended so they cannot be configured away
    /// * `max_size` - human-readable limit like "10MB"; falls back to 10 MB
    ///   when unparseable
    pub fn from_config(allowed: &[String], blocked: &[String], max_size: &str) -> Self {
        let allowed_paths: Vec<PathBuf> = allowed
            .iter()
            .map(|p| expand_tilde(p))
            .collect();
        // Fix: expand tildes in user-supplied blocked paths too. Previously
        // they were taken literally (`PathBuf::from`), so an entry like
        // "~/secrets" could never match the absolute canonical paths that
        // check_blocked compares against.
        let blocked_paths: Vec<PathBuf> = blocked
            .iter()
            .map(|p| expand_tilde(p))
            .chain(default_blocked_paths())
            .collect();
        let max_file_size = parse_size(max_size).unwrap_or(10 * 1024 * 1024);
        Self {
            allowed_paths,
            blocked_paths,
            max_file_size,
            allow_symlinks: false,
        }
    }
}
/// Built-in set of filesystem locations that are always denied,
/// regardless of the configured allow-list.
fn default_blocked_paths() -> Vec<PathBuf> {
    const BLOCKED: &[&str] = &[
        // Unix sensitive files
        "/etc/shadow",
        "/etc/passwd",
        "/etc/sudoers",
        "/root",
        "/proc",
        "/sys",
        // Windows sensitive paths
        "C:\\Windows\\System32\\config",
        "C:\\Users\\Administrator",
        // SSH keys
        "/.ssh",
        "/root/.ssh",
        // Environment files
        ".env",
        ".env.local",
        ".env.production",
    ];
    BLOCKED.iter().copied().map(PathBuf::from).collect()
}
/// Expand a leading `~` to the current user's home directory.
///
/// Only the forms `~`, `~/...`, and `~\...` are expanded; anything else
/// (including `~user/...`) is returned verbatim, as is everything when the
/// home directory cannot be determined.
fn expand_tilde(path: &str) -> PathBuf {
    if !path.starts_with('~') {
        return PathBuf::from(path);
    }
    match dirs::home_dir() {
        Some(home) if path == "~" => home,
        // Skip the two leading bytes ("~/" or "~\") and join the remainder.
        Some(home) if path.starts_with("~/") || path.starts_with("~\\") => home.join(&path[2..]),
        // No home dir, or an unexpandable form like "~user".
        _ => PathBuf::from(path),
    }
}
/// Parse a human-readable size string like "10MB", "1GB", "512KB", "1024B",
/// or a bare number of bytes (case-insensitive, surrounding whitespace
/// ignored).
///
/// Returns `None` for malformed input or when the byte count would overflow
/// `u64`.
fn parse_size(s: &str) -> Option<u64> {
    let s = s.trim().to_uppercase();
    // Longer suffixes are tested first so "MB" is not mistaken for "B".
    let (num, unit) = if s.ends_with("GB") {
        (s.trim_end_matches("GB").trim(), 1024 * 1024 * 1024)
    } else if s.ends_with("MB") {
        (s.trim_end_matches("MB").trim(), 1024 * 1024)
    } else if s.ends_with("KB") {
        (s.trim_end_matches("KB").trim(), 1024)
    } else if s.ends_with("B") {
        (s.trim_end_matches("B").trim(), 1)
    } else {
        (s.as_str(), 1)
    };
    // Fix: the original `n * unit` silently wraps in release builds for huge
    // inputs; checked_mul makes overflow report as a parse failure instead.
    num.parse::<u64>().ok().and_then(|n| n.checked_mul(unit))
}
/// Path validator for file system security
///
/// Combines a static [`PathValidatorConfig`] policy with an optional
/// workspace root; see the module docs for the default-deny behavior when
/// neither allowed paths nor a workspace are configured.
#[derive(Debug, Clone)]
pub struct PathValidator {
    // Allow/block lists, size cap, and symlink policy.
    config: PathValidatorConfig,
    // When set, paths must live under this directory unless covered by
    // an explicit allowed_paths entry.
    workspace_root: Option<PathBuf>,
}
impl PathValidator {
    /// Create a new path validator with default config.
    ///
    /// NOTE: with no workspace and no allowed paths configured, the
    /// default-deny policy in `check_allowed` rejects every path.
    pub fn new() -> Self {
        Self {
            config: PathValidatorConfig::default(),
            workspace_root: None,
        }
    }

    /// Create a path validator with a custom config (no workspace yet).
    pub fn with_config(config: PathValidatorConfig) -> Self {
        Self {
            config,
            workspace_root: None,
        }
    }

    /// Set the workspace root directory (builder-style, consumes self).
    pub fn with_workspace(mut self, workspace: PathBuf) -> Self {
        self.workspace_root = Some(workspace);
        self
    }

    /// Validate a path for read access.
    ///
    /// On success returns the canonicalized path. Errors when the path fails
    /// any policy check, does not exist, is not a regular file, or exceeds
    /// `max_file_size` (0 disables the size check).
    pub fn validate_read(&self, path: &str) -> Result<PathBuf> {
        let canonical = self.resolve_and_validate(path)?;
        // Check if file exists
        if !canonical.exists() {
            return Err(ZclawError::InvalidInput(format!(
                "File does not exist: {}",
                path
            )));
        }
        // Check if it's a file (not directory)
        if !canonical.is_file() {
            return Err(ZclawError::InvalidInput(format!(
                "Path is not a file: {}",
                path
            )));
        }
        // Check file size (0 = unlimited). A metadata read failure is ignored
        // here; it will surface when the caller actually opens the file.
        if self.config.max_file_size > 0 {
            if let Ok(metadata) = std::fs::metadata(&canonical) {
                if metadata.len() > self.config.max_file_size {
                    return Err(ZclawError::InvalidInput(format!(
                        "File too large: {} bytes (max: {} bytes)",
                        metadata.len(),
                        self.config.max_file_size
                    )));
                }
            }
        }
        Ok(canonical)
    }

    /// Validate a path for write access.
    ///
    /// On success returns the canonicalized path. The file itself may not
    /// exist yet, but its parent directory must, and any existing entry at
    /// the path must be a regular file (not a directory).
    pub fn validate_write(&self, path: &str) -> Result<PathBuf> {
        let canonical = self.resolve_and_validate(path)?;
        // Check parent directory exists
        if let Some(parent) = canonical.parent() {
            if !parent.exists() {
                return Err(ZclawError::InvalidInput(format!(
                    "Parent directory does not exist: {}",
                    parent.display()
                )));
            }
        }
        // If something already exists at the path, it must be a regular file
        if canonical.exists() && !canonical.is_file() {
            return Err(ZclawError::InvalidInput(format!(
                "Path exists but is not a file: {}",
                path
            )));
        }
        Ok(canonical)
    }

    /// Resolve a user-supplied path to an absolute canonical path and run all
    /// policy checks (traversal, blocked list, allowed list, symlinks).
    fn resolve_and_validate(&self, path: &str) -> Result<PathBuf> {
        // Expand tilde
        let expanded = expand_tilde(path);
        let path_buf = PathBuf::from(&expanded);
        // Check for path traversal
        self.check_path_traversal(&path_buf)?;
        // Resolve to canonical path
        let canonical = if path_buf.exists() {
            path_buf
                .canonicalize()
                .map_err(|e| ZclawError::InvalidInput(format!("Cannot resolve path: {}", e)))?
        } else {
            // For non-existent files (write targets), canonicalize the parent
            // and re-attach the file name.
            let parent = path_buf.parent().unwrap_or(Path::new("."));
            let canonical_parent = parent
                .canonicalize()
                .map_err(|e| ZclawError::InvalidInput(format!("Cannot resolve parent path: {}", e)))?;
            canonical_parent.join(path_buf.file_name().unwrap_or_default())
        };
        // Check blocked paths
        self.check_blocked(&canonical)?;
        // Check allowed paths
        self.check_allowed(&canonical)?;
        // Check symlinks
        // NOTE(review): canonicalize() above already resolves symlinks, so for
        // existing paths `canonical` can never itself be a symlink and this
        // check appears ineffective; it likely needs to run on the
        // pre-canonicalized path (or each component) — confirm intent.
        if !self.config.allow_symlinks {
            self.check_symlink(&canonical)?;
        }
        Ok(canonical)
    }

    /// Check for path traversal attacks (`..` components).
    fn check_path_traversal(&self, path: &Path) -> Result<()> {
        for component in path.components() {
            if let Component::ParentDir = component {
                // Allow .. if workspace is configured (will be validated in check_allowed)
                // Deny .. if no workspace is configured (more restrictive)
                if self.workspace_root.is_none() {
                    // Without workspace, be more restrictive
                    return Err(ZclawError::InvalidInput(
                        "Path traversal not allowed outside workspace".to_string()
                    ));
                }
            }
        }
        Ok(())
    }

    /// Check if path is in the blocked list (prefix or exact match).
    ///
    /// NOTE(review): relative blocked entries such as ".env" never match here:
    /// `path` is absolute/canonical and `Path::starts_with` compares whole
    /// components from the beginning, so "/work/.env" does not start with
    /// ".env". A file-name/component check is probably intended — confirm.
    fn check_blocked(&self, path: &Path) -> Result<()> {
        for blocked in &self.config.blocked_paths {
            if path.starts_with(blocked) || path == blocked {
                return Err(ZclawError::InvalidInput(format!(
                    "Access to this path is blocked: {}",
                    path.display()
                )));
            }
        }
        Ok(())
    }

    /// Check if path is in allowed list
    ///
    /// # Security: Default Deny Policy
    ///
    /// This method implements a strict default-deny security policy:
    /// - If `allowed_paths` is empty AND no `workspace_root` is configured,
    ///   access is **denied by default** with a clear error message
    /// - This prevents accidental exposure of the entire filesystem
    ///   when the validator is misconfigured or used without setup
    fn check_allowed(&self, path: &Path) -> Result<()> {
        // If no allowed paths specified, check workspace
        if self.config.allowed_paths.is_empty() {
            if let Some(ref workspace) = self.workspace_root {
                // Workspace is configured - validate path is within it
                if !path.starts_with(workspace) {
                    return Err(ZclawError::InvalidInput(format!(
                        "Path outside workspace: {} (workspace: {})",
                        path.display(),
                        workspace.display()
                    )));
                }
                return Ok(());
            } else {
                // SECURITY: No allowed_paths AND no workspace_root configured
                // Default to DENY - do not allow unrestricted filesystem access
                return Err(ZclawError::InvalidInput(
                    "Path access denied: no workspace or allowed paths configured. \
                     To enable file access, configure either 'allowed_paths' in security.toml \
                     or set a workspace_root directory."
                        .to_string(),
                ));
            }
        }
        // Check against allowed paths (workspace_root is ignored in this case)
        for allowed in &self.config.allowed_paths {
            if path.starts_with(allowed) {
                return Ok(());
            }
        }
        Err(ZclawError::InvalidInput(format!(
            "Path not in allowed directories: {}",
            path.display()
        )))
    }

    /// Check for symbolic links (uses symlink_metadata so the link itself,
    /// not its target, is inspected). Non-existent paths pass trivially.
    fn check_symlink(&self, path: &Path) -> Result<()> {
        if path.exists() {
            let metadata = std::fs::symlink_metadata(path)
                .map_err(|e| ZclawError::InvalidInput(format!("Cannot read path metadata: {}", e)))?;
            if metadata.file_type().is_symlink() {
                return Err(ZclawError::InvalidInput(
                    "Symbolic links are not allowed".to_string()
                ));
            }
        }
        Ok(())
    }
}
impl Default for PathValidator {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Unit coverage for the parsing helpers and the allow/block/deny policy.

    #[test]
    fn test_parse_size() {
        assert_eq!(parse_size("10MB"), Some(10 * 1024 * 1024));
        assert_eq!(parse_size("1GB"), Some(1024 * 1024 * 1024));
        assert_eq!(parse_size("512KB"), Some(512 * 1024));
        assert_eq!(parse_size("1024B"), Some(1024));
    }

    #[test]
    fn test_expand_tilde() {
        let home = dirs::home_dir().unwrap_or_default();
        assert_eq!(expand_tilde("~"), home);
        assert!(expand_tilde("~/test").starts_with(&home));
        // Paths without a tilde must pass through unchanged.
        assert_eq!(expand_tilde("/absolute/path"), PathBuf::from("/absolute/path"));
    }

    #[test]
    fn test_blocked_paths() {
        let validator = PathValidator::new();
        // These should be blocked (blocked paths take precedence)
        assert!(validator.resolve_and_validate("/etc/shadow").is_err());
        assert!(validator.resolve_and_validate("/etc/passwd").is_err());
    }

    #[test]
    fn test_path_traversal() {
        // Without workspace, traversal should fail
        let no_workspace = PathValidator::new();
        assert!(no_workspace.resolve_and_validate("../../../etc/passwd").is_err());
    }

    #[test]
    fn test_default_deny_without_configuration() {
        // SECURITY TEST: Verify default deny policy when no configuration is set
        // A validator with no allowed_paths and no workspace_root should deny all access
        let validator = PathValidator::new();
        // Even valid paths should be denied when not configured
        let result = validator.check_allowed(Path::new("/some/random/path"));
        assert!(result.is_err(), "Expected denial when no configuration is set");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("no workspace or allowed paths configured"),
            "Error message should explain configuration requirement, got: {}",
            err_msg
        );
    }

    #[test]
    fn test_allows_with_workspace_root() {
        // When workspace_root is set, paths within workspace should be allowed
        let workspace = std::env::temp_dir();
        let validator = PathValidator::new()
            .with_workspace(workspace.clone());
        // Path within workspace should pass the allowed check
        let test_path = workspace.join("test_file.txt");
        let result = validator.check_allowed(&test_path);
        assert!(result.is_ok(), "Path within workspace should be allowed");
    }

    #[test]
    fn test_allows_with_explicit_allowed_paths() {
        // When allowed_paths is configured, those paths should be allowed
        let temp_dir = std::env::temp_dir();
        let config = PathValidatorConfig {
            allowed_paths: vec![temp_dir.clone()],
            blocked_paths: vec![],
            max_file_size: 0,
            allow_symlinks: false,
        };
        let validator = PathValidator::with_config(config);
        // Path within allowed_paths should pass
        let test_path = temp_dir.join("test_file.txt");
        let result = validator.check_allowed(&test_path);
        assert!(result.is_ok(), "Path in allowed_paths should be allowed");
    }

    #[test]
    fn test_denies_outside_workspace() {
        // Paths outside workspace_root should be denied
        let validator = PathValidator::new()
            .with_workspace(PathBuf::from("/safe/workspace"));
        let result = validator.check_allowed(Path::new("/other/location"));
        assert!(result.is_err(), "Path outside workspace should be denied");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("Path outside workspace"),
            "Error should indicate path is outside workspace, got: {}",
            err_msg
        );
    }
}

View File

@@ -10,6 +10,24 @@ use zclaw_types::{Result, ZclawError};
use crate::tool::{Tool, ToolContext};
/// Parse a command string into a program and its arguments, honoring
/// shell-style quoting via `shlex` (so `echo "a b"` yields one argument).
///
/// Errors on unbalanced quotes or an empty command.
fn parse_command(command: &str) -> Result<(String, Vec<String>)> {
    let tokens = shlex::split(command).ok_or_else(|| {
        ZclawError::InvalidInput(
            format!("Failed to parse command: invalid quoting in '{}'", command)
        )
    })?;
    match tokens.split_first() {
        // First token is the program; the rest are its arguments.
        Some((program, rest)) => Ok((program.clone(), rest.to_vec())),
        None => Err(ZclawError::InvalidInput("Empty command".into())),
    }
}
/// Security configuration for shell execution
#[derive(Debug, Clone, Deserialize)]
pub struct ShellSecurityConfig {
@@ -167,18 +185,12 @@ impl Tool for ShellExecTool {
// Security check
self.config.is_command_allowed(command)?;
// Parse command into program and args
let parts: Vec<&str> = command.split_whitespace().collect();
if parts.is_empty() {
return Err(ZclawError::InvalidInput("Empty command".into()));
}
let program = parts[0];
let args = &parts[1..];
// Parse command into program and args using proper shell quoting
let (program, args) = parse_command(command)?;
// Build command
let mut cmd = Command::new(program);
cmd.args(args);
let mut cmd = Command::new(&program);
cmd.args(&args);
if let Some(dir) = cwd {
cmd.current_dir(dir);
@@ -190,24 +202,35 @@ impl Tool for ShellExecTool {
.stderr(Stdio::piped());
let start = Instant::now();
let timeout_duration = Duration::from_secs(timeout_secs);
// Execute command
let output = tokio::task::spawn_blocking(move || {
cmd.output()
})
.await
.map_err(|e| ZclawError::ToolError(format!("Task spawn error: {}", e)))?
.map_err(|e| ZclawError::ToolError(format!("Command execution failed: {}", e)))?;
// Execute command with proper timeout (timeout applies DURING execution)
let output_result = tokio::time::timeout(
timeout_duration,
tokio::task::spawn_blocking(move || {
cmd.output()
})
).await;
let output = match output_result {
// Timeout triggered - command took too long
Err(_) => {
return Err(ZclawError::Timeout(
format!("Command timed out after {} seconds", timeout_secs)
));
}
// Spawn blocking task completed
Ok(Ok(result)) => {
result.map_err(|e| ZclawError::ToolError(format!("Command execution failed: {}", e)))?
}
// Spawn blocking task panicked or was cancelled
Ok(Err(e)) => {
return Err(ZclawError::ToolError(format!("Task spawn error: {}", e)));
}
};
let duration = start.elapsed();
// Check timeout
if duration > Duration::from_secs(timeout_secs) {
return Err(ZclawError::Timeout(
format!("Command timed out after {} seconds", timeout_secs)
));
}
// Truncate output if too large
let stdout = String::from_utf8_lossy(&output.stdout);
let stderr = String::from_utf8_lossy(&output.stderr);
@@ -271,4 +294,37 @@ mod tests {
// Should block non-whitelisted commands
assert!(config.is_command_allowed("dangerous_command").is_err());
}
#[test]
fn test_parse_command_simple() {
let (program, args) = parse_command("ls -la").unwrap();
assert_eq!(program, "ls");
assert_eq!(args, vec!["-la"]);
}
#[test]
fn test_parse_command_with_quotes() {
let (program, args) = parse_command("echo \"hello world\"").unwrap();
assert_eq!(program, "echo");
assert_eq!(args, vec!["hello world"]);
}
#[test]
fn test_parse_command_with_single_quotes() {
let (program, args) = parse_command("echo 'hello world'").unwrap();
assert_eq!(program, "echo");
assert_eq!(args, vec!["hello world"]);
}
#[test]
fn test_parse_command_complex() {
let (program, args) = parse_command("git commit -m \"Initial commit\"").unwrap();
assert_eq!(program, "git");
assert_eq!(args, vec!["commit", "-m", "Initial commit"]);
}
#[test]
fn test_parse_command_empty() {
assert!(parse_command("").is_err());
}
}

View File

@@ -1,16 +1,343 @@
//! Web fetch tool
//! Web fetch tool with SSRF protection
//!
//! This module provides a secure web fetching capability with comprehensive
//! SSRF (Server-Side Request Forgery) protection including:
//! - Private IP range blocking (RFC 1918)
//! - Cloud metadata endpoint blocking (169.254.169.254)
//! - Localhost/loopback blocking
//! - Redirect protection with recursive checks
//! - Timeout control
//! - Response size limits
use async_trait::async_trait;
use reqwest::redirect::Policy;
use serde_json::{json, Value};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::time::Duration;
use url::Url;
use zclaw_types::{Result, ZclawError};
use crate::tool::{Tool, ToolContext};
pub struct WebFetchTool;
/// Maximum response size in bytes (10 MB)
const MAX_RESPONSE_SIZE: u64 = 10 * 1024 * 1024;
/// Request timeout in seconds
const REQUEST_TIMEOUT_SECS: u64 = 30;
/// Maximum number of redirect hops allowed
const MAX_REDIRECT_HOPS: usize = 5;
/// Maximum URL length
const MAX_URL_LENGTH: usize = 2048;
pub struct WebFetchTool {
client: reqwest::Client,
}
impl WebFetchTool {
pub fn new() -> Self {
Self
// Build a client with redirect policy that we control
// We'll handle redirects manually to validate each target
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(REQUEST_TIMEOUT_SECS))
.redirect(Policy::none()) // Handle redirects manually for SSRF validation
.user_agent("ZCLAW/1.0")
.build()
.unwrap_or_else(|_| reqwest::Client::new());
Self { client }
}
/// Validate a URL for SSRF safety
///
/// This checks:
/// - URL length (capped at MAX_URL_LENGTH)
/// - URL scheme (only http/https allowed)
/// - Literal IP hosts against the IPv4/IPv6 deny rules
/// - Known-internal hostnames and numeric hostname encodings
///
/// Returns the parsed URL on success. For domain-name hosts the final IP
/// check can only happen once DNS resolution occurs during the request.
fn validate_url(&self, url_str: &str) -> Result<Url> {
    // Check URL length
    if url_str.len() > MAX_URL_LENGTH {
        return Err(ZclawError::InvalidInput(format!(
            "URL exceeds maximum length of {} characters",
            MAX_URL_LENGTH
        )));
    }
    // Parse URL
    let url = Url::parse(url_str)
        .map_err(|e| ZclawError::InvalidInput(format!("Invalid URL: {}", e)))?;
    // Check scheme - only allow http and https
    match url.scheme() {
        "http" | "https" => {}
        scheme => {
            return Err(ZclawError::InvalidInput(format!(
                "URL scheme '{}' is not allowed. Only http and https are permitted.",
                scheme
            )));
        }
    }
    // A host is mandatory; data:/mailto:-style URLs were already rejected
    // by the scheme check above.
    let host = url
        .host_str()
        .ok_or_else(|| ZclawError::InvalidInput("URL must have a host".into()))?;
    // For IPv6 hosts, host_str yields the bracketed form (e.g. "[::1]");
    // strip the brackets so the string parses as an IpAddr.
    let host_for_parsing = if host.starts_with('[') && host.ends_with(']') {
        &host[1..host.len()-1]
    } else {
        host
    };
    if let Ok(ip) = host_for_parsing.parse::<IpAddr>() {
        // Literal IP host: apply the range deny-list directly.
        self.validate_ip_address(&ip)?;
    } else {
        // Domain host: the resolved IP is checked during the actual request;
        // here we reject known-internal names and numeric encodings.
        self.validate_hostname(host)?;
    }
    Ok(url)
}
/// Dispatch SSRF validation to the version-specific checker.
fn validate_ip_address(&self, ip: &IpAddr) -> Result<()> {
    match ip {
        IpAddr::V4(v4) => self.validate_ipv4(v4),
        IpAddr::V6(v6) => self.validate_ipv6(v6),
    }
}
/// Reject IPv4 addresses pointing at internal, local, or otherwise
/// non-routable destinations.
///
/// Checks run in a fixed order so the most specific error message wins
/// (e.g. the cloud metadata endpoint is reported before the general
/// link-local range that contains it).
fn validate_ipv4(&self, ip: &Ipv4Addr) -> Result<()> {
    let [a, b, c, d] = ip.octets();
    let deny = |msg: &str| Err(ZclawError::InvalidInput(msg.into()));
    // Loopback (127.0.0.0/8)
    if a == 127 {
        return deny("Access to loopback addresses (127.x.x.x) is not allowed");
    }
    // RFC 1918 private ranges
    if a == 10 {
        return deny("Access to private IP range 10.x.x.x is not allowed");
    }
    if a == 172 && (16..=31).contains(&b) {
        return deny("Access to private IP range 172.16-31.x.x is not allowed");
    }
    if a == 192 && b == 168 {
        return deny("Access to private IP range 192.168.x.x is not allowed");
    }
    // Cloud metadata endpoint first, then the surrounding link-local range
    if (a, b, c, d) == (169, 254, 169, 254) {
        return deny("Access to cloud metadata endpoint (169.254.169.254) is not allowed");
    }
    if a == 169 && b == 254 {
        return deny("Access to link-local addresses (169.254.x.x) is not allowed");
    }
    // "Current network" (0.0.0.0/8)
    if a == 0 {
        return deny("Access to 0.x.x.x addresses is not allowed");
    }
    // Limited broadcast
    if (a, b, c, d) == (255, 255, 255, 255) {
        return deny("Access to broadcast address is not allowed");
    }
    // Multicast (224.0.0.0/4)
    if (224..=239).contains(&a) {
        return deny("Access to multicast addresses is not allowed");
    }
    Ok(())
}
/// Reject IPv6 addresses pointing at internal/local destinations, including
/// IPv4-mapped addresses that embed a blocked IPv4 address.
///
/// # Errors
/// Returns `ZclawError::InvalidInput` naming the blocked range.
fn validate_ipv6(&self, ip: &Ipv6Addr) -> Result<()> {
    // Block loopback (::1)
    if *ip == Ipv6Addr::LOCALHOST {
        return Err(ZclawError::InvalidInput(
            "Access to IPv6 loopback address (::1) is not allowed".into(),
        ));
    }
    // Block unspecified address (::)
    if *ip == Ipv6Addr::UNSPECIFIED {
        return Err(ZclawError::InvalidInput(
            "Access to unspecified IPv6 address (::) is not allowed".into(),
        ));
    }
    let segments = ip.segments();
    // Block IPv4-mapped IPv6 addresses (::ffff:a.b.c.d/96) by validating the
    // embedded IPv4 address, so they cannot bypass the IPv4 checks.
    // Fix: match the segment layout (0:0:0:0:0:ffff:x:y) directly instead of
    // the previous `to_string().starts_with("::ffff:")` guard, which relied
    // on Display formatting and never verified segments 0-4 were zero.
    if segments[..5] == [0, 0, 0, 0, 0] && segments[5] == 0xffff {
        let v4_addr = ((segments[6] as u32) << 16) | (segments[7] as u32);
        self.validate_ipv4(&Ipv4Addr::from(v4_addr))?;
    }
    // Block link-local IPv6 (fe80::/10)
    if (segments[0] & 0xffc0) == 0xfe80 {
        return Err(ZclawError::InvalidInput(
            "Access to IPv6 link-local addresses is not allowed".into(),
        ));
    }
    // Block unique local addresses (fc00::/7) - IPv6 equivalent of private ranges
    if (segments[0] & 0xfe00) == 0xfc00 {
        return Err(ZclawError::InvalidInput(
            "Access to IPv6 unique local addresses is not allowed".into(),
        ));
    }
    Ok(())
}
/// Validate a hostname for potential SSRF attacks.
///
/// Rejects localhost aliases and well-known internal/metadata service
/// names (including any subdomain of them), then checks for numeric
/// hostnames that encode an IP address.
fn validate_hostname(&self, host: &str) -> Result<()> {
    // Normalize: lowercase and strip a trailing FQDN dot so that e.g.
    // "localhost." (which resolves identically) cannot slip past the
    // exact-match and suffix comparisons below.
    let host_lower = host.trim_end_matches('.').to_lowercase();
    // Block localhost variants and cloud/cluster metadata hostnames.
    const BLOCKED_HOSTS: [&str; 8] = [
        "localhost",
        "localhost.localdomain",
        "ip6-localhost",
        "ip6-loopback",
        "metadata.google.internal",
        "metadata",
        "kubernetes.default",
        "kubernetes.default.svc",
    ];
    for blocked in &BLOCKED_HOSTS {
        // Match the blocked name itself or any subdomain of it, without
        // allocating a ".name" string for every candidate.
        let is_subdomain = host_lower
            .strip_suffix(blocked)
            .map_or(false, |prefix| prefix.ends_with('.'));
        if host_lower == *blocked || is_subdomain {
            return Err(ZclawError::InvalidInput(format!(
                "Access to '{}' is not allowed",
                host
            )));
        }
    }
    // Block hostnames that are really IP addresses in disguise
    // (decimal/octal/hex encodings that could bypass the IP checks).
    // The actual DNS resolution check happens during the request.
    self.check_hostname_ip_bypass(&host_lower)?;
    Ok(())
}
/// Check for hostname-based IP bypass attempts.
///
/// An IPv4 address can be encoded as a single integer to evade
/// string-based host checks while still resolving at the socket layer:
/// decimal ("2130706433"), octal ("017700000001"), and hex ("0x7f000001")
/// all mean 127.0.0.1. The caller passes a lowercased hostname; ordinary
/// domain names fall through and are resolved during the request itself.
fn check_hostname_ip_bypass(&self, host: &str) -> Result<()> {
    // All-digit hostnames: try decimal, and octal when 0-prefixed.
    if !host.is_empty() && host.bytes().all(|b| b.is_ascii_digit()) {
        if let Ok(num) = host.parse::<u32>() {
            self.validate_ipv4(&Ipv4Addr::from(num))?;
        }
        if host.starts_with('0') {
            if let Ok(num) = u32::from_str_radix(host, 8) {
                self.validate_ipv4(&Ipv4Addr::from(num))?;
            }
        }
    }
    // Hex-encoded hostnames ("0x…"); `host` is already lowercased.
    if let Some(hex_digits) = host.strip_prefix("0x") {
        if let Ok(num) = u32::from_str_radix(hex_digits, 16) {
            self.validate_ipv4(&Ipv4Addr::from(num))?;
        }
    }
    Ok(())
}
/// Follow redirects with SSRF validation.
///
/// Re-validates every hop's URL before requesting it, so a permitted
/// public URL cannot redirect the client into a private or internal
/// address range. Returns the final (non-redirect) URL together with its
/// response.
///
/// NOTE(review): each hop is fetched with GET and without the original
/// request's custom headers or body — confirm this is intended for POST
/// redirects (307/308 would normally preserve the method).
async fn follow_redirects_safe(&self, url: Url, max_hops: usize) -> Result<(Url, reqwest::Response)> {
    let mut current_url = url;
    let mut hops = 0;
    loop {
        // Validate the current URL on every hop, including the first.
        current_url = self.validate_url(current_url.as_str())?;
        // Make the request
        let response = self
            .client
            .get(current_url.clone())
            .send()
            .await
            .map_err(|e| ZclawError::ToolError(format!("Request failed: {}", e)))?;
        // Check if it's a redirect (any 3xx status)
        let status = response.status();
        if status.is_redirection() {
            // Count this hop; at most `max_hops` redirects are followed.
            hops += 1;
            if hops > max_hops {
                return Err(ZclawError::InvalidInput(format!(
                    "Too many redirects (max {})",
                    max_hops
                )));
            }
            // Get the redirect location
            let location = response
                .headers()
                .get(reqwest::header::LOCATION)
                .and_then(|h| h.to_str().ok())
                .ok_or_else(|| {
                    ZclawError::ToolError("Redirect without Location header".into())
                })?;
            // Resolve the location against the current URL, so relative
            // redirects ("/next") work as well as absolute ones.
            let new_url = current_url.join(location).map_err(|e| {
                ZclawError::InvalidInput(format!("Invalid redirect location: {}", e))
            })?;
            tracing::debug!(
                "Following redirect {} -> {}",
                current_url.as_str(),
                new_url.as_str()
            );
            current_url = new_url;
            // Continue loop to validate and follow
        } else {
            // Not a redirect, return the response
            return Ok((current_url, response));
        }
    }
}
}
@@ -21,7 +348,7 @@ impl Tool for WebFetchTool {
}
fn description(&self) -> &str {
"Fetch content from a URL"
"Fetch content from a URL with SSRF protection"
}
fn input_schema(&self) -> Value {
@@ -30,12 +357,29 @@ impl Tool for WebFetchTool {
"properties": {
"url": {
"type": "string",
"description": "The URL to fetch"
"description": "The URL to fetch (must be http or https)"
},
"method": {
"type": "string",
"enum": ["GET", "POST"],
"description": "HTTP method (default: GET)"
},
"headers": {
"type": "object",
"description": "Optional HTTP headers (key-value pairs)",
"additionalProperties": {
"type": "string"
}
},
"body": {
"type": "string",
"description": "Request body for POST requests"
},
"timeout": {
"type": "integer",
"description": "Timeout in seconds (default: 30, max: 60)",
"minimum": 1,
"maximum": 60
}
},
"required": ["url"]
@@ -43,13 +387,167 @@ impl Tool for WebFetchTool {
}
/// Fetch a URL with SSRF protection.
///
/// Validates the target before any network traffic, sends the request with
/// an enforced timeout, re-validates every redirect hop, caps the response
/// size, and returns the body (UTF-8, or base64-encoded when binary).
///
/// # Errors
/// Returns `InvalidInput` for a missing/blocked URL, unsupported method,
/// or bad header; `ToolError` for network failures and oversized bodies.
async fn execute(&self, input: Value, _context: &ToolContext) -> Result<Value> {
    let url_str = input["url"]
        .as_str()
        .ok_or_else(|| ZclawError::InvalidInput("Missing 'url' parameter".into()))?;
    let method = input["method"].as_str().unwrap_or("GET").to_uppercase();
    // Cap the caller-supplied timeout at 60s; the schema enforces a minimum of 1.
    let timeout_secs = input["timeout"].as_u64().unwrap_or(REQUEST_TIMEOUT_SECS).min(60);
    // Validate URL for SSRF before any network activity.
    let url = self.validate_url(url_str)?;
    tracing::info!("WebFetch: Fetching {} with method {}", url.as_str(), method);
    // Build request with validated URL
    let mut request_builder = match method.as_str() {
        "GET" => self.client.get(url.clone()),
        "POST" => {
            let mut builder = self.client.post(url.clone());
            if let Some(body) = input["body"].as_str() {
                builder = builder.body(body.to_string());
            }
            builder
        }
        _ => {
            return Err(ZclawError::InvalidInput(format!(
                "Unsupported HTTP method: {}",
                method
            )));
        }
    };
    // Add custom headers if provided
    if let Some(headers) = input["headers"].as_object() {
        for (key, value) in headers {
            if let Some(value_str) = value.as_str() {
                // Block dangerous headers
                let key_lower = key.to_lowercase();
                if key_lower == "host" {
                    continue; // Don't allow overriding host
                }
                if key_lower.starts_with("x-forwarded") {
                    continue; // Block proxy header injection
                }
                let header_name = reqwest::header::HeaderName::try_from(key.as_str())
                    .map_err(|e| {
                        ZclawError::InvalidInput(format!("Invalid header name '{}': {}", key, e))
                    })?;
                let header_value = reqwest::header::HeaderValue::from_str(value_str)
                    .map_err(|e| {
                        ZclawError::InvalidInput(format!("Invalid header value: {}", e))
                    })?;
                request_builder = request_builder.header(header_name, header_value);
            }
        }
    }
    // Set timeout
    let request_builder = request_builder.timeout(Duration::from_secs(timeout_secs));
    // Execute the initial request, translating common failure modes into
    // user-friendly error messages.
    let response = request_builder
        .send()
        .await
        .map_err(|e| {
            let error_msg = e.to_string();
            if error_msg.contains("dns") || error_msg.contains("resolve") {
                ZclawError::ToolError(format!(
                    "Failed to resolve hostname: {}. Please check the URL.",
                    url.host_str().unwrap_or("unknown")
                ))
            } else if error_msg.contains("timeout") {
                ZclawError::ToolError(format!(
                    "Request timed out after {} seconds",
                    timeout_secs
                ))
            } else if error_msg.contains("connection refused") {
                ZclawError::ToolError(
                    "Connection refused. The server may be down or unreachable.".into(),
                )
            } else {
                ZclawError::ToolError(format!("Request failed: {}", error_msg))
            }
        })?;
    // Handle redirects manually so every hop is re-validated for SSRF.
    let (final_url, response) = if response.status().is_redirection() {
        let location = response
            .headers()
            .get(reqwest::header::LOCATION)
            .and_then(|h| h.to_str().ok())
            .ok_or_else(|| {
                ZclawError::ToolError("Redirect without Location header".into())
            })?;
        let redirect_url = url.join(location).map_err(|e| {
            ZclawError::InvalidInput(format!("Invalid redirect location: {}", e))
        })?;
        self.follow_redirects_safe(redirect_url, MAX_REDIRECT_HOPS).await?
    } else {
        (url, response)
    };
    let status = response.status();
    let status_code = status.as_u16();
    // Check content length before reading body
    if let Some(content_length) = response.content_length() {
        if content_length > MAX_RESPONSE_SIZE {
            return Err(ZclawError::ToolError(format!(
                "Response too large: {} bytes (max: {} bytes)",
                content_length, MAX_RESPONSE_SIZE
            )));
        }
    }
    // Get content type BEFORE consuming response with bytes()
    let content_type = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|h| h.to_str().ok())
        .unwrap_or("text/plain")
        .to_string();
    // Read response body with size limit
    let bytes = response.bytes().await.map_err(|e| {
        ZclawError::ToolError(format!("Failed to read response body: {}", e))
    })?;
    // Double-check size after reading (Content-Length can be absent or wrong).
    if bytes.len() as u64 > MAX_RESPONSE_SIZE {
        return Err(ZclawError::ToolError(format!(
            "Response too large: {} bytes (max: {} bytes)",
            bytes.len(),
            MAX_RESPONSE_SIZE
        )));
    }
    // Try to decode as UTF-8, fall back to base64 for binary
    let content = String::from_utf8(bytes.to_vec()).unwrap_or_else(|_| {
        use base64::Engine;
        base64::engine::general_purpose::STANDARD.encode(&bytes)
    });
    tracing::info!(
        "WebFetch: Successfully fetched {} bytes from {} (status: {})",
        content.len(),
        final_url.as_str(),
        status_code
    );
    Ok(json!({
        "status": status_code,
        "url": final_url.as_str(),
        "content_type": content_type,
        "content": content,
        "size": content.len()
    }))
}
}
@@ -59,3 +557,91 @@ impl Default for WebFetchTool {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Assert that `validate_url` rejects every URL in `urls`.
    fn assert_all_blocked(tool: &WebFetchTool, urls: &[&str]) {
        for url in urls {
            assert!(tool.validate_url(url).is_err(), "{} should be blocked", url);
        }
    }

    #[test]
    fn test_validate_localhost() {
        let tool = WebFetchTool::new();
        assert_all_blocked(
            &tool,
            &[
                "http://localhost/test",
                "http://127.0.0.1/test",
                "http://127.0.0.2/test",
            ],
        );
    }

    #[test]
    fn test_validate_private_ips() {
        let tool = WebFetchTool::new();
        // Every RFC 1918 range must be rejected.
        assert_all_blocked(
            &tool,
            &[
                "http://10.0.0.1/test",
                "http://10.255.255.255/test",
                "http://172.16.0.1/test",
                "http://172.31.255.255/test",
                "http://192.168.0.1/test",
                "http://192.168.255.255/test",
            ],
        );
        // 172.15.x.x sits outside 172.16.0.0/12, so it stays reachable.
        assert!(tool.validate_url("http://172.15.0.1/test").is_ok());
    }

    #[test]
    fn test_validate_cloud_metadata() {
        let tool = WebFetchTool::new();
        assert_all_blocked(&tool, &["http://169.254.169.254/metadata"]);
    }

    #[test]
    fn test_validate_ipv6() {
        let tool = WebFetchTool::new();
        // Loopback, unspecified, and IPv4-mapped loopback are all blocked.
        assert_all_blocked(
            &tool,
            &[
                "http://[::1]/test",
                "http://[::]/test",
                "http://[::ffff:127.0.0.1]/test",
            ],
        );
    }

    #[test]
    fn test_validate_scheme() {
        let tool = WebFetchTool::new();
        // Anything other than http/https is rejected outright.
        assert_all_blocked(
            &tool,
            &[
                "ftp://example.com/test",
                "file:///etc/passwd",
                "javascript:alert(1)",
            ],
        );
        // Plain http and https validate successfully.
        assert!(tool.validate_url("http://example.com/test").is_ok());
        assert!(tool.validate_url("https://example.com/test").is_ok());
    }

    #[test]
    fn test_validate_blocked_hostnames() {
        let tool = WebFetchTool::new();
        assert_all_blocked(
            &tool,
            &[
                "http://localhost/test",
                "http://metadata.google.internal/test",
                "http://kubernetes.default/test",
            ],
        );
    }

    #[test]
    fn test_validate_url_length() {
        let tool = WebFetchTool::new();
        // A path padded far past any sane limit must be rejected.
        let long_url = format!("http://example.com/{}", "a".repeat(3000));
        assert!(tool.validate_url(&long_url).is_err());
    }
}