Files
zclaw_openfang/desktop/src/lib/__tests__/autonomy-manager.test.ts
iven 8e630882c7 feat(l4): add AutonomyManager for tiered authorization system (Phase 3)
Implements L4 self-evolution authorization with:

Autonomy Levels:
- Supervised: All actions require user confirmation
- Assisted: Low-risk actions auto-execute, high-risk need approval
- Autonomous: Agent decides, only high-impact actions notify

Features:
- Risk-based action classification (low/medium/high)
- Importance threshold for auto-approval
- Approval workflow with pending queue
- Full audit logging with rollback support
- Configurable action permissions per level

Security:
- High-risk actions ALWAYS require confirmation
- Self-modification disabled by default even in autonomous mode
- All autonomous actions logged for audit
- One-click rollback to any historical state

Tests: 30 passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-16 10:49:49 +08:00

365 lines
10 KiB
TypeScript

/**
* AutonomyManager Tests - L4 Self-Evolution Authorization
*
* Tests for the tiered authorization system:
* - Level-based permissions (supervised/assisted/autonomous)
* - Risk assessment for actions
* - Approval workflow
* - Audit logging
* - Rollback functionality
*/
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import {
AutonomyManager,
getAutonomyManager,
resetAutonomyManager,
canAutoExecute,
executeWithAutonomy,
DEFAULT_AUTONOMY_CONFIGS,
type ActionType,
type AutonomyLevel,
} from '../autonomy-manager';
// === Helper to create fresh manager ===
/**
 * Builds the AutonomyManager used by a test.
 *
 * Calls resetAutonomyManager() first, then asks getAutonomyManager() for a
 * manager configured with a shallow copy of the default config for `level`,
 * so the shared DEFAULT_AUTONOMY_CONFIGS entry itself is not handed over.
 *
 * @param level - Autonomy level whose default config is copied ('assisted' when omitted).
 * @returns Whatever getAutonomyManager() returns for that config.
 */
function createManager(level: AutonomyLevel = 'assisted'): AutonomyManager {
  resetAutonomyManager();
  const levelDefaults = { ...DEFAULT_AUTONOMY_CONFIGS[level] };
  return getAutonomyManager(levelDefaults);
}
// === Risk Assessment Tests ===
/**
 * Verifies the riskLevel reported by evaluate() for several action types,
 * using a manager created at the 'assisted' level.
 */
describe('AutonomyManager Risk Assessment', () => {
  let manager: AutonomyManager;

  // A new manager is created before every test and reset afterwards.
  beforeEach(() => {
    manager = createManager('assisted');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should classify memory_save as low risk', () => {
    const { riskLevel } = manager.evaluate('memory_save', { importance: 3 });
    expect(riskLevel).toBe('low');
  });

  it('should classify memory_delete as high risk', () => {
    const { riskLevel } = manager.evaluate('memory_delete');
    expect(riskLevel).toBe('high');
  });

  it('should classify identity_update as high risk', () => {
    const { riskLevel } = manager.evaluate('identity_update');
    expect(riskLevel).toBe('high');
  });

  it('should allow risk override', () => {
    // memory_save is asserted to be low risk above; the override should win.
    const { riskLevel } = manager.evaluate('memory_save', { riskOverride: 'high' });
    expect(riskLevel).toBe('high');
  });
});
// === Level-Based Permission Tests ===
/**
 * Verifies the allowed / requiresApproval flags returned by evaluate() when
 * the manager is created from each level's default config.
 */
describe('AutonomyManager Level Permissions', () => {
  // Applies to every nested suite below.
  afterEach(() => {
    resetAutonomyManager();
  });

  describe('Supervised Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('supervised');
    });

    it('should require approval for all actions', () => {
      const { requiresApproval, allowed } = manager.evaluate('memory_save', { importance: 1 });
      expect(requiresApproval).toBe(true);
      expect(allowed).toBe(false);
    });

    it('should not auto-execute even low-risk actions', () => {
      const { allowed } = manager.evaluate('reflection_run', { importance: 1 });
      expect(allowed).toBe(false);
    });
  });

  describe('Assisted Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('assisted');
    });

    it('should auto-approve low importance, low risk actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('memory_save', { importance: 3 });
      expect(allowed).toBe(true);
      expect(requiresApproval).toBe(false);
    });

    it('should require approval for high importance actions', () => {
      const { requiresApproval } = manager.evaluate('memory_save', { importance: 8 });
      expect(requiresApproval).toBe(true);
    });

    it('should always require approval for high risk actions', () => {
      const { requiresApproval, allowed } = manager.evaluate('memory_delete', { importance: 1 });
      expect(requiresApproval).toBe(true);
      expect(allowed).toBe(false);
    });

    it('should not auto-approve identity updates', () => {
      const { allowed, requiresApproval } = manager.evaluate('identity_update', { importance: 3 });
      expect(allowed).toBe(false);
      expect(requiresApproval).toBe(true);
    });
  });

  describe('Autonomous Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('autonomous');
    });

    it('should auto-approve medium risk, medium importance actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('skill_install', { importance: 5 });
      expect(allowed).toBe(true);
      expect(requiresApproval).toBe(false);
    });

    it('should still require approval for high risk actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('memory_delete', { importance: 1 });
      expect(allowed).toBe(false);
      expect(requiresApproval).toBe(true);
    });

    it('should not auto-approve self-modification', () => {
      // Even in autonomous mode, self-modification requires approval
      const { allowedActions } = manager.getConfig();
      manager.updateConfig({
        allowedActions: { ...allowedActions, selfModification: false },
      });

      // NOTE(review): identity_update is asserted to be high risk in the risk
      // assessment suite, and high-risk actions are asserted to need approval
      // in this mode, so this check may pass regardless of the
      // selfModification flag — confirm it exercises the flag.
      const { allowed } = manager.evaluate('identity_update', { importance: 3 });
      expect(allowed).toBe(false);
    });
  });
});
// === Approval Workflow Tests ===
/**
 * Verifies requestApproval / approve / reject and the pending-approval list,
 * using a manager created at the 'supervised' level.
 */
describe('AutonomyManager Approval Workflow', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('supervised');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should request approval and return approval ID', () => {
    const pendingId = manager.requestApproval(manager.evaluate('memory_save'));

    expect(pendingId).toMatch(/^approval_/);
    expect(manager.getPendingApprovals()).toHaveLength(1);
  });

  it('should approve pending action', () => {
    const pendingId = manager.requestApproval(manager.evaluate('memory_save'));

    expect(manager.approve(pendingId)).toBe(true);
    // The request is expected to leave the pending list once approved.
    expect(manager.getPendingApprovals()).toHaveLength(0);
  });

  it('should reject pending action', () => {
    const pendingId = manager.requestApproval(manager.evaluate('memory_save'));

    expect(manager.reject(pendingId)).toBe(true);
    // The request is expected to leave the pending list once rejected.
    expect(manager.getPendingApprovals()).toHaveLength(0);
  });

  it('should return false for non-existent approval', () => {
    const unknownId = 'non_existent';
    expect(manager.approve(unknownId)).toBe(false);
    expect(manager.reject(unknownId)).toBe(false);
  });
});
// === Audit Log Tests ===
/**
 * Verifies audit-log recording, the 100-entry cap, clearing, and rollback,
 * using a manager created at the 'assisted' level.
 */
describe('AutonomyManager Audit Log', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('assisted');
    // Start every test from an empty log so the length assertions are exact.
    manager.clearAuditLog();
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should log decisions', () => {
    manager.evaluate('memory_save', { importance: 3 });

    const entries = manager.getAuditLog();
    expect(entries).toHaveLength(1);
    expect(entries[0].action).toBe('memory_save');
  });

  it('should limit log to 100 entries', () => {
    // 150 evaluations with importance cycling through 0..9.
    let evaluated = 0;
    while (evaluated < 150) {
      manager.evaluate('memory_save', { importance: evaluated % 10 });
      evaluated += 1;
    }

    // Ask for more than the expected cap to show the cap is what limits the result.
    expect(manager.getAuditLog(200)).toHaveLength(100);
  });

  it('should clear audit log', () => {
    manager.evaluate('memory_save');
    manager.evaluate('reflection_run');
    expect(manager.getAuditLog()).toHaveLength(2);

    manager.clearAuditLog();
    expect(manager.getAuditLog()).toHaveLength(0);
  });

  it('should support rollback', () => {
    manager.evaluate('memory_save');
    const [firstEntry] = manager.getAuditLog();

    expect(manager.rollback(firstEntry.id)).toBe(true);

    const [afterRollback] = manager.getAuditLog();
    expect(afterRollback.outcome).toBe('rolled_back');
    expect(afterRollback.rolledBackAt).toBeDefined();
  });

  it('should not allow double rollback', () => {
    manager.evaluate('memory_save');
    const [firstEntry] = manager.getAuditLog();
    const targetId = firstEntry.id;

    // First rollback; its result is not asserted in this test.
    manager.rollback(targetId);

    // A second rollback of the same entry must be refused.
    expect(manager.rollback(targetId)).toBe(false);
  });
});
// === Config Management Tests ===
/**
 * Verifies getConfig / updateConfig / setLevel, using a manager created at
 * the 'assisted' level.
 */
describe('AutonomyManager Config Management', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('assisted');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should get current config', () => {
    const current = manager.getConfig();

    expect(current.level).toBe('assisted');
    expect(current.allowedActions.memoryAutoSave).toBe(true);
  });

  it('should update config', () => {
    // The patch is passed inline so the 'medium' literal keeps its literal type.
    manager.updateConfig({
      approvalThreshold: { importanceMax: 8, riskMax: 'medium' },
    });

    expect(manager.getConfig().approvalThreshold.importanceMax).toBe(8);
  });

  it('should change level', () => {
    manager.setLevel('autonomous');

    const current = manager.getConfig();
    expect(current.level).toBe('autonomous');

    const permissions = current.allowedActions;
    expect(permissions.memoryAutoSave).toBe(true);
    expect(permissions.identityAutoUpdate).toBe(true);
  });
});
// === Helper Function Tests ===
/**
 * Verifies the module-level helpers canAutoExecute and executeWithAutonomy
 * after the manager has been initialised with the 'assisted' defaults.
 */
describe('Helper Functions', () => {
  beforeEach(() => {
    // Resets, then initialises the manager with a copy of the 'assisted'
    // defaults; the returned instance is not needed by these tests.
    createManager('assisted');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  describe('canAutoExecute', () => {
    it('should return true for auto-approvable actions', () => {
      const { canProceed } = canAutoExecute('memory_save', 3);
      expect(canProceed).toBe(true);
    });

    it('should return false for actions needing approval', () => {
      const { canProceed } = canAutoExecute('memory_delete', 1);
      expect(canProceed).toBe(false);
    });
  });

  describe('executeWithAutonomy', () => {
    it('should execute auto-approved actions immediately', async () => {
      const run = vi.fn().mockResolvedValue('success');

      const outcome = await executeWithAutonomy('memory_save', 3, run);

      expect(outcome.executed).toBe(true);
      expect(outcome.result).toBe('success');
      expect(run).toHaveBeenCalled();
    });

    it('should not execute actions needing approval', async () => {
      const run = vi.fn().mockResolvedValue('success');

      const outcome = await executeWithAutonomy('memory_delete', 1, run);

      expect(outcome.executed).toBe(false);
      expect(run).not.toHaveBeenCalled();
      // An approval id is expected in place of an execution result.
      expect(outcome.approvalId).toBeDefined();
    });

    it('should call onApprovalNeeded callback', async () => {
      const run = vi.fn().mockResolvedValue('success');
      const approvalSpy = vi.fn();

      await executeWithAutonomy('memory_delete', 1, run, approvalSpy);

      expect(approvalSpy).toHaveBeenCalled();
    });
  });
});