Implements L4 self-evolution authorization with:

Autonomy Levels:
- Supervised: All actions require user confirmation
- Assisted: Low-risk actions auto-execute, high-risk need approval
- Autonomous: Agent decides, only high-impact actions notify

Features:
- Risk-based action classification (low/medium/high)
- Importance threshold for auto-approval
- Approval workflow with pending queue
- Full audit logging with rollback support
- Configurable action permissions per level

Security:
- High-risk actions ALWAYS require confirmation
- Self-modification disabled by default even in autonomous mode
- All autonomous actions logged for audit
- One-click rollback to any historical state

Tests: 30 passing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
365 lines
10 KiB
TypeScript
365 lines
10 KiB
TypeScript
/**
 * AutonomyManager Tests - L4 Self-Evolution Authorization
 *
 * Tests for the tiered authorization system:
 * - Level-based permissions (supervised/assisted/autonomous)
 * - Risk assessment for actions
 * - Approval workflow
 * - Audit logging
 * - Rollback functionality
 */
|
|
|
|
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
import {
|
|
AutonomyManager,
|
|
getAutonomyManager,
|
|
resetAutonomyManager,
|
|
canAutoExecute,
|
|
executeWithAutonomy,
|
|
DEFAULT_AUTONOMY_CONFIGS,
|
|
type ActionType,
|
|
type AutonomyLevel,
|
|
} from '../autonomy-manager';
|
|
|
|
// === Helper to create fresh manager ===
|
|
|
|
/**
 * Builds a fresh AutonomyManager for a test case.
 *
 * Calls resetAutonomyManager() first, then hands a shallow copy of the
 * DEFAULT_AUTONOMY_CONFIGS entry for `level` to getAutonomyManager().
 *
 * @param level - Key into DEFAULT_AUTONOMY_CONFIGS; defaults to 'assisted'.
 * @returns Whatever getAutonomyManager() returns for the copied config.
 */
function createManager(level: AutonomyLevel = 'assisted'): AutonomyManager {
  resetAutonomyManager();

  // Spread so the manager never receives the DEFAULT_AUTONOMY_CONFIGS entry itself.
  const presetCopy = { ...DEFAULT_AUTONOMY_CONFIGS[level] };
  return getAutonomyManager(presetCopy);
}
|
|
|
|
// === Risk Assessment Tests ===
|
|
|
|
/**
 * Risk classification suite.
 *
 * Every test evaluates one action on a manager created with the 'assisted'
 * defaults and checks the `riskLevel` reported on the returned decision.
 */
describe('AutonomyManager Risk Assessment', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('assisted');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should classify memory_save as low risk', () => {
    const { riskLevel } = manager.evaluate('memory_save', { importance: 3 });
    expect(riskLevel).toBe('low');
  });

  it('should classify memory_delete as high risk', () => {
    const { riskLevel } = manager.evaluate('memory_delete');
    expect(riskLevel).toBe('high');
  });

  it('should classify identity_update as high risk', () => {
    const { riskLevel } = manager.evaluate('identity_update');
    expect(riskLevel).toBe('high');
  });

  it('should allow risk override', () => {
    // The riskOverride option is expected to win over the action's own classification.
    const { riskLevel } = manager.evaluate('memory_save', { riskOverride: 'high' });
    expect(riskLevel).toBe('high');
  });
});
|
|
|
|
// === Level-Based Permission Tests ===
|
|
|
|
/**
 * Permission checks per autonomy level.
 *
 * Each nested suite creates its manager from the defaults of one level
 * ('supervised', 'assisted', 'autonomous') and inspects the `allowed` and
 * `requiresApproval` fields of the decisions returned by evaluate().
 */
describe('AutonomyManager Level Permissions', () => {
  // Runs resetAutonomyManager() after every test in the nested suites.
  afterEach(() => {
    resetAutonomyManager();
  });

  describe('Supervised Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('supervised');
    });

    it('should require approval for all actions', () => {
      const { requiresApproval, allowed } = manager.evaluate('memory_save', { importance: 1 });
      expect(requiresApproval).toBe(true);
      expect(allowed).toBe(false);
    });

    it('should not auto-execute even low-risk actions', () => {
      const { allowed } = manager.evaluate('reflection_run', { importance: 1 });
      expect(allowed).toBe(false);
    });
  });

  describe('Assisted Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('assisted');
    });

    it('should auto-approve low importance, low risk actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('memory_save', { importance: 3 });
      expect(allowed).toBe(true);
      expect(requiresApproval).toBe(false);
    });

    it('should require approval for high importance actions', () => {
      const { requiresApproval } = manager.evaluate('memory_save', { importance: 8 });
      expect(requiresApproval).toBe(true);
    });

    it('should always require approval for high risk actions', () => {
      const { requiresApproval, allowed } = manager.evaluate('memory_delete', { importance: 1 });
      expect(requiresApproval).toBe(true);
      expect(allowed).toBe(false);
    });

    it('should not auto-approve identity updates', () => {
      const { allowed, requiresApproval } = manager.evaluate('identity_update', { importance: 3 });
      expect(allowed).toBe(false);
      expect(requiresApproval).toBe(true);
    });
  });

  describe('Autonomous Mode', () => {
    let manager: AutonomyManager;

    beforeEach(() => {
      manager = createManager('autonomous');
    });

    it('should auto-approve medium risk, medium importance actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('skill_install', { importance: 5 });
      expect(allowed).toBe(true);
      expect(requiresApproval).toBe(false);
    });

    it('should still require approval for high risk actions', () => {
      const { allowed, requiresApproval } = manager.evaluate('memory_delete', { importance: 1 });
      expect(allowed).toBe(false);
      expect(requiresApproval).toBe(true);
    });

    it('should not auto-approve self-modification', () => {
      // Even in autonomous mode, self-modification requires approval.
      // NOTE(review): identity_update is expected to be classified as high risk
      // elsewhere in this file, so this assertion may hold regardless of the
      // selfModification flag - confirm the test actually exercises the flag.
      const { allowedActions } = manager.getConfig();
      manager.updateConfig({
        allowedActions: {
          ...allowedActions,
          selfModification: false,
        },
      });

      const { allowed } = manager.evaluate('identity_update', { importance: 3 });
      expect(allowed).toBe(false);
    });
  });
});
|
|
|
|
// === Approval Workflow Tests ===
|
|
|
|
/**
 * Approval queue suite.
 *
 * Uses a manager created with the 'supervised' defaults. Each test evaluates
 * an action, submits the decision through requestApproval(), and then checks
 * approve()/reject() results together with the size of getPendingApprovals().
 */
describe('AutonomyManager Approval Workflow', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('supervised');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should request approval and return approval ID', () => {
    const approvalId = manager.requestApproval(manager.evaluate('memory_save'));

    expect(approvalId).toMatch(/^approval_/);

    const pending = manager.getPendingApprovals();
    expect(pending.length).toBe(1);
  });

  it('should approve pending action', () => {
    const approvalId = manager.requestApproval(manager.evaluate('memory_save'));

    const wasApproved = manager.approve(approvalId);
    expect(wasApproved).toBe(true);

    // An approved request must leave the pending queue.
    const pending = manager.getPendingApprovals();
    expect(pending.length).toBe(0);
  });

  it('should reject pending action', () => {
    const approvalId = manager.requestApproval(manager.evaluate('memory_save'));

    const wasRejected = manager.reject(approvalId);
    expect(wasRejected).toBe(true);

    // A rejected request must leave the pending queue as well.
    const pending = manager.getPendingApprovals();
    expect(pending.length).toBe(0);
  });

  it('should return false for non-existent approval', () => {
    const unknownId = 'non_existent';
    expect(manager.approve(unknownId)).toBe(false);
    expect(manager.reject(unknownId)).toBe(false);
  });
});
|
|
|
|
// === Audit Log Tests ===
|
|
|
|
/**
 * Audit log suite.
 *
 * Uses a manager created with the 'assisted' defaults and clears its audit
 * log before every test, so each test starts from an empty log. Covers
 * recording of evaluate() calls, the 100-entry cap, clearing, and rollback.
 */
describe('AutonomyManager Audit Log', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('assisted');
    manager.clearAuditLog();
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should log decisions', () => {
    manager.evaluate('memory_save', { importance: 3 });

    const log = manager.getAuditLog();
    expect(log.length).toBe(1);
    expect(log[0].action).toBe('memory_save');
  });

  it('should limit log to 100 entries', () => {
    // Produce more entries than the expected cap.
    for (let i = 0; i < 150; i++) {
      manager.evaluate('memory_save', { importance: i % 10 });
    }

    // Ask for more than the cap so the cap itself is what limits the result.
    const log = manager.getAuditLog(200);
    expect(log.length).toBe(100);
  });

  it('should clear audit log', () => {
    manager.evaluate('memory_save');
    manager.evaluate('reflection_run');

    expect(manager.getAuditLog().length).toBe(2);

    manager.clearAuditLog();

    expect(manager.getAuditLog().length).toBe(0);
  });

  it('should support rollback', () => {
    manager.evaluate('memory_save');
    const log = manager.getAuditLog();
    const entryId = log[0].id;

    const result = manager.rollback(entryId);

    expect(result).toBe(true);

    const updatedLog = manager.getAuditLog();
    expect(updatedLog[0].outcome).toBe('rolled_back');
    expect(updatedLog[0].rolledBackAt).toBeDefined();
  });

  it('should not allow double rollback', () => {
    manager.evaluate('memory_save');
    const log = manager.getAuditLog();
    const entryId = log[0].id;

    // The first rollback has to succeed; without this check the test would
    // also pass for a rollback() that always returns false.
    const firstResult = manager.rollback(entryId);
    expect(firstResult).toBe(true);

    // Rolling back the same entry a second time must be refused.
    const secondResult = manager.rollback(entryId);
    expect(secondResult).toBe(false);
  });
});
|
|
|
|
// === Config Management Tests ===
|
|
|
|
/**
 * Configuration suite.
 *
 * Uses a manager created with the 'assisted' defaults and checks what
 * getConfig() reports initially, after updateConfig(), and after setLevel().
 */
describe('AutonomyManager Config Management', () => {
  let manager: AutonomyManager;

  beforeEach(() => {
    manager = createManager('assisted');
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  it('should get current config', () => {
    const { level, allowedActions } = manager.getConfig();
    expect(level).toBe('assisted');
    expect(allowedActions.memoryAutoSave).toBe(true);
  });

  it('should update config', () => {
    manager.updateConfig({
      approvalThreshold: {
        importanceMax: 8,
        riskMax: 'medium',
      },
    });

    // Read the config back only after the update has been applied.
    const { approvalThreshold } = manager.getConfig();
    expect(approvalThreshold.importanceMax).toBe(8);
  });

  it('should change level', () => {
    manager.setLevel('autonomous');

    const { level, allowedActions } = manager.getConfig();
    expect(level).toBe('autonomous');
    expect(allowedActions.memoryAutoSave).toBe(true);
    expect(allowedActions.identityAutoUpdate).toBe(true);
  });
});
|
|
|
|
// === Helper Function Tests ===
|
|
|
|
/**
 * Suite for the module-level helpers canAutoExecute() and
 * executeWithAutonomy().
 *
 * Before each test the manager is reset and re-created from a copy of the
 * 'assisted' defaults; the helpers are then called without a manager argument.
 */
describe('Helper Functions', () => {
  beforeEach(() => {
    resetAutonomyManager();
    // Block body on purpose: the manager is created for its side effect only,
    // and nothing is returned to the test runner from this hook.
    const assistedDefaults = { ...DEFAULT_AUTONOMY_CONFIGS.assisted };
    getAutonomyManager(assistedDefaults);
  });

  afterEach(() => {
    resetAutonomyManager();
  });

  describe('canAutoExecute', () => {
    it('should return true for auto-approvable actions', () => {
      const { canProceed } = canAutoExecute('memory_save', 3);
      expect(canProceed).toBe(true);
    });

    it('should return false for actions needing approval', () => {
      const { canProceed } = canAutoExecute('memory_delete', 1);
      expect(canProceed).toBe(false);
    });
  });

  describe('executeWithAutonomy', () => {
    it('should execute auto-approved actions immediately', async () => {
      const executor = vi.fn().mockResolvedValue('success');

      const outcome = await executeWithAutonomy('memory_save', 3, executor);

      expect(outcome.executed).toBe(true);
      expect(outcome.result).toBe('success');
      expect(executor).toHaveBeenCalled();
    });

    it('should not execute actions needing approval', async () => {
      const executor = vi.fn().mockResolvedValue('success');

      const outcome = await executeWithAutonomy('memory_delete', 1, executor);

      expect(outcome.executed).toBe(false);
      expect(executor).not.toHaveBeenCalled();
      expect(outcome.approvalId).toBeDefined();
    });

    it('should call onApprovalNeeded callback', async () => {
      const executor = vi.fn().mockResolvedValue('success');
      const onApprovalNeeded = vi.fn();

      await executeWithAutonomy('memory_delete', 1, executor, onApprovalNeeded);

      expect(onApprovalNeeded).toHaveBeenCalled();
    });
  });
});
|