diff --git a/integration-tests/run_shell_command.test.ts b/integration-tests/run_shell_command.test.ts index 1664870cfb..027f4cba8d 100644 --- a/integration-tests/run_shell_command.test.ts +++ b/integration-tests/run_shell_command.test.ts @@ -564,11 +564,17 @@ describe('run_shell_command', () => { it('rejects invalid shell expressions', async () => { await rig.setup('rejects invalid shell expressions', { - settings: { tools: { core: ['run_shell_command'] } }, + settings: { + tools: { + core: ['run_shell_command'], + allowed: ['run_shell_command(echo)'], // Specifically allow echo + }, + }, }); const invalidCommand = getInvalidCommand(); const result = await rig.run({ args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`, + approvalMode: 'default', // Use default mode so safety fallback triggers confirmation }); expect(result).toContain('FAIL'); diff --git a/packages/core/src/policy/policy-engine.test.ts b/packages/core/src/policy/policy-engine.test.ts index 782123cfb3..495ca5d145 100644 --- a/packages/core/src/policy/policy-engine.test.ts +++ b/packages/core/src/policy/policy-engine.test.ts @@ -1398,6 +1398,100 @@ describe('PolicyEngine', () => { }); }); + describe('shell command parsing failure', () => { + it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => { + const { splitCommands } = await import('../utils/shell-utils.js'); + const rules: PolicyRule[] = [ + { + decision: PolicyDecision.ALLOW, + priority: 999, + modes: [ApprovalMode.YOLO], + }, + { + toolName: 'run_shell_command', + decision: PolicyDecision.ASK_USER, + priority: 10, + }, + ]; + + engine = new PolicyEngine({ + rules, + approvalMode: ApprovalMode.YOLO, + }); + + // Simulate parsing failure (splitCommands returning empty array) + vi.mocked(splitCommands).mockReturnValueOnce([]); + + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'complex command' } }, + undefined, + ); + + expect(result.decision).toBe(PolicyDecision.ALLOW); + expect(result.rule).toBeDefined(); + expect(result.rule?.priority).toBe(999); + }); + + it('should return DENY in YOLO mode if shell command parsing fails and a higher priority rule says DENY', async () => { + const { splitCommands } = await import('../utils/shell-utils.js'); + const rules: PolicyRule[] = [ + { + toolName: 'run_shell_command', + decision: PolicyDecision.DENY, + priority: 2000, // Very high priority DENY (e.g. Admin) + }, + { + decision: PolicyDecision.ALLOW, + priority: 999, + modes: [ApprovalMode.YOLO], + }, + ]; + + engine = new PolicyEngine({ + rules, + approvalMode: ApprovalMode.YOLO, + }); + + // Simulate parsing failure + vi.mocked(splitCommands).mockReturnValueOnce([]); + + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'complex command' } }, + undefined, + ); + + expect(result.decision).toBe(PolicyDecision.DENY); + }); + + it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => { + const { splitCommands } = await import('../utils/shell-utils.js'); + const rules: PolicyRule[] = [ + { + toolName: 'run_shell_command', + decision: PolicyDecision.ALLOW, + priority: 20, + }, + ]; + + engine = new PolicyEngine({ + rules, + approvalMode: ApprovalMode.DEFAULT, + }); + + // Simulate parsing failure + vi.mocked(splitCommands).mockReturnValueOnce([]); + + const result = await engine.check( + { name: 'run_shell_command', args: { command: 'complex command' } }, + undefined, + ); + + expect(result.decision).toBe(PolicyDecision.ASK_USER); + expect(result.rule).toBeDefined(); + expect(result.rule?.priority).toBe(20); + }); + }); + describe('safety checker integration', () => { it('should call checker when rule allows and has safety_checker', async () => { const rules: PolicyRule[] = [ diff --git a/packages/core/src/policy/policy-engine.ts b/packages/core/src/policy/policy-engine.ts index 656a17c109..d617d3d75c 100644 --- a/packages/core/src/policy/policy-engine.ts +++ b/packages/core/src/policy/policy-engine.ts @@ -152,14 +152,28 @@ export class PolicyEngine { const subCommands = splitCommands(command); if (subCommands.length === 0) { + // If the matched rule says DENY, we should respect it immediately even if parsing fails. + if (ruleDecision === PolicyDecision.DENY) { + return { decision: PolicyDecision.DENY, rule }; + } + + // In YOLO mode, we should proceed anyway even if we can't parse the command. + if (this.approvalMode === ApprovalMode.YOLO) { + return { + decision: PolicyDecision.ALLOW, + rule, + }; + } + debugLogger.debug( `[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`, ); + // Parsing logic failed, we can't trust it. Force ASK_USER (or DENY). - // We don't blame a specific rule here, unless the input rule was stricter. + // We return the rule that matched so the evaluation loop terminates. return { decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER), - rule: undefined, + rule, }; }