fix(core): ensure YOLO mode auto-approves complex shell commands when parsing fails (#17920)

This commit is contained in:
Abhi
2026-01-30 15:44:17 -05:00
committed by GitHub
parent a362b7b382
commit 76387d22ae
3 changed files with 117 additions and 3 deletions

View File

@@ -564,11 +564,17 @@ describe('run_shell_command', () => {
it('rejects invalid shell expressions', async () => {
await rig.setup('rejects invalid shell expressions', {
settings: { tools: { core: ['run_shell_command'] } },
settings: {
tools: {
core: ['run_shell_command'],
allowed: ['run_shell_command(echo)'], // Specifically allow echo
},
},
});
const invalidCommand = getInvalidCommand();
const result = await rig.run({
args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
approvalMode: 'default', // Use default mode so safety fallback triggers confirmation
});
expect(result).toContain('FAIL');

View File

@@ -1398,6 +1398,100 @@ describe('PolicyEngine', () => {
});
});
describe('shell command parsing failure', () => {
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
const { splitCommands } = await import('../utils/shell-utils.js');
const rules: PolicyRule[] = [
{
decision: PolicyDecision.ALLOW,
priority: 999,
modes: [ApprovalMode.YOLO],
},
{
toolName: 'run_shell_command',
decision: PolicyDecision.ASK_USER,
priority: 10,
},
];
engine = new PolicyEngine({
rules,
approvalMode: ApprovalMode.YOLO,
});
// Simulate parsing failure (splitCommands returning empty array)
vi.mocked(splitCommands).mockReturnValueOnce([]);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'complex command' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ALLOW);
expect(result.rule).toBeDefined();
expect(result.rule?.priority).toBe(999);
});
it('should return DENY in YOLO mode if shell command parsing fails and a higher priority rule says DENY', async () => {
const { splitCommands } = await import('../utils/shell-utils.js');
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
decision: PolicyDecision.DENY,
priority: 2000, // Very high priority DENY (e.g. Admin)
},
{
decision: PolicyDecision.ALLOW,
priority: 999,
modes: [ApprovalMode.YOLO],
},
];
engine = new PolicyEngine({
rules,
approvalMode: ApprovalMode.YOLO,
});
// Simulate parsing failure
vi.mocked(splitCommands).mockReturnValueOnce([]);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'complex command' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.DENY);
});
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
const { splitCommands } = await import('../utils/shell-utils.js');
const rules: PolicyRule[] = [
{
toolName: 'run_shell_command',
decision: PolicyDecision.ALLOW,
priority: 20,
},
];
engine = new PolicyEngine({
rules,
approvalMode: ApprovalMode.DEFAULT,
});
// Simulate parsing failure
vi.mocked(splitCommands).mockReturnValueOnce([]);
const result = await engine.check(
{ name: 'run_shell_command', args: { command: 'complex command' } },
undefined,
);
expect(result.decision).toBe(PolicyDecision.ASK_USER);
expect(result.rule).toBeDefined();
expect(result.rule?.priority).toBe(20);
});
});
describe('safety checker integration', () => {
it('should call checker when rule allows and has safety_checker', async () => {
const rules: PolicyRule[] = [

View File

@@ -152,14 +152,28 @@ export class PolicyEngine {
const subCommands = splitCommands(command);
if (subCommands.length === 0) {
// If the matched rule says DENY, we should respect it immediately even if parsing fails.
if (ruleDecision === PolicyDecision.DENY) {
return { decision: PolicyDecision.DENY, rule };
}
// In YOLO mode, we should proceed anyway even if we can't parse the command.
if (this.approvalMode === ApprovalMode.YOLO) {
return {
decision: PolicyDecision.ALLOW,
rule,
};
}
debugLogger.debug(
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
);
// Parsing logic failed, we can't trust it. Force ASK_USER (or DENY).
// We don't blame a specific rule here, unless the input rule was stricter.
// We return the rule that matched so the evaluation loop terminates.
return {
decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER),
rule: undefined,
rule,
};
}