mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-02-01 14:44:29 +00:00
fix(core): ensure YOLO mode auto-approves complex shell commands when parsing fails (#17920)
This commit is contained in:
@@ -564,11 +564,17 @@ describe('run_shell_command', () => {
|
||||
|
||||
it('rejects invalid shell expressions', async () => {
|
||||
await rig.setup('rejects invalid shell expressions', {
|
||||
settings: { tools: { core: ['run_shell_command'] } },
|
||||
settings: {
|
||||
tools: {
|
||||
core: ['run_shell_command'],
|
||||
allowed: ['run_shell_command(echo)'], // Specifically allow echo
|
||||
},
|
||||
},
|
||||
});
|
||||
const invalidCommand = getInvalidCommand();
|
||||
const result = await rig.run({
|
||||
args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
|
||||
approvalMode: 'default', // Use default mode so safety fallback triggers confirmation
|
||||
});
|
||||
expect(result).toContain('FAIL');
|
||||
|
||||
|
||||
@@ -1398,6 +1398,100 @@ describe('PolicyEngine', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('shell command parsing failure', () => {
|
||||
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
|
||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
},
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
decision: PolicyDecision.ASK_USER,
|
||||
priority: 10,
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
});
|
||||
|
||||
// Simulate parsing failure (splitCommands returning empty array)
|
||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||
|
||||
const result = await engine.check(
|
||||
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||
expect(result.rule).toBeDefined();
|
||||
expect(result.rule?.priority).toBe(999);
|
||||
});
|
||||
|
||||
it('should return DENY in YOLO mode if shell command parsing fails and a higher priority rule says DENY', async () => {
|
||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
decision: PolicyDecision.DENY,
|
||||
priority: 2000, // Very high priority DENY (e.g. Admin)
|
||||
},
|
||||
{
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 999,
|
||||
modes: [ApprovalMode.YOLO],
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.YOLO,
|
||||
});
|
||||
|
||||
// Simulate parsing failure
|
||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||
|
||||
const result = await engine.check(
|
||||
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||
});
|
||||
|
||||
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
|
||||
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||
const rules: PolicyRule[] = [
|
||||
{
|
||||
toolName: 'run_shell_command',
|
||||
decision: PolicyDecision.ALLOW,
|
||||
priority: 20,
|
||||
},
|
||||
];
|
||||
|
||||
engine = new PolicyEngine({
|
||||
rules,
|
||||
approvalMode: ApprovalMode.DEFAULT,
|
||||
});
|
||||
|
||||
// Simulate parsing failure
|
||||
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||
|
||||
const result = await engine.check(
|
||||
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||
undefined,
|
||||
);
|
||||
|
||||
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||
expect(result.rule).toBeDefined();
|
||||
expect(result.rule?.priority).toBe(20);
|
||||
});
|
||||
});
|
||||
|
||||
describe('safety checker integration', () => {
|
||||
it('should call checker when rule allows and has safety_checker', async () => {
|
||||
const rules: PolicyRule[] = [
|
||||
|
||||
@@ -152,14 +152,28 @@ export class PolicyEngine {
|
||||
const subCommands = splitCommands(command);
|
||||
|
||||
if (subCommands.length === 0) {
|
||||
// If the matched rule says DENY, we should respect it immediately even if parsing fails.
|
||||
if (ruleDecision === PolicyDecision.DENY) {
|
||||
return { decision: PolicyDecision.DENY, rule };
|
||||
}
|
||||
|
||||
// In YOLO mode, we should proceed anyway even if we can't parse the command.
|
||||
if (this.approvalMode === ApprovalMode.YOLO) {
|
||||
return {
|
||||
decision: PolicyDecision.ALLOW,
|
||||
rule,
|
||||
};
|
||||
}
|
||||
|
||||
debugLogger.debug(
|
||||
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
|
||||
);
|
||||
|
||||
// Parsing logic failed, we can't trust it. Force ASK_USER (or DENY).
|
||||
// We don't blame a specific rule here, unless the input rule was stricter.
|
||||
// We return the rule that matched so the evaluation loop terminates.
|
||||
return {
|
||||
decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER),
|
||||
rule: undefined,
|
||||
rule,
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user