mirror of
https://github.com/google-gemini/gemini-cli.git
synced 2026-02-01 22:48:03 +00:00
fix(core): ensure YOLO mode auto-approves complex shell commands when parsing fails (#17920)
This commit is contained in:
@@ -564,11 +564,17 @@ describe('run_shell_command', () => {
|
|||||||
|
|
||||||
it('rejects invalid shell expressions', async () => {
|
it('rejects invalid shell expressions', async () => {
|
||||||
await rig.setup('rejects invalid shell expressions', {
|
await rig.setup('rejects invalid shell expressions', {
|
||||||
settings: { tools: { core: ['run_shell_command'] } },
|
settings: {
|
||||||
|
tools: {
|
||||||
|
core: ['run_shell_command'],
|
||||||
|
allowed: ['run_shell_command(echo)'], // Specifically allow echo
|
||||||
|
},
|
||||||
|
},
|
||||||
});
|
});
|
||||||
const invalidCommand = getInvalidCommand();
|
const invalidCommand = getInvalidCommand();
|
||||||
const result = await rig.run({
|
const result = await rig.run({
|
||||||
args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
|
args: `I am testing the error handling of the run_shell_command tool. Please attempt to run the following command, which I know has invalid syntax: \`${invalidCommand}\`. If the command fails as expected, please return the word FAIL, otherwise return the word SUCCESS.`,
|
||||||
|
approvalMode: 'default', // Use default mode so safety fallback triggers confirmation
|
||||||
});
|
});
|
||||||
expect(result).toContain('FAIL');
|
expect(result).toContain('FAIL');
|
||||||
|
|
||||||
|
|||||||
@@ -1398,6 +1398,100 @@ describe('PolicyEngine', () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('shell command parsing failure', () => {
|
||||||
|
it('should return ALLOW in YOLO mode even if shell command parsing fails', async () => {
|
||||||
|
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||||
|
const rules: PolicyRule[] = [
|
||||||
|
{
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
priority: 999,
|
||||||
|
modes: [ApprovalMode.YOLO],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
toolName: 'run_shell_command',
|
||||||
|
decision: PolicyDecision.ASK_USER,
|
||||||
|
priority: 10,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
engine = new PolicyEngine({
|
||||||
|
rules,
|
||||||
|
approvalMode: ApprovalMode.YOLO,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Simulate parsing failure (splitCommands returning empty array)
|
||||||
|
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||||
|
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ALLOW);
|
||||||
|
expect(result.rule).toBeDefined();
|
||||||
|
expect(result.rule?.priority).toBe(999);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return DENY in YOLO mode if shell command parsing fails and a higher priority rule says DENY', async () => {
|
||||||
|
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||||
|
const rules: PolicyRule[] = [
|
||||||
|
{
|
||||||
|
toolName: 'run_shell_command',
|
||||||
|
decision: PolicyDecision.DENY,
|
||||||
|
priority: 2000, // Very high priority DENY (e.g. Admin)
|
||||||
|
},
|
||||||
|
{
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
priority: 999,
|
||||||
|
modes: [ApprovalMode.YOLO],
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
engine = new PolicyEngine({
|
||||||
|
rules,
|
||||||
|
approvalMode: ApprovalMode.YOLO,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Simulate parsing failure
|
||||||
|
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||||
|
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.decision).toBe(PolicyDecision.DENY);
|
||||||
|
});
|
||||||
|
|
||||||
|
it('should return ASK_USER in non-YOLO mode if shell command parsing fails', async () => {
|
||||||
|
const { splitCommands } = await import('../utils/shell-utils.js');
|
||||||
|
const rules: PolicyRule[] = [
|
||||||
|
{
|
||||||
|
toolName: 'run_shell_command',
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
priority: 20,
|
||||||
|
},
|
||||||
|
];
|
||||||
|
|
||||||
|
engine = new PolicyEngine({
|
||||||
|
rules,
|
||||||
|
approvalMode: ApprovalMode.DEFAULT,
|
||||||
|
});
|
||||||
|
|
||||||
|
// Simulate parsing failure
|
||||||
|
vi.mocked(splitCommands).mockReturnValueOnce([]);
|
||||||
|
|
||||||
|
const result = await engine.check(
|
||||||
|
{ name: 'run_shell_command', args: { command: 'complex command' } },
|
||||||
|
undefined,
|
||||||
|
);
|
||||||
|
|
||||||
|
expect(result.decision).toBe(PolicyDecision.ASK_USER);
|
||||||
|
expect(result.rule).toBeDefined();
|
||||||
|
expect(result.rule?.priority).toBe(20);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
describe('safety checker integration', () => {
|
describe('safety checker integration', () => {
|
||||||
it('should call checker when rule allows and has safety_checker', async () => {
|
it('should call checker when rule allows and has safety_checker', async () => {
|
||||||
const rules: PolicyRule[] = [
|
const rules: PolicyRule[] = [
|
||||||
|
|||||||
@@ -152,14 +152,28 @@ export class PolicyEngine {
|
|||||||
const subCommands = splitCommands(command);
|
const subCommands = splitCommands(command);
|
||||||
|
|
||||||
if (subCommands.length === 0) {
|
if (subCommands.length === 0) {
|
||||||
|
// If the matched rule says DENY, we should respect it immediately even if parsing fails.
|
||||||
|
if (ruleDecision === PolicyDecision.DENY) {
|
||||||
|
return { decision: PolicyDecision.DENY, rule };
|
||||||
|
}
|
||||||
|
|
||||||
|
// In YOLO mode, we should proceed anyway even if we can't parse the command.
|
||||||
|
if (this.approvalMode === ApprovalMode.YOLO) {
|
||||||
|
return {
|
||||||
|
decision: PolicyDecision.ALLOW,
|
||||||
|
rule,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
debugLogger.debug(
|
debugLogger.debug(
|
||||||
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
|
`[PolicyEngine.check] Command parsing failed for: ${command}. Falling back to ASK_USER.`,
|
||||||
);
|
);
|
||||||
|
|
||||||
// Parsing logic failed, we can't trust it. Force ASK_USER (or DENY).
|
// Parsing logic failed, we can't trust it. Force ASK_USER (or DENY).
|
||||||
// We don't blame a specific rule here, unless the input rule was stricter.
|
// We return the rule that matched so the evaluation loop terminates.
|
||||||
return {
|
return {
|
||||||
decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER),
|
decision: this.applyNonInteractiveMode(PolicyDecision.ASK_USER),
|
||||||
rule: undefined,
|
rule,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user