chore: fix session browser test and skip hook system tests (#14099)

This commit is contained in:
Jack Wotherspoon
2025-11-28 16:07:54 -05:00
committed by GitHub
parent 7a4280a482
commit 576fda18eb
2 changed files with 223 additions and 211 deletions

View File

@@ -260,15 +260,17 @@ echo '{
});
describe('AfterModel Hooks - LLM Response Modification', () => {
it('should modify LLM responses with AfterModel hooks', async () => {
await rig.setup('should modify LLM responses with AfterModel hooks', {
fakeResponsesPath: join(
import.meta.dirname,
'hooks-system.after-model.responses',
),
});
// Create a hook script that modifies the LLM response
const hookScript = `#!/bin/bash
it.skipIf(process.platform === 'win32')(
'should modify LLM responses with AfterModel hooks',
async () => {
await rig.setup('should modify LLM responses with AfterModel hooks', {
fakeResponsesPath: join(
import.meta.dirname,
'hooks-system.after-model.responses',
),
});
// Create a hook script that modifies the LLM response
const hookScript = `#!/bin/bash
echo '{
"hookSpecificOutput": {
"hookEventName": "AfterModel",
@@ -288,44 +290,45 @@ echo '{
}
}'`;
const scriptPath = join(rig.testDir!, 'after_model_hook.sh');
writeFileSync(scriptPath, hookScript);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${scriptPath}"`);
const scriptPath = join(rig.testDir!, 'after_model_hook.sh');
writeFileSync(scriptPath, hookScript);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${scriptPath}"`);
await rig.setup('should modify LLM responses with AfterModel hooks', {
settings: {
tools: {
enableHooks: true,
await rig.setup('should modify LLM responses with AfterModel hooks', {
settings: {
tools: {
enableHooks: true,
},
hooks: {
AfterModel: [
{
hooks: [
{
type: 'command',
command: scriptPath,
timeout: 5000,
},
],
},
],
},
},
hooks: {
AfterModel: [
{
hooks: [
{
type: 'command',
command: scriptPath,
timeout: 5000,
},
],
},
],
},
},
});
});
const prompt = 'What is 2 + 2?';
const result = await rig.run(prompt);
const prompt = 'What is 2 + 2?';
const result = await rig.run(prompt);
// The hook should have replaced the model response
expect(result).toContain(
'[FILTERED] Response has been filtered for security compliance',
);
// The hook should have replaced the model response
expect(result).toContain(
'[FILTERED] Response has been filtered for security compliance',
);
// Should generate hook telemetry
const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
});
// Should generate hook telemetry
const hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
},
);
});
describe('BeforeToolSelection Hooks - Tool Configuration', () => {
@@ -523,10 +526,12 @@ echo '{
describe('Sequential Hook Execution', () => {
// Note: This test checks telemetry for hook context in API requests,
// which behaves differently with mocked responses. Keeping real LLM calls.
it('should execute hooks sequentially when configured', async () => {
await rig.setup('should execute hooks sequentially when configured');
// Create two hooks that modify the input sequentially
const hook1Script = `#!/bin/bash
it.skipIf(process.platform === 'win32')(
'should execute hooks sequentially when configured',
async () => {
await rig.setup('should execute hooks sequentially when configured');
// Create two hooks that modify the input sequentially
const hook1Script = `#!/bin/bash
echo '{
"decision": "allow",
"hookSpecificOutput": {
@@ -535,7 +540,7 @@ echo '{
}
}'`;
const hook2Script = `#!/bin/bash
const hook2Script = `#!/bin/bash
echo '{
"decision": "allow",
"hookSpecificOutput": {
@@ -544,74 +549,75 @@ echo '{
}
}'`;
const script1Path = join(rig.testDir!, 'sequential_hook1.sh');
const script2Path = join(rig.testDir!, 'sequential_hook2.sh');
const script1Path = join(rig.testDir!, 'sequential_hook1.sh');
const script2Path = join(rig.testDir!, 'sequential_hook2.sh');
writeFileSync(script1Path, hook1Script);
writeFileSync(script2Path, hook2Script);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${script1Path}"`);
execSync(`chmod +x "${script2Path}"`);
writeFileSync(script1Path, hook1Script);
writeFileSync(script2Path, hook2Script);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${script1Path}"`);
execSync(`chmod +x "${script2Path}"`);
await rig.setup('should execute hooks sequentially when configured', {
settings: {
tools: {
enableHooks: true,
await rig.setup('should execute hooks sequentially when configured', {
settings: {
tools: {
enableHooks: true,
},
hooks: {
BeforeAgent: [
{
sequential: true,
hooks: [
{
type: 'command',
command: script1Path,
timeout: 5000,
},
{
type: 'command',
command: script2Path,
timeout: 5000,
},
],
},
],
},
},
hooks: {
BeforeAgent: [
{
sequential: true,
hooks: [
{
type: 'command',
command: script1Path,
timeout: 5000,
},
{
type: 'command',
command: script2Path,
timeout: 5000,
},
],
},
],
},
},
});
});
const prompt = 'Hello, please help me with a task';
await rig.run(prompt);
const prompt = 'Hello, please help me with a task';
await rig.run(prompt);
// Should generate hook telemetry
let hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
hookTelemetryFound = await rig.waitForTelemetryEvent('api_request');
const apiRequests = rig.readAllApiRequest();
const apiRequestsTexts = apiRequests
?.filter(
(request) =>
'attributes' in request &&
typeof request['attributes'] === 'object' &&
request['attributes'] !== null &&
'request_text' in request['attributes'] &&
typeof request['attributes']['request_text'] === 'string',
)
.map((request) => request['attributes']['request_text']);
expect(apiRequestsTexts).toBeDefined();
let hasBeforeAgentHookContext = false;
let hasAfterToolHookContext = false;
for (const requestText of apiRequestsTexts) {
if (requestText.includes('Step 1: Initial validation passed')) {
hasBeforeAgentHookContext = true;
// Should generate hook telemetry
let hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
hookTelemetryFound = await rig.waitForTelemetryEvent('api_request');
const apiRequests = rig.readAllApiRequest();
const apiRequestsTexts = apiRequests
?.filter(
(request) =>
'attributes' in request &&
typeof request['attributes'] === 'object' &&
request['attributes'] !== null &&
'request_text' in request['attributes'] &&
typeof request['attributes']['request_text'] === 'string',
)
.map((request) => request['attributes']['request_text']);
expect(apiRequestsTexts).toBeDefined();
let hasBeforeAgentHookContext = false;
let hasAfterToolHookContext = false;
for (const requestText of apiRequestsTexts) {
if (requestText.includes('Step 1: Initial validation passed')) {
hasBeforeAgentHookContext = true;
}
if (requestText.includes('Step 2: Security check completed')) {
hasAfterToolHookContext = true;
}
}
if (requestText.includes('Step 2: Security check completed')) {
hasAfterToolHookContext = true;
}
}
expect(hasBeforeAgentHookContext).toBeTruthy();
expect(hasAfterToolHookContext).toBeTruthy();
});
expect(hasBeforeAgentHookContext).toBeTruthy();
expect(hasAfterToolHookContext).toBeTruthy();
},
);
});
describe('Hook Input/Output Validation', () => {
@@ -682,124 +688,127 @@ fi`;
describe('Multiple Event Types', () => {
// Note: This test checks telemetry for hook context in API requests,
// which behaves differently with mocked responses. Keeping real LLM calls.
it('should handle hooks for all major event types', async () => {
await rig.setup('should handle hooks for all major event types');
// Create hook scripts for different events
const beforeToolScript = `#!/bin/bash
it.skipIf(process.platform === 'win32')(
'should handle hooks for all major event types',
async () => {
await rig.setup('should handle hooks for all major event types');
// Create hook scripts for different events
const beforeToolScript = `#!/bin/bash
echo '{"decision": "allow", "systemMessage": "BeforeTool: File operation logged"}'`;
const afterToolScript = `#!/bin/bash
const afterToolScript = `#!/bin/bash
echo '{"hookSpecificOutput": {"hookEventName": "AfterTool", "additionalContext": "AfterTool: Operation completed successfully"}}'`;
const beforeAgentScript = `#!/bin/bash
const beforeAgentScript = `#!/bin/bash
echo '{"decision": "allow", "hookSpecificOutput": {"hookEventName": "BeforeAgent", "additionalContext": "BeforeAgent: User request processed"}}'`;
const beforeToolPath = join(rig.testDir!, 'before_tool.sh');
const afterToolPath = join(rig.testDir!, 'after_tool.sh');
const beforeAgentPath = join(rig.testDir!, 'before_agent.sh');
const beforeToolPath = join(rig.testDir!, 'before_tool.sh');
const afterToolPath = join(rig.testDir!, 'after_tool.sh');
const beforeAgentPath = join(rig.testDir!, 'before_agent.sh');
writeFileSync(beforeToolPath, beforeToolScript);
writeFileSync(afterToolPath, afterToolScript);
writeFileSync(beforeAgentPath, beforeAgentScript);
writeFileSync(beforeToolPath, beforeToolScript);
writeFileSync(afterToolPath, afterToolScript);
writeFileSync(beforeAgentPath, beforeAgentScript);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${beforeToolPath}"`);
execSync(`chmod +x "${afterToolPath}"`);
execSync(`chmod +x "${beforeAgentPath}"`);
const { execSync } = await import('node:child_process');
execSync(`chmod +x "${beforeToolPath}"`);
execSync(`chmod +x "${afterToolPath}"`);
execSync(`chmod +x "${beforeAgentPath}"`);
await rig.setup('should handle hooks for all major event types', {
settings: {
tools: {
enableHooks: true,
await rig.setup('should handle hooks for all major event types', {
settings: {
tools: {
enableHooks: true,
},
hooks: {
BeforeAgent: [
{
hooks: [
{
type: 'command',
command: beforeAgentPath,
timeout: 5000,
},
],
},
],
BeforeTool: [
{
matcher: 'write_file',
hooks: [
{
type: 'command',
command: beforeToolPath,
timeout: 5000,
},
],
},
],
AfterTool: [
{
matcher: 'write_file',
hooks: [
{
type: 'command',
command: afterToolPath,
timeout: 5000,
},
],
},
],
},
},
hooks: {
BeforeAgent: [
{
hooks: [
{
type: 'command',
command: beforeAgentPath,
timeout: 5000,
},
],
},
],
BeforeTool: [
{
matcher: 'write_file',
hooks: [
{
type: 'command',
command: beforeToolPath,
timeout: 5000,
},
],
},
],
AfterTool: [
{
matcher: 'write_file',
hooks: [
{
type: 'command',
command: afterToolPath,
timeout: 5000,
},
],
},
],
},
},
});
});
const prompt =
'Create a file called multi-event-test.txt with content ' +
'"testing multiple events", and then please reply with ' +
'everything I say just after this:"';
const result = await rig.run(prompt);
const prompt =
'Create a file called multi-event-test.txt with content ' +
'"testing multiple events", and then please reply with ' +
'everything I say just after this:"';
const result = await rig.run(prompt);
// Should execute write_file tool
const foundWriteFile = await rig.waitForToolCall('write_file');
expect(foundWriteFile).toBeTruthy();
// Should execute write_file tool
const foundWriteFile = await rig.waitForToolCall('write_file');
expect(foundWriteFile).toBeTruthy();
// File should be created
const fileContent = rig.readFile('multi-event-test.txt');
expect(fileContent).toContain('testing multiple events');
// File should be created
const fileContent = rig.readFile('multi-event-test.txt');
expect(fileContent).toContain('testing multiple events');
// Result should contain context from all hooks
expect(result).toContain('BeforeTool: File operation logged');
// Result should contain context from all hooks
expect(result).toContain('BeforeTool: File operation logged');
// Should generate hook telemetry
let hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
hookTelemetryFound = await rig.waitForTelemetryEvent('api_request');
const apiRequests = rig.readAllApiRequest();
const apiRequestsTexts = apiRequests
?.filter(
(request) =>
'attributes' in request &&
typeof request['attributes'] === 'object' &&
request['attributes'] !== null &&
'request_text' in request['attributes'] &&
typeof request['attributes']['request_text'] === 'string',
)
.map((request) => request['attributes']['request_text']);
expect(apiRequestsTexts).toBeDefined();
let hasBeforeAgentHookContext = false;
let hasAfterToolHookContext = false;
for (const requestText of apiRequestsTexts) {
if (requestText.includes('BeforeAgent: User request processed')) {
hasBeforeAgentHookContext = true;
// Should generate hook telemetry
let hookTelemetryFound = await rig.waitForTelemetryEvent('hook_call');
expect(hookTelemetryFound).toBeTruthy();
hookTelemetryFound = await rig.waitForTelemetryEvent('api_request');
const apiRequests = rig.readAllApiRequest();
const apiRequestsTexts = apiRequests
?.filter(
(request) =>
'attributes' in request &&
typeof request['attributes'] === 'object' &&
request['attributes'] !== null &&
'request_text' in request['attributes'] &&
typeof request['attributes']['request_text'] === 'string',
)
.map((request) => request['attributes']['request_text']);
expect(apiRequestsTexts).toBeDefined();
let hasBeforeAgentHookContext = false;
let hasAfterToolHookContext = false;
for (const requestText of apiRequestsTexts) {
if (requestText.includes('BeforeAgent: User request processed')) {
hasBeforeAgentHookContext = true;
}
if (
requestText.includes('AfterTool: Operation completed successfully')
) {
hasAfterToolHookContext = true;
}
}
if (
requestText.includes('AfterTool: Operation completed successfully')
) {
hasAfterToolHookContext = true;
}
}
expect(hasBeforeAgentHookContext).toBeTruthy();
expect(hasAfterToolHookContext).toBeTruthy();
});
expect(hasBeforeAgentHookContext).toBeTruthy();
expect(hasAfterToolHookContext).toBeTruthy();
},
);
});
describe('Hook Error Handling', () => {