mirror of
https://github.com/openai/codex.git
synced 2026-05-29 15:30:22 +00:00
shell-command: reuse a PowerShell parser process on Windows (#16057)
## Why `//codex-rs/shell-command:shell-command-unit-tests` became a real bottleneck in the Windows Bazel lane because repeated calls to `is_safe_command_windows()` were starting a fresh PowerShell parser process for every `powershell.exe -Command ...` assertion. PR #16056 was motivated by that same bottleneck, but its test-only shortcut was the wrong layer to optimize because it weakened the end-to-end guarantee that our runtime path really asks PowerShell to parse the command the way we expect. This PR attacks the actual cost center instead: it keeps the real PowerShell parser in the loop, but turns that parser into a long-lived helper process so both tests and the runtime safe-command path can reuse it across many requests. ## What Changed - add `shell-command/src/command_safety/powershell_parser.rs`, which keeps one mutex-protected parser process per PowerShell executable path and speaks a simple JSON-over-stdio request/response protocol - turn `shell-command/src/command_safety/powershell_parser.ps1` into a long-running parser server with comments explaining the protocol, the AST-shape restrictions, and why unsupported constructs are rejected conservatively - keep request ids and a one-time respawn path so a dead or desynchronized cached child fails closed instead of silently returning mixed parser output - preserve separate parser processes for `powershell.exe` and `pwsh.exe`, since they do not accept the same language surface - avoid a direct `PipelineChainAst` type reference in the PowerShell script so the parser service still runs under Windows PowerShell 5.1 as well as newer `pwsh` - make `shell-command/src/command_safety/windows_safe_commands.rs` delegate to the new parser utility instead of spawning a fresh PowerShell process for every parse - add a Windows-only unit test that exercises multiple sequential requests against the same parser process ## Testing - adds a Windows-only parser-reuse unit test in `powershell_parser.rs` - the main end-to-end verification for this change is the Windows CI lane, because the new service depends on real `powershell.exe` / `pwsh.exe` behavior
This commit is contained in:
@@ -1,44 +1,98 @@
|
||||
$ErrorActionPreference = 'Stop'
|
||||
$ProgressPreference = 'SilentlyContinue'
|
||||
|
||||
$payload = $env:CODEX_POWERSHELL_PAYLOAD
|
||||
if ([string]::IsNullOrEmpty($payload)) {
|
||||
Write-Output '{"status":"parse_failed"}'
|
||||
exit 0
|
||||
}
|
||||
# Long-lived PowerShell AST parser used by the Rust command-safety layer on Windows.
|
||||
# The caller starts one child process per PowerShell executable variant and then sends
|
||||
# newline-delimited JSON requests over stdin:
|
||||
# { "id": <u64>, "payload": "<base64-encoded UTF-16LE script>" }
|
||||
# We answer with one compact JSON line per request:
|
||||
# { "id": <same>, "status": "ok", "commands": [["Get-Content", "foo.txt"]] }
|
||||
# or:
|
||||
# { "id": <same>, "status": "parse_failed" | "parse_errors" | "unsupported" }
|
||||
#
|
||||
# "unsupported" is intentional: it means the script parsed successfully, but the AST
|
||||
# included constructs that we conservatively refuse to lower into argv-like command words.
|
||||
# The Rust side treats that the same way as an unsafe command.
|
||||
|
||||
try {
|
||||
$source =
|
||||
[System.Text.Encoding]::Unicode.GetString(
|
||||
[System.Convert]::FromBase64String($payload)
|
||||
# Use BOM-free UTF-8 on the protocol stream so Rust sees clean JSON lines with no
|
||||
# leading BOM bytes on the first response.
|
||||
$utf8 = [System.Text.UTF8Encoding]::new($false)
|
||||
$stdin = [System.IO.StreamReader]::new([Console]::OpenStandardInput(), $utf8, $false)
|
||||
$stdout = [System.IO.StreamWriter]::new([Console]::OpenStandardOutput(), $utf8)
|
||||
$stdout.AutoFlush = $true
|
||||
|
||||
function Invoke-ParseRequest {
|
||||
param($RequestId, $Source)
|
||||
|
||||
$tokens = $null
|
||||
$errors = $null
|
||||
|
||||
$ast = $null
|
||||
try {
|
||||
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
|
||||
$Source,
|
||||
[ref]$tokens,
|
||||
[ref]$errors
|
||||
)
|
||||
} catch {
|
||||
Write-Output '{"status":"parse_failed"}'
|
||||
exit 0
|
||||
} catch {
|
||||
return @{ id = $RequestId; status = 'parse_failed' }
|
||||
}
|
||||
|
||||
if ($errors.Count -gt 0) {
|
||||
return @{ id = $RequestId; status = 'parse_errors' }
|
||||
}
|
||||
|
||||
# Only accept AST shapes we can flatten into a list of argv-like command words.
|
||||
# Anything more dynamic than that becomes "unsupported" instead of being guessed at.
|
||||
$commands = [System.Collections.ArrayList]::new()
|
||||
|
||||
foreach ($statement in $ast.EndBlock.Statements) {
|
||||
if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
|
||||
$commands = $null
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if ($commands -ne $null) {
|
||||
$normalized = [System.Collections.ArrayList]::new()
|
||||
foreach ($cmd in $commands) {
|
||||
# Convert every successful parse result to an array-of-arrays shape so the Rust
|
||||
# side can deserialize one uniform representation.
|
||||
if ($cmd -is [string]) {
|
||||
$null = $normalized.Add(@($cmd))
|
||||
continue
|
||||
}
|
||||
|
||||
if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
|
||||
$null = $normalized.Add(@($cmd))
|
||||
continue
|
||||
}
|
||||
|
||||
$normalized = $null
|
||||
break
|
||||
}
|
||||
|
||||
$commands = $normalized
|
||||
}
|
||||
|
||||
if ($commands -eq $null) {
|
||||
return @{ id = $RequestId; status = 'unsupported' }
|
||||
}
|
||||
|
||||
return @{ id = $RequestId; status = 'ok'; commands = $commands }
|
||||
}
|
||||
|
||||
$tokens = $null
|
||||
$errors = $null
|
||||
function Write-Response {
|
||||
param($Response)
|
||||
|
||||
$ast = $null
|
||||
try {
|
||||
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
|
||||
$source,
|
||||
[ref]$tokens,
|
||||
[ref]$errors
|
||||
)
|
||||
} catch {
|
||||
Write-Output '{"status":"parse_failed"}'
|
||||
exit 0
|
||||
}
|
||||
|
||||
if ($errors.Count -gt 0) {
|
||||
Write-Output '{"status":"parse_errors"}'
|
||||
exit 0
|
||||
$stdout.WriteLine(($Response | ConvertTo-Json -Compress -Depth 3))
|
||||
}
|
||||
|
||||
function Convert-CommandElement {
|
||||
param($element)
|
||||
|
||||
# Accept only literal-ish command elements. Variable expansion, subexpressions, splats,
|
||||
# and other dynamic forms return $null so the whole request becomes unsupported.
|
||||
if ($element -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
|
||||
return @($element.Value)
|
||||
}
|
||||
@@ -77,6 +131,8 @@ function Convert-PipelineElement {
|
||||
param($element)
|
||||
|
||||
if ($element -is [System.Management.Automation.Language.CommandAst]) {
|
||||
# Redirections and invocation operators make the command harder to classify safely,
|
||||
# so reject them rather than trying to normalize them.
|
||||
if ($element.Redirections.Count -gt 0) {
|
||||
return $null
|
||||
}
|
||||
@@ -104,6 +160,8 @@ function Convert-PipelineElement {
|
||||
return $null
|
||||
}
|
||||
|
||||
# Allow a parenthesized single pipeline element like "(Get-Content foo.rs -Raw)" so
|
||||
# the caller still sees the inner command words. More complex expressions stay unsupported.
|
||||
if ($element.Expression -is [System.Management.Automation.Language.ParenExpressionAst]) {
|
||||
$innerPipeline = $element.Expression.Pipeline
|
||||
if ($innerPipeline -and $innerPipeline.PipelineElements.Count -eq 1) {
|
||||
@@ -156,46 +214,44 @@ function Add-CommandsFromPipelineBase {
|
||||
return Add-CommandsFromPipelineAst $pipeline $commands
|
||||
}
|
||||
|
||||
if ($pipeline -is [System.Management.Automation.Language.PipelineChainAst]) {
|
||||
# Windows PowerShell 5.1 does not define PipelineChainAst, so avoid a direct type
|
||||
# reference here and instead check the runtime type name.
|
||||
if ($pipeline.GetType().FullName -eq 'System.Management.Automation.Language.PipelineChainAst') {
|
||||
return Add-CommandsFromPipelineChain $pipeline $commands
|
||||
}
|
||||
|
||||
return $false
|
||||
}
|
||||
|
||||
$commands = [System.Collections.ArrayList]::new()
|
||||
|
||||
foreach ($statement in $ast.EndBlock.Statements) {
|
||||
if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
|
||||
$commands = $null
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if ($commands -ne $null) {
|
||||
$normalized = [System.Collections.ArrayList]::new()
|
||||
foreach ($cmd in $commands) {
|
||||
if ($cmd -is [string]) {
|
||||
$null = $normalized.Add(@($cmd))
|
||||
continue
|
||||
}
|
||||
|
||||
if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
|
||||
$null = $normalized.Add(@($cmd))
|
||||
continue
|
||||
}
|
||||
|
||||
$normalized = $null
|
||||
break
|
||||
# This script stays alive so the Rust caller can amortize PowerShell startup across
|
||||
# many parse requests. Each request and response is one compact JSON line.
|
||||
while (($requestLine = $stdin.ReadLine()) -ne $null) {
|
||||
$request = $null
|
||||
try {
|
||||
$request = $requestLine | ConvertFrom-Json
|
||||
} catch {
|
||||
Write-Response @{ id = $null; status = 'parse_failed' }
|
||||
continue
|
||||
}
|
||||
|
||||
$commands = $normalized
|
||||
}
|
||||
# We process requests serially, but still echo the id back so the Rust side can
|
||||
# detect protocol desyncs instead of silently trusting mixed stdout.
|
||||
$requestId = $request.id
|
||||
$payload = $request.payload
|
||||
if ([string]::IsNullOrEmpty($payload)) {
|
||||
Write-Response @{ id = $requestId; status = 'parse_failed' }
|
||||
continue
|
||||
}
|
||||
|
||||
$result = if ($commands -eq $null) {
|
||||
@{ status = 'unsupported' }
|
||||
} else {
|
||||
@{ status = 'ok'; commands = $commands }
|
||||
}
|
||||
try {
|
||||
$source =
|
||||
[System.Text.Encoding]::Unicode.GetString(
|
||||
[System.Convert]::FromBase64String($payload)
|
||||
)
|
||||
} catch {
|
||||
Write-Response @{ id = $requestId; status = 'parse_failed' }
|
||||
continue
|
||||
}
|
||||
|
||||
,$result | ConvertTo-Json -Depth 3
|
||||
Write-Response (Invoke-ParseRequest -RequestId $requestId -Source $source)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user