mirror of
https://github.com/openai/codex.git
synced 2026-05-16 17:23:57 +00:00
## Why `//codex-rs/shell-command:shell-command-unit-tests` became a real bottleneck in the Windows Bazel lane because repeated calls to `is_safe_command_windows()` were starting a fresh PowerShell parser process for every `powershell.exe -Command ...` assertion. PR #16056 was motivated by that same bottleneck, but its test-only shortcut was the wrong layer to optimize because it weakened the end-to-end guarantee that our runtime path really asks PowerShell to parse the command the way we expect. This PR attacks the actual cost center instead: it keeps the real PowerShell parser in the loop, but turns that parser into a long-lived helper process so both tests and the runtime safe-command path can reuse it across many requests. ## What Changed - add `shell-command/src/command_safety/powershell_parser.rs`, which keeps one mutex-protected parser process per PowerShell executable path and speaks a simple JSON-over-stdio request/response protocol - turn `shell-command/src/command_safety/powershell_parser.ps1` into a long-running parser server with comments explaining the protocol, the AST-shape restrictions, and why unsupported constructs are rejected conservatively - keep request ids and a one-time respawn path so a dead or desynchronized cached child fails closed instead of silently returning mixed parser output - preserve separate parser processes for `powershell.exe` and `pwsh.exe`, since they do not accept the same language surface - avoid a direct `PipelineChainAst` type reference in the PowerShell script so the parser service still runs under Windows PowerShell 5.1 as well as newer `pwsh` - make `shell-command/src/command_safety/windows_safe_commands.rs` delegate to the new parser utility instead of spawning a fresh PowerShell process for every parse - add a Windows-only unit test that exercises multiple sequential requests against the same parser process ## Testing - adds a Windows-only parser-reuse unit test in `powershell_parser.rs` - the main end-to-end verification for this change is the Windows CI lane, because the new service depends on real `powershell.exe` / `pwsh.exe` behavior
258 lines
8.2 KiB
PowerShell
258 lines
8.2 KiB
PowerShell
$ErrorActionPreference = 'Stop'
|
|
$ProgressPreference = 'SilentlyContinue'
|
|
|
|
# Long-lived PowerShell AST parser used by the Rust command-safety layer on Windows.
|
|
# The caller starts one child process per PowerShell executable variant and then sends
|
|
# newline-delimited JSON requests over stdin:
|
|
# { "id": <u64>, "payload": "<base64-encoded UTF-16LE script>" }
|
|
# We answer with one compact JSON line per request:
|
|
# { "id": <same>, "status": "ok", "commands": [["Get-Content", "foo.txt"]] }
|
|
# or:
|
|
# { "id": <same>, "status": "parse_failed" | "parse_errors" | "unsupported" }
|
|
#
|
|
# "unsupported" is intentional: it means the script parsed successfully, but the AST
|
|
# included constructs that we conservatively refuse to lower into argv-like command words.
|
|
# The Rust side treats that the same way as an unsafe command.
|
|
|
|
# Use BOM-free UTF-8 on the protocol stream so Rust sees clean JSON lines with no
|
|
# leading BOM bytes on the first response.
|
|
$utf8 = [System.Text.UTF8Encoding]::new($false)
|
|
$stdin = [System.IO.StreamReader]::new([Console]::OpenStandardInput(), $utf8, $false)
|
|
$stdout = [System.IO.StreamWriter]::new([Console]::OpenStandardOutput(), $utf8)
|
|
$stdout.AutoFlush = $true
|
|
|
|
function Invoke-ParseRequest {
|
|
param($RequestId, $Source)
|
|
|
|
$tokens = $null
|
|
$errors = $null
|
|
|
|
$ast = $null
|
|
try {
|
|
$ast = [System.Management.Automation.Language.Parser]::ParseInput(
|
|
$Source,
|
|
[ref]$tokens,
|
|
[ref]$errors
|
|
)
|
|
} catch {
|
|
return @{ id = $RequestId; status = 'parse_failed' }
|
|
}
|
|
|
|
if ($errors.Count -gt 0) {
|
|
return @{ id = $RequestId; status = 'parse_errors' }
|
|
}
|
|
|
|
# Only accept AST shapes we can flatten into a list of argv-like command words.
|
|
# Anything more dynamic than that becomes "unsupported" instead of being guessed at.
|
|
$commands = [System.Collections.ArrayList]::new()
|
|
|
|
foreach ($statement in $ast.EndBlock.Statements) {
|
|
if (-not (Add-CommandsFromPipelineBase $statement $commands)) {
|
|
$commands = $null
|
|
break
|
|
}
|
|
}
|
|
|
|
if ($commands -ne $null) {
|
|
$normalized = [System.Collections.ArrayList]::new()
|
|
foreach ($cmd in $commands) {
|
|
# Convert every successful parse result to an array-of-arrays shape so the Rust
|
|
# side can deserialize one uniform representation.
|
|
if ($cmd -is [string]) {
|
|
$null = $normalized.Add(@($cmd))
|
|
continue
|
|
}
|
|
|
|
if ($cmd -is [System.Array] -or $cmd -is [System.Collections.IEnumerable]) {
|
|
$null = $normalized.Add(@($cmd))
|
|
continue
|
|
}
|
|
|
|
$normalized = $null
|
|
break
|
|
}
|
|
|
|
$commands = $normalized
|
|
}
|
|
|
|
if ($commands -eq $null) {
|
|
return @{ id = $RequestId; status = 'unsupported' }
|
|
}
|
|
|
|
return @{ id = $RequestId; status = 'ok'; commands = $commands }
|
|
}
|
|
|
|
function Write-Response {
|
|
param($Response)
|
|
|
|
$stdout.WriteLine(($Response | ConvertTo-Json -Compress -Depth 3))
|
|
}
|
|
|
|
function Convert-CommandElement {
|
|
param($element)
|
|
|
|
# Accept only literal-ish command elements. Variable expansion, subexpressions, splats,
|
|
# and other dynamic forms return $null so the whole request becomes unsupported.
|
|
if ($element -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
|
|
return @($element.Value)
|
|
}
|
|
|
|
if ($element -is [System.Management.Automation.Language.ExpandableStringExpressionAst]) {
|
|
if ($element.NestedExpressions.Count -gt 0) {
|
|
return $null
|
|
}
|
|
return @($element.Value)
|
|
}
|
|
|
|
if ($element -is [System.Management.Automation.Language.ConstantExpressionAst]) {
|
|
return @($element.Value.ToString())
|
|
}
|
|
|
|
if ($element -is [System.Management.Automation.Language.CommandParameterAst]) {
|
|
if ($element.Argument -eq $null) {
|
|
return @('-' + $element.ParameterName)
|
|
}
|
|
|
|
if ($element.Argument -is [System.Management.Automation.Language.StringConstantExpressionAst]) {
|
|
return @('-' + $element.ParameterName, $element.Argument.Value)
|
|
}
|
|
|
|
if ($element.Argument -is [System.Management.Automation.Language.ConstantExpressionAst]) {
|
|
return @('-' + $element.ParameterName, $element.Argument.Value.ToString())
|
|
}
|
|
|
|
return $null
|
|
}
|
|
|
|
return $null
|
|
}
|
|
|
|
function Convert-PipelineElement {
|
|
param($element)
|
|
|
|
if ($element -is [System.Management.Automation.Language.CommandAst]) {
|
|
# Redirections and invocation operators make the command harder to classify safely,
|
|
# so reject them rather than trying to normalize them.
|
|
if ($element.Redirections.Count -gt 0) {
|
|
return $null
|
|
}
|
|
|
|
if (
|
|
$element.InvocationOperator -ne $null -and
|
|
$element.InvocationOperator -ne [System.Management.Automation.Language.TokenKind]::Unknown
|
|
) {
|
|
return $null
|
|
}
|
|
|
|
$parts = @()
|
|
foreach ($commandElement in $element.CommandElements) {
|
|
$converted = Convert-CommandElement $commandElement
|
|
if ($converted -eq $null) {
|
|
return $null
|
|
}
|
|
$parts += $converted
|
|
}
|
|
return $parts
|
|
}
|
|
|
|
if ($element -is [System.Management.Automation.Language.CommandExpressionAst]) {
|
|
if ($element.Redirections.Count -gt 0) {
|
|
return $null
|
|
}
|
|
|
|
# Allow a parenthesized single pipeline element like "(Get-Content foo.rs -Raw)" so
|
|
# the caller still sees the inner command words. More complex expressions stay unsupported.
|
|
if ($element.Expression -is [System.Management.Automation.Language.ParenExpressionAst]) {
|
|
$innerPipeline = $element.Expression.Pipeline
|
|
if ($innerPipeline -and $innerPipeline.PipelineElements.Count -eq 1) {
|
|
return Convert-PipelineElement $innerPipeline.PipelineElements[0]
|
|
}
|
|
}
|
|
|
|
return $null
|
|
}
|
|
|
|
return $null
|
|
}
|
|
|
|
function Add-CommandsFromPipelineAst {
|
|
param($pipeline, $commands)
|
|
|
|
if ($pipeline.PipelineElements.Count -eq 0) {
|
|
return $false
|
|
}
|
|
|
|
foreach ($element in $pipeline.PipelineElements) {
|
|
$words = Convert-PipelineElement $element
|
|
if ($words -eq $null -or $words.Count -eq 0) {
|
|
return $false
|
|
}
|
|
$null = $commands.Add($words)
|
|
}
|
|
|
|
return $true
|
|
}
|
|
|
|
function Add-CommandsFromPipelineChain {
|
|
param($chain, $commands)
|
|
|
|
if (-not (Add-CommandsFromPipelineBase $chain.LhsPipelineChain $commands)) {
|
|
return $false
|
|
}
|
|
|
|
if (-not (Add-CommandsFromPipelineAst $chain.RhsPipeline $commands)) {
|
|
return $false
|
|
}
|
|
|
|
return $true
|
|
}
|
|
|
|
function Add-CommandsFromPipelineBase {
|
|
param($pipeline, $commands)
|
|
|
|
if ($pipeline -is [System.Management.Automation.Language.PipelineAst]) {
|
|
return Add-CommandsFromPipelineAst $pipeline $commands
|
|
}
|
|
|
|
# Windows PowerShell 5.1 does not define PipelineChainAst, so avoid a direct type
|
|
# reference here and instead check the runtime type name.
|
|
if ($pipeline.GetType().FullName -eq 'System.Management.Automation.Language.PipelineChainAst') {
|
|
return Add-CommandsFromPipelineChain $pipeline $commands
|
|
}
|
|
|
|
return $false
|
|
}
|
|
|
|
# This script stays alive so the Rust caller can amortize PowerShell startup across
|
|
# many parse requests. Each request and response is one compact JSON line.
|
|
while (($requestLine = $stdin.ReadLine()) -ne $null) {
|
|
$request = $null
|
|
try {
|
|
$request = $requestLine | ConvertFrom-Json
|
|
} catch {
|
|
Write-Response @{ id = $null; status = 'parse_failed' }
|
|
continue
|
|
}
|
|
|
|
# We process requests serially, but still echo the id back so the Rust side can
|
|
# detect protocol desyncs instead of silently trusting mixed stdout.
|
|
$requestId = $request.id
|
|
$payload = $request.payload
|
|
if ([string]::IsNullOrEmpty($payload)) {
|
|
Write-Response @{ id = $requestId; status = 'parse_failed' }
|
|
continue
|
|
}
|
|
|
|
try {
|
|
$source =
|
|
[System.Text.Encoding]::Unicode.GetString(
|
|
[System.Convert]::FromBase64String($payload)
|
|
)
|
|
} catch {
|
|
Write-Response @{ id = $requestId; status = 'parse_failed' }
|
|
continue
|
|
}
|
|
|
|
Write-Response (Invoke-ParseRequest -RequestId $requestId -Source $source)
|
|
}
|