Harden hosted Android Agent Genie bridge

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-03-19 19:06:09 -07:00
parent 2647a6de84
commit 7d3e5f6d0c
20 changed files with 1175 additions and 180 deletions

View File

@@ -45,6 +45,7 @@ object AgentCodexAppServerClient {
context: Context,
instructions: String,
prompt: String,
outputSchema: JSONObject? = null,
dynamicTools: JSONArray? = null,
toolCallHandler: ((String, JSONObject) -> JSONObject)? = null,
requestUserInputHandler: ((JSONArray) -> JSONObject)? = null,
@@ -62,7 +63,11 @@ object AgentCodexAppServerClient {
instructions = instructions,
dynamicTools = dynamicTools,
)
startTurn(threadId, prompt)
startTurn(
threadId = threadId,
prompt = prompt,
outputSchema = outputSchema,
)
waitForTurnCompletion(toolCallHandler, requestUserInputHandler).also { response ->
Log.i(TAG, "requestText completed response=${response.take(160)}")
}
@@ -180,19 +185,24 @@ object AgentCodexAppServerClient {
private fun startTurn(
threadId: String,
prompt: String,
outputSchema: JSONObject?,
) {
val turnParams = JSONObject()
.put("threadId", threadId)
.put(
"input",
JSONArray().put(
JSONObject()
.put("type", "text")
.put("text", prompt),
),
)
if (outputSchema != null) {
turnParams.put("outputSchema", outputSchema)
}
request(
method = "turn/start",
params = JSONObject()
.put("threadId", threadId)
.put(
"input",
JSONArray().put(
JSONObject()
.put("type", "text")
.put("text", prompt),
),
),
params = turnParams,
)
}
@@ -226,6 +236,16 @@ object AgentCodexAppServerClient {
.append(params.optString("delta"))
}
}
"item/commandExecution/outputDelta" -> {
val itemId = params.optString("itemId")
val delta = params.optString("delta")
if (delta.isNotBlank()) {
Log.i(
TAG,
"commandExecution/outputDelta itemId=$itemId delta=${delta.take(400)}",
)
}
}
"item/started" -> {
val item = params.optJSONObject("item")
Log.i(
@@ -239,6 +259,9 @@ object AgentCodexAppServerClient {
TAG,
"item/completed type=${item.optString("type")} status=${item.optString("status")} tool=${item.optString("tool")}",
)
if (item.optString("type") == "commandExecution") {
Log.i(TAG, "commandExecution/completed item=$item")
}
if (item.optString("type") == "agentMessage") {
val itemId = item.optString("id")
val text = item.optString("text").ifBlank {

View File

@@ -12,6 +12,12 @@ class AgentFrameworkToolBridge(
) {
companion object {
private const val TAG = "AgentFrameworkTool"
private val DISALLOWED_TARGET_PACKAGES = setOf(
"com.android.shell",
"com.android.systemui",
"com.openai.codexd",
"com.openai.codex.genie",
)
const val START_DIRECT_SESSION_TOOL = "android_framework_sessions_start_direct"
const val LIST_SESSIONS_TOOL = "android_framework_sessions_list"
const val ANSWER_QUESTION_TOOL = "android_framework_sessions_answer_question"
@@ -21,15 +27,20 @@ class AgentFrameworkToolBridge(
internal fun parseStartDirectSessionArguments(
arguments: JSONObject,
userObjective: String,
isLaunchablePackage: (String) -> Boolean,
isEligibleTargetPackage: (String) -> Boolean,
): StartDirectSessionRequest {
val targetsJson = arguments.optJSONArray("targets")
?: throw IOException("Framework session tool arguments missing targets")
val rejectedPackages = mutableListOf<String>()
val targets = buildList {
for (index in 0 until targetsJson.length()) {
val target = targetsJson.optJSONObject(index) ?: continue
val packageName = target.optString("packageName").trim()
if (packageName.isEmpty() || !isLaunchablePackage(packageName)) {
if (packageName.isEmpty()) {
continue
}
if (!isEligibleTargetPackage(packageName)) {
rejectedPackages += packageName
continue
}
val objective = target.optString("objective").trim().ifEmpty { userObjective }
@@ -42,7 +53,12 @@ class AgentFrameworkToolBridge(
}
}.distinctBy(AgentDelegationTarget::packageName)
if (targets.isEmpty()) {
throw IOException("Framework session tool did not select a launchable package")
if (rejectedPackages.isNotEmpty()) {
throw IOException(
"Framework session tool selected missing or disallowed package(s): ${rejectedPackages.joinToString(", ")}",
)
}
throw IOException("Framework session tool did not select an eligible target package")
}
return StartDirectSessionRequest(
plan = AgentDelegationPlan(
@@ -90,7 +106,7 @@ class AgentFrameworkToolBridge(
val request = parseStartDirectSessionArguments(
arguments = arguments,
userObjective = userObjective,
isLaunchablePackage = ::isLaunchablePackage,
isEligibleTargetPackage = ::isEligibleTargetPackage,
)
val startedSession = sessionController.startDirectSession(
plan = request.plan,
@@ -267,8 +283,13 @@ class AgentFrameworkToolBridge(
.put("sessions", sessions)
}
private fun isLaunchablePackage(packageName: String): Boolean {
return context.packageManager.getLaunchIntentForPackage(packageName) != null
private fun isEligibleTargetPackage(packageName: String): Boolean {
if (packageName in DISALLOWED_TARGET_PACKAGES) {
return false
}
return runCatching {
context.packageManager.getApplicationInfo(packageName, 0)
}.isSuccess
}
private fun requireString(arguments: JSONObject, fieldName: String): String {

View File

@@ -1,18 +1,21 @@
package com.openai.codexd
import android.content.Context
import android.os.ParcelFileDescriptor
import android.util.Log
import java.io.File
import java.io.IOException
import java.io.OutputStream
import java.net.HttpURLConnection
import java.net.URL
import java.nio.charset.StandardCharsets
import kotlin.concurrent.thread
import org.json.JSONObject
object AgentResponsesProxy {
private const val TAG = "AgentResponsesProxy"
private const val CONNECT_TIMEOUT_MS = 30_000
private const val READ_TIMEOUT_MS = 30_000
private const val READ_TIMEOUT_MS = 0
private const val DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
private const val DEFAULT_CHATGPT_BASE_URL = "https://chatgpt.com/backend-api/codex"
private const val DEFAULT_ORIGINATOR = "codex_cli_rs"
@@ -37,6 +40,33 @@ object AgentResponsesProxy {
return executeRequest(upstreamUrl, requestBody, authSnapshot)
}
fun openResponsesStream(
context: Context,
requestBody: String,
): ParcelFileDescriptor {
val pipe = ParcelFileDescriptor.createPipe()
thread(name = "AgentResponsesProxyStream") {
ParcelFileDescriptor.AutoCloseOutputStream(pipe[1]).use { output ->
streamResponsesTo(context, requestBody, output)
}
}
return pipe[0]
}
fun streamResponsesTo(
context: Context,
requestBody: String,
output: OutputStream,
) {
val authSnapshot = loadAuthSnapshot(File(context.filesDir, "codex-home/auth.json"))
val upstreamUrl = buildResponsesUrl(
upstreamBaseUrl = "provider-default",
authSnapshot.authMode,
)
Log.i(TAG, "Streaming /v1/responses -> $upstreamUrl (auth_mode=${authSnapshot.authMode})")
streamRequest(upstreamUrl, requestBody, authSnapshot, output)
}
internal fun buildResponsesUrl(
upstreamBaseUrl: String,
authMode: String,
@@ -92,23 +122,7 @@ object AgentResponsesProxy {
requestBody: String,
authSnapshot: AuthSnapshot,
): CodexdLocalClient.HttpResponse {
val connection = (URL(upstreamUrl).openConnection() as HttpURLConnection).apply {
requestMethod = "POST"
connectTimeout = CONNECT_TIMEOUT_MS
readTimeout = READ_TIMEOUT_MS
doInput = true
doOutput = true
instanceFollowRedirects = true
setRequestProperty("Authorization", "Bearer ${authSnapshot.bearerToken}")
setRequestProperty("Content-Type", "application/json")
setRequestProperty("Accept", "text/event-stream")
setRequestProperty("Accept-Encoding", "identity")
setRequestProperty("originator", DEFAULT_ORIGINATOR)
setRequestProperty("User-Agent", DEFAULT_USER_AGENT)
if (authSnapshot.authMode == "chatgpt" && !authSnapshot.accountId.isNullOrBlank()) {
setRequestProperty("ChatGPT-Account-ID", authSnapshot.accountId)
}
}
val connection = openConnection(upstreamUrl, authSnapshot)
return try {
connection.outputStream.use { output ->
output.write(requestBody.toByteArray(StandardCharsets.UTF_8))
@@ -127,6 +141,99 @@ object AgentResponsesProxy {
}
}
private fun streamRequest(
upstreamUrl: String,
requestBody: String,
authSnapshot: AuthSnapshot,
output: OutputStream,
) {
val connection = openConnection(upstreamUrl, authSnapshot)
var headersWritten = false
try {
connection.outputStream.use { requestStream ->
requestStream.write(requestBody.toByteArray(StandardCharsets.UTF_8))
requestStream.flush()
}
val statusCode = connection.responseCode
val responseStream = if (statusCode >= 400) connection.errorStream else connection.inputStream
writeRawHttpResponseHeaders(
output = output,
statusCode = statusCode,
contentType = connection.contentType,
)
headersWritten = true
responseStream?.use { it.copyTo(output) }
} catch (err: Exception) {
Log.w(TAG, "Streaming /v1/responses failed", err)
if (!headersWritten) {
writeRawHttpResponseHeaders(
output = output,
statusCode = 502,
contentType = "text/plain; charset=utf-8",
)
output.write(
(err.message ?: err::class.java.simpleName).toByteArray(StandardCharsets.UTF_8),
)
}
} finally {
runCatching { output.flush() }
connection.disconnect()
}
}
private fun openConnection(
upstreamUrl: String,
authSnapshot: AuthSnapshot,
): HttpURLConnection {
return (URL(upstreamUrl).openConnection() as HttpURLConnection).apply {
requestMethod = "POST"
connectTimeout = CONNECT_TIMEOUT_MS
readTimeout = READ_TIMEOUT_MS
doInput = true
doOutput = true
instanceFollowRedirects = true
setRequestProperty("Authorization", "Bearer ${authSnapshot.bearerToken}")
setRequestProperty("Content-Type", "application/json")
setRequestProperty("Accept", "text/event-stream")
setRequestProperty("Accept-Encoding", "identity")
setRequestProperty("originator", DEFAULT_ORIGINATOR)
setRequestProperty("User-Agent", DEFAULT_USER_AGENT)
if (authSnapshot.authMode == "chatgpt" && !authSnapshot.accountId.isNullOrBlank()) {
setRequestProperty("ChatGPT-Account-ID", authSnapshot.accountId)
}
}
}
private fun writeRawHttpResponseHeaders(
output: OutputStream,
statusCode: Int,
contentType: String?,
) {
val headers = buildString {
append("HTTP/1.1 $statusCode ${reasonPhrase(statusCode)}\r\n")
if (!contentType.isNullOrBlank()) {
append("Content-Type: $contentType\r\n")
}
append("Connection: close\r\n")
append("\r\n")
}
output.write(headers.toByteArray(StandardCharsets.US_ASCII))
}
private fun reasonPhrase(statusCode: Int): String {
return when (statusCode) {
200 -> "OK"
400 -> "Bad Request"
401 -> "Unauthorized"
403 -> "Forbidden"
404 -> "Not Found"
500 -> "Internal Server Error"
502 -> "Bad Gateway"
503 -> "Service Unavailable"
else -> "Response"
}
}
private fun JSONObject.stringOrNull(key: String): String? {
if (!has(key) || isNull(key)) {
return null

View File

@@ -0,0 +1,281 @@
package com.openai.codexd
import android.content.Context
import android.net.LocalServerSocket
import android.net.LocalSocket
import android.net.LocalSocketAddress
import android.util.Log
import com.openai.codex.bridge.AgentSocketBridgeContract
import java.io.ByteArrayOutputStream
import java.io.Closeable
import java.io.EOFException
import java.io.IOException
import java.io.InputStream
import java.io.OutputStream
import java.nio.charset.StandardCharsets
import java.util.Collections
import java.util.concurrent.atomic.AtomicBoolean
import org.json.JSONObject
object AgentSocketBridgeServer {
@Volatile
private var runningServer: RunningServer? = null
fun ensureStarted(context: Context) {
synchronized(this) {
if (runningServer != null) {
return
}
runningServer = RunningServer(context.applicationContext).also(RunningServer::start)
}
}
private class RunningServer(
private val context: Context,
) : Closeable {
companion object {
private const val TAG = "AgentSocketBridge"
}
private val boundSocket = LocalSocket().apply {
bind(
LocalSocketAddress(
AgentSocketBridgeContract.SOCKET_NAME,
LocalSocketAddress.Namespace.ABSTRACT,
),
)
}
private val serverSocket = LocalServerSocket(boundSocket.fileDescriptor)
private val closed = AtomicBoolean(false)
private val clientSockets = Collections.synchronizedSet(mutableSetOf<LocalSocket>())
private val acceptThread = Thread(::acceptLoop, "AgentSocketBridge")
fun start() {
acceptThread.start()
Log.i(TAG, "Listening on ${AgentSocketBridgeContract.SOCKET_PATH}")
}
override fun close() {
if (!closed.compareAndSet(false, true)) {
return
}
runCatching { serverSocket.close() }
runCatching { boundSocket.close() }
synchronized(clientSockets) {
clientSockets.forEach { socket -> runCatching { socket.close() } }
clientSockets.clear()
}
acceptThread.interrupt()
}
private fun acceptLoop() {
while (!closed.get()) {
val socket = try {
serverSocket.accept()
} catch (err: IOException) {
if (!closed.get()) {
Log.w(TAG, "Failed to accept Agent socket bridge connection", err)
}
return
}
clientSockets += socket
Thread(
{ handleClient(socket) },
"AgentSocketBridgeClient",
).start()
}
}
private fun handleClient(socket: LocalSocket) {
socket.use { client ->
try {
val request = readRequest(client.inputStream)
when {
request.method == "GET" && request.path == "/internal/runtime/status" -> {
writeJsonResponse(
output = client.outputStream,
statusCode = 200,
body = buildRuntimeStatusJson().toString(),
)
}
request.method == "POST" && request.path == "/v1/responses" -> {
AgentResponsesProxy.streamResponsesTo(
context = context,
requestBody = request.body.orEmpty(),
output = client.outputStream,
)
}
request.method != "POST" && request.path == "/v1/responses" -> {
writeTextResponse(
output = client.outputStream,
statusCode = 405,
body = "Unsupported socket bridge method: ${request.method}",
)
}
else -> {
writeTextResponse(
output = client.outputStream,
statusCode = 404,
body = "Unsupported socket bridge path: ${request.path}",
)
}
}
} catch (err: Exception) {
if (!closed.get()) {
Log.w(TAG, "Agent socket bridge request failed", err)
runCatching {
writeTextResponse(
output = client.outputStream,
statusCode = 502,
body = err.message ?: err::class.java.simpleName,
)
}
}
} finally {
clientSockets -= client
}
}
}
private fun buildRuntimeStatusJson(): JSONObject {
val status = AgentCodexAppServerClient.readRuntimeStatus(context)
return JSONObject()
.put("authenticated", status.authenticated)
.put("accountEmail", status.accountEmail)
.put("clientCount", status.clientCount)
.put("modelProviderId", status.modelProviderId)
.put("configuredModel", status.configuredModel)
.put("effectiveModel", status.effectiveModel)
.put("upstreamBaseUrl", status.upstreamBaseUrl)
}
}
private data class ParsedRequest(
val method: String,
val path: String,
val body: String?,
)
private fun readRequest(input: InputStream): ParsedRequest {
val headerBuffer = ByteArrayOutputStream()
var matched = 0
while (matched < 4) {
val next = input.read()
if (next == -1) {
throw EOFException("unexpected EOF while reading Agent socket bridge request headers")
}
headerBuffer.write(next)
matched = when {
matched == 0 && next == '\r'.code -> 1
matched == 1 && next == '\n'.code -> 2
matched == 2 && next == '\r'.code -> 3
matched == 3 && next == '\n'.code -> 4
next == '\r'.code -> 1
else -> 0
}
}
val headerBytes = headerBuffer.toByteArray()
val headerText = headerBytes
.copyOfRange(0, headerBytes.size - 4)
.toString(StandardCharsets.US_ASCII)
val lines = headerText.split("\r\n")
val requestLine = lines.firstOrNull()
?: throw IOException("socket bridge request line missing")
val requestParts = requestLine.split(" ", limit = 3)
if (requestParts.size < 2) {
throw IOException("invalid socket bridge request line: $requestLine")
}
val headers = mutableMapOf<String, String>()
lines.drop(1).forEach { line ->
val separatorIndex = line.indexOf(':')
if (separatorIndex <= 0) {
return@forEach
}
val name = line.substring(0, separatorIndex).trim().lowercase()
val value = line.substring(separatorIndex + 1).trim()
headers[name] = value
}
if (headers["transfer-encoding"]?.contains("chunked", ignoreCase = true) == true) {
throw IOException("chunked socket bridge requests are unsupported")
}
val contentLength = headers["content-length"]?.toIntOrNull() ?: 0
val bodyBytes = ByteArray(contentLength)
var offset = 0
while (offset < bodyBytes.size) {
val read = input.read(bodyBytes, offset, bodyBytes.size - offset)
if (read == -1) {
throw EOFException("unexpected EOF while reading Agent socket bridge request body")
}
offset += read
}
return ParsedRequest(
method = requestParts[0],
path = requestParts[1],
body = if (bodyBytes.isEmpty()) null else bodyBytes.toString(StandardCharsets.UTF_8),
)
}
private fun writeJsonResponse(
output: OutputStream,
statusCode: Int,
body: String,
) {
writeResponse(
output = output,
statusCode = statusCode,
body = body,
contentType = "application/json; charset=utf-8",
)
}
private fun writeTextResponse(
output: OutputStream,
statusCode: Int,
body: String,
) {
writeResponse(
output = output,
statusCode = statusCode,
body = body,
contentType = "text/plain; charset=utf-8",
)
}
private fun writeResponse(
output: OutputStream,
statusCode: Int,
body: String,
contentType: String,
) {
val bodyBytes = body.toByteArray(StandardCharsets.UTF_8)
val headers = buildString {
append("HTTP/1.1 $statusCode ${reasonPhrase(statusCode)}\r\n")
append("Content-Type: $contentType\r\n")
append("Content-Length: ${bodyBytes.size}\r\n")
append("Connection: close\r\n")
append("\r\n")
}
output.write(headers.toByteArray(StandardCharsets.US_ASCII))
output.write(bodyBytes)
output.flush()
}
private fun reasonPhrase(statusCode: Int): String {
return when (statusCode) {
200 -> "OK"
400 -> "Bad Request"
401 -> "Unauthorized"
403 -> "Forbidden"
404 -> "Not Found"
500 -> "Internal Server Error"
502 -> "Bad Gateway"
503 -> "Service Unavailable"
else -> "Response"
}
}
}

View File

@@ -5,6 +5,7 @@ import android.util.Log
import java.io.IOException
import org.json.JSONArray
import org.json.JSONObject
import org.json.JSONTokener
data class AgentDelegationTarget(
val packageName: String,
@@ -23,19 +24,69 @@ data class AgentDelegationPlan(
object AgentTaskPlanner {
private const val TAG = "AgentTaskPlanner"
private const val PLANNER_ATTEMPTS = 2
private val PLANNER_INSTRUCTIONS =
"""
You are Codex acting as the Android Agent orchestrator.
The user interacts only with the Agent. Decide which installed Android packages should receive delegated Genie sessions.
Use the standard Android shell tools already available in this runtime, such as `cmd package`, `pm`, and `am`, to inspect installed packages and resolve the correct targets.
After deciding on the target packages, call the framework session tool `${AgentFrameworkToolBridge.START_DIRECT_SESSION_TOOL}` exactly once.
Return exactly one JSON object and nothing else. Do not wrap it in markdown fences.
JSON schema:
{
"targets": [
{
"packageName": "installed.package",
"objective": "free-form delegated objective for the child Genie"
}
],
"reason": "short rationale",
"allowDetachedMode": true
}
Rules:
- Choose the fewest packages needed to complete the request.
- The framework session tool `targets` must be non-empty.
- `targets` must be non-empty.
- Each delegated `objective` should be written for the child Genie, not the user.
- After the framework session tool succeeds, reply with a short summary for the Agent UI.
- Stop after at most 6 shell commands.
- Prefer direct package-manager commands over grepping large package lists.
- Verify each chosen package by inspecting its package dump or query-activities output before returning it.
- Only choose packages that directly own the requested app behavior. Never choose helper packages such as `com.android.shell`, `com.android.systemui`, or the Codex Agent/Genie packages unless the user explicitly asked for them.
- For intent resolution commands, include `--user 0`.
- If the user objective already names a specific installed package, use it directly after verification.
- `cmd package list packages PACKAGE_NAME` alone is not sufficient verification.
- Prefer focused verification commands such as `cmd package dump PACKAGE | sed -n '1,120p'`, `cmd package query-activities --brief --user 0 -p PACKAGE -a android.intent.action.MAIN`, and `cmd package query-activities --brief --user 0 -p PACKAGE -a RELEVANT_ACTION`.
- Do not enumerate every launcher activity on the device. Query specific candidate packages instead.
""".trimIndent()
private val PLANNER_OUTPUT_SCHEMA =
JSONObject()
.put("type", "object")
.put(
"properties",
JSONObject()
.put(
"targets",
JSONObject()
.put("type", "array")
.put("minItems", 1)
.put(
"items",
JSONObject()
.put("type", "object")
.put(
"properties",
JSONObject()
.put("packageName", JSONObject().put("type", "string"))
.put("objective", JSONObject().put("type", "string")),
)
.put("required", JSONArray().put("packageName").put("objective"))
.put("additionalProperties", false),
),
)
.put("reason", JSONObject().put("type", "string"))
.put("allowDetachedMode", JSONObject().put("type", "boolean")),
)
.put("required", JSONArray().put("targets").put("reason").put("allowDetachedMode"))
.put("additionalProperties", false)
fun startSession(
context: Context,
@@ -62,42 +113,117 @@ object AgentTaskPlanner {
allowDetachedMode = allowDetachedMode,
)
}
var sessionStartResult: SessionStartResult? = null
val frameworkToolBridge = AgentFrameworkToolBridge(context, sessionController)
Log.i(TAG, "Planning Agent session for objective=${userObjective.take(160)}")
AgentCodexAppServerClient.requestText(
context = context,
instructions = PLANNER_INSTRUCTIONS,
prompt = buildPlannerPrompt(userObjective),
dynamicTools = frameworkToolBridge.buildPlanningToolSpecs(),
toolCallHandler = { toolName, arguments ->
frameworkToolBridge.handleToolCall(
toolName = toolName,
arguments = arguments,
userObjective = userObjective,
onSessionStarted = { startedSession ->
if (sessionStartResult != null) {
throw IOException("Agent runtime attempted to start multiple Genie batches")
}
Log.i(
TAG,
"Framework tool started parent=${startedSession.parentSessionId} children=${startedSession.childSessionIds}",
)
sessionStartResult = startedSession
},
val isEligibleTargetPackage = { packageName: String ->
runCatching { context.packageManager.getApplicationInfo(packageName, 0) }.isSuccess &&
packageName !in setOf(
"com.android.shell",
"com.android.systemui",
"com.openai.codexd",
"com.openai.codex.genie",
)
},
requestUserInputHandler = requestUserInputHandler,
}
var previousPlannerResponse: String? = null
var plannerRequest: AgentFrameworkToolBridge.StartDirectSessionRequest? = null
var lastPlannerError: IOException? = null
for (attemptIndex in 0 until PLANNER_ATTEMPTS) {
val plannerResponse = AgentCodexAppServerClient.requestText(
context = context,
instructions = PLANNER_INSTRUCTIONS,
prompt = buildPlannerPrompt(
userObjective = userObjective,
previousPlannerResponse = previousPlannerResponse,
previousPlannerError = lastPlannerError?.message,
),
outputSchema = PLANNER_OUTPUT_SCHEMA,
requestUserInputHandler = requestUserInputHandler,
)
Log.i(TAG, "Planner response=${plannerResponse.take(400)}")
previousPlannerResponse = plannerResponse
val parsedRequest = runCatching {
parsePlannerResponse(
responseText = plannerResponse,
userObjective = userObjective,
isEligibleTargetPackage = isEligibleTargetPackage,
)
}.getOrElse { err ->
if (err is IOException && attemptIndex < PLANNER_ATTEMPTS - 1) {
Log.w(TAG, "Planner response rejected: ${err.message}")
lastPlannerError = err
continue
}
throw err
}
plannerRequest = parsedRequest
break
}
val request = plannerRequest ?: throw (lastPlannerError
?: IOException("Planner did not return a valid session plan"))
val sessionStartResult = sessionController.startDirectSession(
plan = request.plan,
allowDetachedMode = allowDetachedMode && request.allowDetachedMode,
)
Log.i(TAG, "Planner sessionStartResult=$sessionStartResult")
return sessionStartResult
?: throw IOException("Agent runtime did not launch any Genie sessions")
}
private fun buildPlannerPrompt(userObjective: String): String {
return """
User objective:
$userObjective
""".trimIndent()
private fun buildPlannerPrompt(
userObjective: String,
previousPlannerResponse: String?,
previousPlannerError: String?,
): String {
return buildString {
appendLine("User objective:")
appendLine(userObjective)
if (!previousPlannerError.isNullOrBlank()) {
appendLine()
appendLine("Previous candidate plan was rejected by host validation:")
appendLine(previousPlannerError)
appendLine("Choose a different installed target package and verify it with focused package commands.")
}
if (!previousPlannerResponse.isNullOrBlank()) {
appendLine()
appendLine("Previous invalid planner response:")
appendLine(previousPlannerResponse)
}
}.trim()
}
internal fun parsePlannerResponse(
responseText: String,
userObjective: String,
isEligibleTargetPackage: (String) -> Boolean,
): AgentFrameworkToolBridge.StartDirectSessionRequest {
val plannerJson = extractPlannerJson(responseText)
return AgentFrameworkToolBridge.parseStartDirectSessionArguments(
arguments = plannerJson,
userObjective = userObjective,
isEligibleTargetPackage = isEligibleTargetPackage,
)
}
private fun extractPlannerJson(responseText: String): JSONObject {
val trimmed = responseText.trim()
parseJsonObject(trimmed)?.let { return it }
val unfenced = trimmed
.removePrefix("```json")
.removePrefix("```")
.removeSuffix("```")
.trim()
parseJsonObject(unfenced)?.let { return it }
val firstBrace = trimmed.indexOf('{')
val lastBrace = trimmed.lastIndexOf('}')
if (firstBrace >= 0 && lastBrace > firstBrace) {
parseJsonObject(trimmed.substring(firstBrace, lastBrace + 1))?.let { return it }
}
throw IOException("Planner did not return a valid JSON object")
}
private fun parseJsonObject(text: String): JSONObject? {
return runCatching {
val tokener = JSONTokener(text)
val value = tokener.nextValue()
value as? JSONObject
}.getOrNull()
}
}

View File

@@ -3,6 +3,7 @@ package com.openai.codexd
import android.app.Service
import android.content.Intent
import android.os.IBinder
import android.os.ParcelFileDescriptor
import android.util.Log
import com.openai.codex.bridge.BridgeHttpResponse
import com.openai.codex.bridge.BridgeRuntimeStatus
@@ -45,6 +46,19 @@ class CodexAgentBridgeService : Service() {
Log.i(TAG, "Proxied /v1/responses")
return BridgeHttpResponse(response.statusCode, response.body)
}
override fun openResponsesStream(requestBody: String?): ParcelFileDescriptor {
val stream = runCatching {
AgentResponsesProxy.openResponsesStream(
this@CodexAgentBridgeService,
requestBody.orEmpty(),
)
}.getOrElse { err ->
throw err.asBinderError("openResponsesStream")
}
Log.i(TAG, "Opened /v1/responses stream")
return stream
}
}
override fun onBind(intent: Intent?): IBinder {

View File

@@ -20,6 +20,9 @@ class CodexAgentService : AgentService() {
private const val AUTO_ANSWER_INSTRUCTIONS =
"You are Codex acting as the Android Agent supervising a Genie execution. If you can answer the current Genie question from the available session context, call the framework session tool `android.framework.sessions.answer_question` exactly once with a short free-form answer. You may inspect current framework state with `android.framework.sessions.list`. If user input is required, do not call any framework tool. Instead reply with `ESCALATE: ` followed by the exact question the Agent should ask the user."
private const val MAX_AUTO_ANSWER_CONTEXT_CHARS = 800
private val handledGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
private val pendingGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
private val handledBridgeRequests = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
}
private sealed class AutoAnswerResult {
@@ -30,11 +33,14 @@ class CodexAgentService : AgentService() {
) : AutoAnswerResult()
}
private val handledGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
private val pendingGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
private val agentManager by lazy { getSystemService(AgentManager::class.java) }
private val sessionController by lazy { AgentSessionController(this) }
override fun onCreate() {
super.onCreate()
AgentSocketBridgeServer.ensureStarted(this)
}
override fun onSessionChanged(session: AgentSessionInfo) {
Log.i(TAG, "onSessionChanged $session")
maybeAutoAnswerGenieQuestion(session)
@@ -45,6 +51,7 @@ class CodexAgentService : AgentService() {
Log.i(TAG, "onSessionRemoved sessionId=$sessionId")
AgentQuestionNotifier.cancel(this, sessionId)
handledGenieQuestions.removeIf { it.startsWith("$sessionId:") }
handledBridgeRequests.removeIf { it.startsWith("$sessionId:") }
pendingGenieQuestions.removeIf { it.startsWith("$sessionId:") }
}
@@ -227,6 +234,20 @@ class CodexAgentService : AgentService() {
) {
val request = JSONObject(question.removePrefix(BRIDGE_REQUEST_PREFIX))
val requestId = request.optString("requestId")
if (requestId.isNotBlank()) {
val bridgeRequestKey = "${session.sessionId}:$requestId"
if (!handledBridgeRequests.add(bridgeRequestKey)) {
Log.i(
TAG,
"Skipping duplicate bridge question method=${request.optString("method")} requestId=$requestId session=${session.sessionId}",
)
return
}
}
Log.i(
TAG,
"Answering bridge question method=${request.optString("method")} requestId=$requestId session=${session.sessionId}",
)
val response: JSONObject = runCatching {
when (request.optString("method")) {
BRIDGE_METHOD_GET_RUNTIME_STATUS -> {
@@ -292,6 +313,14 @@ class CodexAgentService : AgentService() {
}
private fun genieQuestionKey(sessionId: String, question: String): String {
if (isBridgeQuestion(question)) {
val requestId = runCatching {
JSONObject(question.removePrefix(BRIDGE_REQUEST_PREFIX)).optString("requestId").trim()
}.getOrNull()
if (!requestId.isNullOrEmpty()) {
return "$sessionId:bridge:$requestId"
}
}
return "$sessionId:$question"
}
}

View File

@@ -35,6 +35,8 @@ class MainActivity : Activity() {
private const val STATUS_REFRESH_INTERVAL_MS = 2000L
private const val ACTION_DEBUG_START_AGENT_SESSION =
"com.openai.codexd.action.DEBUG_START_AGENT_SESSION"
private const val ACTION_DEBUG_CANCEL_ALL_AGENT_SESSIONS =
"com.openai.codexd.action.DEBUG_CANCEL_ALL_AGENT_SESSIONS"
private const val EXTRA_DEBUG_PROMPT = "prompt"
private const val EXTRA_DEBUG_TARGET_PACKAGE = "targetPackage"
}
@@ -86,6 +88,7 @@ class MainActivity : Activity() {
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
AgentSocketBridgeServer.ensureStarted(this)
setContentView(R.layout.activity_main)
updatePaths()
handleSessionIntent(intent)
@@ -131,6 +134,25 @@ class MainActivity : Activity() {
}
private fun maybeStartSessionFromIntent(intent: Intent?) {
if (intent?.action == ACTION_DEBUG_CANCEL_ALL_AGENT_SESSIONS) {
Log.i(TAG, "Handling debug cancel-all Agent sessions intent")
thread {
val result = runCatching { agentSessionController.cancelActiveSessions() }
result.onFailure { err ->
Log.w(TAG, "Failed to cancel Agent sessions from debug intent", err)
showToast("Failed to cancel active sessions: ${err.message}")
}
result.onSuccess { cancelResult ->
focusedFrameworkSessionId = null
val cancelledCount = cancelResult.cancelledSessionIds.size
val failedCount = cancelResult.failedSessionIds.size
showToast("Cancelled $cancelledCount sessions, $failedCount failed")
refreshAgentSessions(force = true)
}
}
intent.action = null
return
}
if (intent?.action != ACTION_DEBUG_START_AGENT_SESSION) {
return
}