Let Agent answer Genie questions through codexd

Teach the Agent service to answer non-bridge Genie questions through the embedded codexd runtime, and upgrade the Genie scaffold to loop over QUESTION:/RESULT: turns instead of assuming a single user reply. The current bridge now sends Responses input in list form, which fixes the last observed backend validation error.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-03-19 08:39:18 -07:00
parent 6acd1f7473
commit 1340af08aa
5 changed files with 321 additions and 46 deletions

View File

@@ -6,7 +6,9 @@ import android.app.agent.AgentSessionEvent
import android.app.agent.AgentSessionInfo
import android.os.Process
import android.util.Log
import org.json.JSONArray
import org.json.JSONObject
import java.io.IOException
import java.util.concurrent.ConcurrentHashMap
import kotlin.concurrent.thread
@@ -19,20 +21,28 @@ class CodexAgentService : AgentService() {
private const val BRIDGE_RESPONSE_PREFIX = "__codex_bridge_result__ "
private const val METHOD_GET_AUTH_STATUS = "get_auth_status"
private const val METHOD_HTTP_REQUEST = "http_request"
private const val AUTO_ANSWER_INSTRUCTIONS =
"You are Codex acting as the Android Agent supervising a Genie execution. Reply with the exact free-form answer that should be sent back to the Genie. Keep it short and actionable. If the Genie can proceed without extra constraints, reply with exactly: continue"
private const val MAX_AUTO_ANSWER_CONTEXT_CHARS = 800
}
private val handledBridgeRequests = ConcurrentHashMap.newKeySet<String>()
private val handledGenieQuestions = ConcurrentHashMap.newKeySet<String>()
private val pendingGenieQuestions = ConcurrentHashMap.newKeySet<String>()
private val agentManager by lazy { getSystemService(AgentManager::class.java) }
/**
 * Handles a session update by running the three per-session handlers in order.
 *
 * Order matters: [maybeAutoAnswerGenieQuestion] registers the question in
 * `pendingGenieQuestions` before [updateQuestionNotification] runs, and the
 * latter returns early for questions that are still pending, so no user
 * notification is posted while an auto-answer attempt is in flight.
 */
override fun onSessionChanged(session: AgentSessionInfo) {
Log.i(TAG, "onSessionChanged $session")
// Internal bridge traffic is handled first (handler body is outside this view).
handleInternalBridgeQuestion(session.sessionId)
// Start (or skip) the background auto-answer attempt for a visible Genie question.
maybeAutoAnswerGenieQuestion(session)
// Refresh or cancel the user-facing question notification.
updateQuestionNotification(session)
}
/**
 * Cleans up per-session state once a session is gone: cancels its question
 * notification and drops every handled/pending question key that starts with
 * the `"<sessionId>:"` prefix.
 */
override fun onSessionRemoved(sessionId: String) {
    Log.i(TAG, "onSessionRemoved sessionId=$sessionId")
    AgentQuestionNotifier.cancel(this, sessionId)
    val keyPrefix = "$sessionId:"
    // Same order as before: handled keys first, then pending keys.
    listOf(handledGenieQuestions, pendingGenieQuestions).forEach { keys ->
        keys.removeIf { key -> key.startsWith(keyPrefix) }
    }
}
private fun handleInternalBridgeQuestion(sessionId: String) {
@@ -115,7 +125,7 @@ class CodexAgentService : AgentService() {
}
runCatching {
answerBridgeQuestion(manager, sessionId, "$BRIDGE_RESPONSE_PREFIX$response")
answerQuestionWithRetry(manager, sessionId, "$BRIDGE_RESPONSE_PREFIX$response")
}.onFailure { err ->
handledBridgeRequests.remove(requestKey)
Log.w(TAG, "Failed to answer bridge question for $sessionId", err)
@@ -123,7 +133,7 @@ class CodexAgentService : AgentService() {
}
}
private fun answerBridgeQuestion(manager: AgentManager, sessionId: String, response: String) {
private fun answerQuestionWithRetry(manager: AgentManager, sessionId: String, response: String) {
repeat(BRIDGE_ANSWER_RETRY_COUNT) { attempt ->
runCatching {
manager.answerQuestion(sessionId, response)
@@ -169,22 +179,54 @@ class CodexAgentService : AgentService() {
!isSessionWaitingForUser(manager, sessionId)
}
/**
 * Attempts to answer the latest visible Genie question on a background thread.
 *
 * Does nothing unless the session is in `STATE_WAITING_FOR_USER`, an
 * [AgentManager] is available, and a visible (non-bridge) question exists that
 * is neither already handled nor currently pending.
 *
 * On success the question key is recorded in `handledGenieQuestions` and the
 * question notification is cancelled. On failure the question is surfaced to
 * the user through [AgentQuestionNotifier] if the session is still waiting.
 *
 * The pending marker is released in a `finally` block so that an exception
 * thrown by the failure handler itself (for example from the notifier) cannot
 * leave the key stuck in `pendingGenieQuestions`, which would make
 * `updateQuestionNotification` skip this question indefinitely.
 *
 * @param session the session snapshot delivered by the framework callback.
 */
private fun maybeAutoAnswerGenieQuestion(session: AgentSessionInfo) {
    if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
        return
    }
    val manager = agentManager ?: return
    val events = manager.getSessionEvents(session.sessionId)
    val question = findVisibleQuestion(events) ?: return
    val questionKey = genieQuestionKey(session.sessionId, question)
    // `add` returning false means another attempt for the same question is already running.
    if (handledGenieQuestions.contains(questionKey) || !pendingGenieQuestions.add(questionKey)) {
        return
    }
    thread(name = "CodexAgentAutoAnswer-${session.sessionId}") {
        try {
            Log.i(TAG, "Attempting Agent auto-answer for ${session.sessionId}")
            runCatching {
                val answer = requestGenieAutoAnswer(session, question, events)
                answerQuestionWithRetry(manager, session.sessionId, answer)
                handledGenieQuestions.add(questionKey)
                AgentQuestionNotifier.cancel(this, session.sessionId)
                Log.i(TAG, "Auto-answered Genie question for ${session.sessionId}")
            }.onFailure { err ->
                Log.i(TAG, "Agent auto-answer unavailable for ${session.sessionId}: ${err.message}")
                // Fall back to the user-facing notification only if the question is still open.
                if (isSessionWaitingForUser(manager, session.sessionId)) {
                    AgentQuestionNotifier.showQuestion(
                        context = this,
                        sessionId = session.sessionId,
                        targetPackage = session.targetPackage,
                        question = question,
                    )
                }
            }
        } finally {
            // Always release the pending marker, even if the fallback path throws.
            pendingGenieQuestions.remove(questionKey)
        }
    }
}
private fun updateQuestionNotification(session: AgentSessionInfo) {
if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
AgentQuestionNotifier.cancel(this, session.sessionId)
return
}
val manager = agentManager ?: return
val question = manager.getSessionEvents(session.sessionId)
.lastOrNull { event ->
event.type == AgentSessionEvent.TYPE_QUESTION &&
event.message != null &&
!event.message.startsWith(BRIDGE_REQUEST_PREFIX)
}?.message
val question = findVisibleQuestion(manager.getSessionEvents(session.sessionId))
if (question.isNullOrBlank()) {
AgentQuestionNotifier.cancel(this, session.sessionId)
return
}
if (pendingGenieQuestions.contains(genieQuestionKey(session.sessionId, question))) {
return
}
AgentQuestionNotifier.showQuestion(
context = this,
sessionId = session.sessionId,
@@ -192,4 +234,129 @@ class CodexAgentService : AgentService() {
question = question,
)
}
/**
 * Asks the embedded codexd runtime for the free-form answer to send back to the Genie.
 *
 * Issues one non-streaming `POST /v1/responses` request whose `input` is a
 * single user message in list form, carrying the prompt built by
 * [buildAutoAnswerPrompt].
 *
 * @param session the session the question belongs to.
 * @param question the visible Genie question being answered.
 * @param events session events used to render recent context for the prompt.
 * @return the output text extracted from the Responses payload.
 * @throws IOException if codexd is not authenticated, exposes no usable
 *   effective model, or replies with a non-200 status.
 */
private fun requestGenieAutoAnswer(
    session: AgentSessionInfo,
    question: String,
    events: List<AgentSessionEvent>,
): String {
    val runtimeStatus = CodexdLocalClient.waitForRuntimeStatus(this)
    if (!runtimeStatus.authenticated) {
        throw IOException("codexd is not authenticated")
    }
    // A blank model name is as unusable as a missing one; reject it here instead of
    // forwarding it to the backend (the Genie side applies the same isNullOrBlank rule).
    val model = runtimeStatus.effectiveModel?.takeUnless { it.isBlank() }
        ?: throw IOException("codexd effective model unavailable")
    val promptPart = JSONObject()
        .put("type", "input_text")
        .put("text", buildAutoAnswerPrompt(session, question, events))
    val userMessage = JSONObject()
        .put("role", "user")
        .put("content", JSONArray().put(promptPart))
    val requestBody = JSONObject()
        .put("model", model)
        .put("store", false)
        .put("stream", false)
        .put("instructions", AUTO_ANSWER_INSTRUCTIONS)
        .put("input", JSONArray().put(userMessage))
        .toString()
    val response = CodexdLocalClient.waitForResponse(this, "POST", "/v1/responses", requestBody)
    if (response.statusCode != 200) {
        throw IOException("HTTP ${response.statusCode}: ${response.body}")
    }
    return parseResponsesOutputText(response.body)
}
/**
 * Builds the user prompt for an auto-answer request.
 *
 * The prompt is assembled line by line instead of through a raw string with
 * `trimIndent()`: the question and the rendered context are interpolated
 * values that may span several unindented lines, which would reduce the
 * common indent to zero and leave the template's source indentation in the
 * prompt.
 *
 * @param session supplies the target package ("unknown" when absent).
 * @param question the current Genie question.
 * @param events session events rendered as recent context.
 * @return a newline-separated prompt with no leading indentation.
 */
private fun buildAutoAnswerPrompt(
    session: AgentSessionInfo,
    question: String,
    events: List<AgentSessionEvent>,
): String {
    val recentContext = renderRecentContext(events)
    return listOf(
        "Target package: ${session.targetPackage ?: "unknown"}",
        "Current Genie question: $question",
        "Recent session context:",
        recentContext,
    ).joinToString("\n")
}
/**
 * Renders the last six non-bridge events as `"<Type>: <message>"` lines.
 *
 * Returns a fixed placeholder when nothing is rendered, and keeps only the
 * trailing `MAX_AUTO_ANSWER_CONTEXT_CHARS` characters when the rendering is
 * longer than that limit.
 */
private fun renderRecentContext(events: List<AgentSessionEvent>): String {
    val visibleTail = events.filterNot { candidate -> isInternalBridgeEvent(candidate) }.takeLast(6)
    val rendered = buildString {
        visibleTail.forEachIndexed { position, event ->
            if (position > 0) {
                append("\n")
            }
            append("${eventTypeToString(event.type)}: ${event.message ?: ""}")
        }
    }
    return when {
        rendered.length > MAX_AUTO_ANSWER_CONTEXT_CHARS -> rendered.takeLast(MAX_AUTO_ANSWER_CONTEXT_CHARS)
        rendered.isBlank() -> "No prior Genie context."
        else -> rendered
    }
}
/**
 * Extracts the model's text from a `/v1/responses` JSON body.
 *
 * Prefers a top-level `output_text` string when it is present and non-blank;
 * otherwise concatenates the `text` of every `output[].content[]` part whose
 * `type` is `output_text`.
 *
 * Explicit JSON `null` values are skipped via `isNull` before calling
 * `optString`, because `optString` can stringify a JSON null as the literal
 * text "null", which would then be forwarded to the Genie as an answer.
 *
 * @param body raw JSON response body.
 * @return the non-blank output text.
 * @throws IOException if the payload has no `output` array or no output text.
 */
private fun parseResponsesOutputText(body: String): String {
    val data = JSONObject(body)
    val directOutput = if (data.isNull("output_text")) "" else data.optString("output_text")
    if (directOutput.isNotBlank()) {
        return directOutput
    }
    val output = data.optJSONArray("output")
        ?: throw IOException("Responses payload missing output")
    val combined = buildString {
        for (outputIndex in 0 until output.length()) {
            val item = output.optJSONObject(outputIndex) ?: continue
            val content = item.optJSONArray("content") ?: continue
            for (contentIndex in 0 until content.length()) {
                val part = content.optJSONObject(contentIndex) ?: continue
                // Only real text parts contribute; a null `text` must not become "null".
                if (part.optString("type") == "output_text" && !part.isNull("text")) {
                    append(part.optString("text"))
                }
            }
        }
    }
    if (combined.isBlank()) {
        throw IOException("Responses payload missing output_text content")
    }
    return combined
}
/**
 * Returns the message of the most recent question event that has a non-blank
 * message and is not an internal bridge request, or `null` if there is none.
 */
private fun findVisibleQuestion(events: List<AgentSessionEvent>): String? {
    // Walk newest-to-oldest and stop at the first event that qualifies.
    for (candidate in events.asReversed()) {
        if (candidate.type != AgentSessionEvent.TYPE_QUESTION) continue
        if (candidate.message.isNullOrBlank()) continue
        if (isInternalBridgeEvent(candidate)) continue
        return candidate.message
    }
    return null
}
/**
 * Tells whether an event is internal bridge traffic: a question whose message
 * starts with the bridge request prefix, or an answer whose message starts
 * with the bridge response prefix. Events without a message are never internal.
 */
private fun isInternalBridgeEvent(event: AgentSessionEvent): Boolean {
    val internalPrefix = when (event.type) {
        AgentSessionEvent.TYPE_QUESTION -> BRIDGE_REQUEST_PREFIX
        AgentSessionEvent.TYPE_ANSWER -> BRIDGE_RESPONSE_PREFIX
        else -> return false
    }
    return event.message?.startsWith(internalPrefix) == true
}
/**
 * Maps an [AgentSessionEvent] type constant to the label used when rendering
 * session context; unrecognised values render as `Event(<type>)`.
 */
private fun eventTypeToString(type: Int): String = when (type) {
    AgentSessionEvent.TYPE_TRACE -> "Trace"
    AgentSessionEvent.TYPE_QUESTION -> "Question"
    AgentSessionEvent.TYPE_RESULT -> "Result"
    AgentSessionEvent.TYPE_ERROR -> "Error"
    AgentSessionEvent.TYPE_POLICY -> "Policy"
    AgentSessionEvent.TYPE_ANSWER -> "Answer"
    else -> "Event($type)"
}
/**
 * Builds the de-duplication key for a Genie question: the session id and the
 * question text joined by a colon.
 */
private fun genieQuestionKey(sessionId: String, question: String): String =
    sessionId + ":" + question
}

View File

@@ -20,6 +20,16 @@ object CodexdLocalClient {
val clientCount: Int,
)
/**
 * Snapshot of the codexd runtime, parsed from the JSON body returned by
 * `GET /internal/runtime/status`.
 *
 * @property authenticated value of the `authenticated` flag; `false` when the field is absent.
 * @property accountEmail value of `accountEmail`, or `null` when the field is null or absent.
 * @property clientCount value of `clientCount`; `0` when the field is absent.
 * @property modelProviderId value of `modelProviderId`; `"unknown"` when the field is absent.
 * @property configuredModel value of `configuredModel`, or `null` when the field is null or absent.
 * @property effectiveModel value of `effectiveModel`, or `null` when the field is null or absent.
 *   Callers use it as the `model` of `/v1/responses` requests.
 * @property upstreamBaseUrl value of `upstreamBaseUrl`; `"unknown"` when the field is absent.
 */
data class RuntimeStatus(
val authenticated: Boolean,
val accountEmail: String?,
val clientCount: Int,
val modelProviderId: String,
val configuredModel: String?,
val effectiveModel: String?,
val upstreamBaseUrl: String,
)
fun waitForResponse(
context: Context,
method: String,
@@ -52,6 +62,14 @@ object CodexdLocalClient {
return parseAuthStatus(response.body)
}
/**
 * Fetches `/internal/runtime/status` through [waitForResponse] and parses the body.
 *
 * @throws IOException when the response status is not 200.
 */
fun waitForRuntimeStatus(context: Context): RuntimeStatus {
    val statusResponse = waitForResponse(context, "GET", "/internal/runtime/status", null)
    return when (statusResponse.statusCode) {
        200 -> parseRuntimeStatus(statusResponse.body)
        else -> throw IOException("HTTP ${statusResponse.statusCode}: ${statusResponse.body}")
    }
}
fun fetchAuthStatus(socketPath: String): AuthStatus? {
return try {
val response = executeRequest(socketPath, "GET", "/internal/auth/status", null)
@@ -124,4 +142,17 @@ object CodexdLocalClient {
clientCount = clientCount,
)
}
/**
 * Converts a runtime-status JSON body into a [RuntimeStatus].
 *
 * Nullable string fields become `null` when the JSON value is null or absent;
 * the remaining fields fall back to `false`, `0`, or `"unknown"`.
 */
private fun parseRuntimeStatus(body: String): RuntimeStatus {
    val root = JSONObject(body)

    // Reads a string field, mapping a null/absent JSON value to a Kotlin null.
    fun stringOrNull(key: String): String? =
        if (root.isNull(key)) null else root.optString(key)

    val authenticated = root.optBoolean("authenticated", false)
    val accountEmail = stringOrNull("accountEmail")
    val clientCount = root.optInt("clientCount", 0)
    val modelProviderId = root.optString("modelProviderId", "unknown")
    val configuredModel = stringOrNull("configuredModel")
    val effectiveModel = stringOrNull("effectiveModel")
    val upstreamBaseUrl = root.optString("upstreamBaseUrl", "unknown")
    return RuntimeStatus(
        authenticated = authenticated,
        accountEmail = accountEmail,
        clientCount = clientCount,
        modelProviderId = modelProviderId,
        configuredModel = configuredModel,
        effectiveModel = effectiveModel,
        upstreamBaseUrl = upstreamBaseUrl,
    )
}
}

View File

@@ -1,5 +1,6 @@
package com.openai.codex.genie
import org.json.JSONArray
import org.json.JSONObject
import java.io.IOException
@@ -34,17 +35,29 @@ object CodexAgentBridge {
fun buildResponsesRequest(
requestId: String,
model: String,
instructions: String,
prompt: String,
): String {
val body = JSONObject()
.put("model", model)
.put("store", false)
.put("stream", false)
.put("instructions", instructions)
.put(
"instructions",
"You are Codex acting as an Android Genie. Reply with exactly one short sentence.",
"input",
JSONArray().put(
JSONObject()
.put("role", "user")
.put(
"content",
JSONArray().put(
JSONObject()
.put("type", "input_text")
.put("text", prompt),
),
),
),
)
.put("input", prompt)
.toString()
return buildHttpRequest(requestId, "POST", "/v1/responses", body)
}

View File

@@ -15,6 +15,8 @@ class CodexGenieService : GenieService() {
private const val TAG = "CodexGenieService"
private const val MAX_BRIDGE_PROMPT_CHARS = 240
private const val MAX_BRIDGE_ANSWER_CHARS = 120
private const val GENIE_RESPONSE_INSTRUCTIONS =
"You are Codex acting as an Android Genie. Reply with exactly one line that starts with QUESTION: or RESULT:."
}
private val sessionControls = ConcurrentHashMap<String, SessionControl>()
@@ -106,50 +108,45 @@ class CodexGenieService : GenieService() {
return
}
val answer = waitForUserResponse(control)
callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
callback.publishTrace(sessionId, "Received user response: $answer")
val runtime = runtimeStatus.getOrNull()
val modelResponse = runtime?.takeIf { status ->
status.authenticated && !status.effectiveModel.isNullOrBlank()
}?.let { status ->
if (runtime == null) {
callback.publishResult(
sessionId,
"Reached the framework-managed Agent bridge, but runtime status was unavailable. Replace this scaffold with a real Codex-driven Genie executor.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
if (!runtime.authenticated || runtime.effectiveModel.isNullOrBlank()) {
callback.publishResult(
sessionId,
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
var answer = waitForAgentAnswer(sessionId, callback, control)
Log.i(TAG, "Received Agent answer for $sessionId")
callback.publishTrace(sessionId, "Received Agent answer: $answer")
while (!control.cancelled) {
callback.publishTrace(
sessionId,
"Requesting a non-streaming /v1/responses call through the Agent using ${status.effectiveModel}.",
"Requesting a non-streaming /v1/responses call through the Agent using ${runtime.effectiveModel}.",
)
runCatching {
val modelResponse = runCatching {
requestModelNextStep(
sessionId = sessionId,
request = request,
answer = answer,
runtimeStatus = status,
runtimeStatus = runtime,
targetAppContext = targetAppContext.getOrNull(),
callback = callback,
control = control,
)
}
}
when {
modelResponse == null && runtime == null -> {
callback.publishResult(
sessionId,
"Reached the framework-managed Agent bridge, but runtime status was unavailable. Replace this scaffold with a real Codex-driven Genie executor.",
)
}
modelResponse == null -> {
callback.publishResult(
sessionId,
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
)
}
modelResponse.isSuccess -> {
callback.publishResult(
sessionId,
modelResponse.getOrThrow(),
)
}
else -> {
if (modelResponse.isFailure) {
callback.publishTrace(
sessionId,
"Agent-mediated /v1/responses request failed: ${modelResponse.exceptionOrNull()?.message}",
@@ -158,9 +155,29 @@ class CodexGenieService : GenieService() {
sessionId,
"Reached the Agent bridge for ${request.targetPackage}, but the proxied model request failed. Replace this scaffold with a real Codex-driven Genie executor.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
when (val turn = parseGenieModelTurn(modelResponse.getOrThrow())) {
is GenieModelTurn.Result -> {
Log.i(TAG, "Publishing Genie result for $sessionId")
callback.publishResult(sessionId, turn.text)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
is GenieModelTurn.Question -> {
Log.i(TAG, "Publishing Genie follow-up question for $sessionId")
callback.publishTrace(sessionId, "Genie follow-up question: ${turn.text}")
callback.publishQuestion(sessionId, turn.text)
callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
answer = waitForAgentAnswer(sessionId, callback, control)
Log.i(TAG, "Received follow-up Agent answer for $sessionId")
callback.publishTrace(sessionId, "Received Agent answer: $answer")
}
}
}
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
callback.publishError(sessionId, "Cancelled")
callback.updateState(sessionId, AgentSessionInfo.STATE_CANCELLED)
} catch (err: InterruptedException) {
Thread.currentThread().interrupt()
callback.publishError(sessionId, "Interrupted: ${err.message}")
@@ -202,6 +219,7 @@ class CodexGenieService : GenieService() {
CodexAgentBridge.buildResponsesRequest(
requestId = requestId,
model = model,
instructions = GENIE_RESPONSE_INSTRUCTIONS,
prompt = buildModelPrompt(
request = request,
answer = answer,
@@ -215,6 +233,16 @@ class CodexGenieService : GenieService() {
return CodexAgentBridge.parseResponsesOutputText(response, requestId)
}
/**
 * Waits for the next answer via [waitForUserResponse], then moves the session
 * back to `STATE_RUNNING` and returns that answer.
 */
private fun waitForAgentAnswer(
    sessionId: String,
    callback: Callback,
    control: SessionControl,
): String = waitForUserResponse(control).also {
    // State flips to RUNNING only after an answer has actually arrived.
    callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
}
private fun waitForBridgeResponse(control: SessionControl, requestId: String): String {
val deadlineNanos = System.nanoTime() + TimeUnit.SECONDS.toNanos(5)
while (!control.cancelled) {
@@ -253,12 +281,12 @@ class CodexGenieService : GenieService() {
You are Codex acting as an Android Genie for the target package ${request.targetPackage}.
Original objective: $objective
The Agent answered your latest question with: $userAnswer
$targetSummary
Reply with either:
1. one short sentence describing the next automation step you would take in the target app, or
2. one short follow-up question if you are blocked and need clarification.
Emit exactly one line starting with QUESTION: or RESULT:.
Use QUESTION: when you need another free-form answer from the Agent before you can proceed.
Use RESULT: when you are ready to report the next concrete step or final outcome.
""".trimIndent()
}
@@ -270,6 +298,32 @@ class CodexGenieService : GenieService() {
return "Codex Genie is ready to drive $displayName. Reply with any extra constraints or answer 'continue' to let Genie proceed."
}
/**
 * Classifies a model reply as a follow-up question or a result.
 *
 * An explicit `QUESTION:` prefix is checked first, then `RESULT:`. A reply
 * without either prefix counts as a question when the trimmed text ends with
 * `?`, and as a result otherwise.
 */
private fun parseGenieModelTurn(message: String): GenieModelTurn {
    val normalized = message.trim()
    stripTurnPrefix(normalized, "QUESTION:")?.let { text ->
        return GenieModelTurn.Question(text)
    }
    stripTurnPrefix(normalized, "RESULT:")?.let { text ->
        return GenieModelTurn.Result(text)
    }
    // No recognised prefix: fall back to the trailing-question-mark heuristic.
    return when {
        normalized.endsWith("?") -> GenieModelTurn.Question(normalized)
        else -> GenieModelTurn.Result(normalized)
    }
}
/**
 * Removes a case-insensitive [prefix] from [message] and trims the remainder.
 *
 * @return `null` when [message] does not start with [prefix]; otherwise the
 *   trimmed remainder, or `"continue"` when nothing is left after trimming.
 */
private fun stripTurnPrefix(message: String, prefix: String): String? {
    val hasPrefix = message.startsWith(prefix, ignoreCase = true)
    return if (hasPrefix) {
        message.drop(prefix.length).trim().ifEmpty { "continue" }
    } else {
        null
    }
}
private fun abbreviate(value: String, maxChars: Int): String {
if (value.length <= maxChars) {
return value
@@ -282,4 +336,10 @@ class CodexGenieService : GenieService() {
val bridgeResponses = LinkedBlockingQueue<String>()
val userResponses = LinkedBlockingQueue<String>()
}
/**
 * One parsed model reply in the Genie question/result loop.
 *
 * The session loop publishes a [Question] as a follow-up question and waits
 * for another Agent answer; it publishes a [Result] as the session result and
 * marks the session completed.
 */
private sealed interface GenieModelTurn {
// Reply text to publish as a follow-up question (turn prefix already stripped).
data class Question(val text: String) : GenieModelTurn
// Reply text to publish as the session result (turn prefix already stripped).
data class Result(val text: String) : GenieModelTurn
}
}

View File

@@ -33,6 +33,8 @@ The current repo now contains the first implementation slice:
- Non-bridge Genie questions now surface through an Agent-owned notification,
which gives the Agent a concrete user-escalation path without making the
Genie the user-facing surface.
- The Agent now also attempts to answer non-bridge Genie questions through the
embedded `codexd` runtime before falling back to that notification path.
- This is intentional: runtime testing on the emulator showed that a Genie
execution runs inside the paired target app's sandbox/UID, so ordinary
cross-app Android service/provider IPC to the Agent app is not a reliable
@@ -118,6 +120,8 @@ existing network/auth bridge while this refactor proceeds.
- One real non-streaming proxied `/v1/responses` request from Genie through the
Agent-owned bridge after the user answer
- Agent-owned question notifications for non-bridge Genie questions
- Agent-mediated free-form answers for non-bridge Genie questions, using the
current embedded `codexd` runtime as the temporary answer engine
- Abstract-unix-socket support in the legacy Rust bridge via `@name` or
`abstract:name`, so the compatibility transport can move off app-private
filesystem sockets when Agent<->Genie traffic is introduced