Route Agent answers through framework bridge

Co-authored-by: Codex <noreply@openai.com>
2026-04-24 14:45:27 +00:00 · 2026-03-19 14:18:46 -07:00
parent be817e3421
commit c2c663a6e5
4 changed files with 114 additions and 50 deletions
--- a/android/app/src/main/java/com/openai/codexd/AgentFrameworkToolBridge.kt
+++ b/android/app/src/main/java/com/openai/codexd/AgentFrameworkToolBridge.kt
@@ -63,10 +63,14 @@ class AgentFrameworkToolBridge(
        return JSONArray().put(buildStartDirectSessionToolSpec())
    }

-    fun buildSessionManagementToolSpecs(): JSONArray {
+    fun buildQuestionResolutionToolSpecs(): JSONArray {
        return JSONArray()
            .put(buildListSessionsToolSpec())
            .put(buildAnswerQuestionToolSpec())
+    }
+
+    fun buildSessionManagementToolSpecs(): JSONArray {
+        return buildQuestionResolutionToolSpecs()
            .put(buildAttachTargetToolSpec())
            .put(buildCancelSessionToolSpec())
    }
--- a/android/app/src/main/java/com/openai/codexd/AgentSessionController.kt
+++ b/android/app/src/main/java/com/openai/codexd/AgentSessionController.kt
@@ -11,6 +11,8 @@ import java.util.concurrent.Executor
 class AgentSessionController(context: Context) {
    companion object {
        private const val PREFERRED_GENIE_PACKAGE = "com.openai.codex.genie"
+        private const val QUESTION_ANSWER_RETRY_COUNT = 10
+        private const val QUESTION_ANSWER_RETRY_DELAY_MS = 50L
    }

    private val agentManager = context.getSystemService(AgentManager::class.java)
@@ -128,9 +130,28 @@ class AgentSessionController(context: Context) {

    fun answerQuestion(sessionId: String, answer: String, parentSessionId: String?) {
        val manager = requireAgentManager()
-        manager.answerQuestion(sessionId, answer)
-        if (parentSessionId != null) {
-            manager.publishTrace(parentSessionId, "Answered question for $sessionId: $answer")
+        repeat(QUESTION_ANSWER_RETRY_COUNT) { attempt ->
+            runCatching {
+                manager.answerQuestion(sessionId, answer)
+            }.onSuccess {
+                if (parentSessionId != null) {
+                    manager.publishTrace(parentSessionId, "Answered question for $sessionId: $answer")
+                }
+                return
+            }.onFailure { err ->
+                if (attempt == QUESTION_ANSWER_RETRY_COUNT - 1 || !shouldRetryAnswerQuestion(sessionId, err)) {
+                    throw err
+                }
+                Thread.sleep(QUESTION_ANSWER_RETRY_DELAY_MS)
+            }
+        }
+    }
+
+    fun isSessionWaitingForUser(sessionId: String): Boolean {
+        val manager = agentManager ?: return false
+        return manager.getSessions(currentUserId()).any { session ->
+            session.sessionId == sessionId &&
+                session.state == AgentSessionInfo.STATE_WAITING_FOR_USER
        }
    }

@@ -171,6 +192,14 @@ class AgentSessionController(context: Context) {
        return checkNotNull(agentManager) { "AgentManager unavailable" }
    }

+    private fun shouldRetryAnswerQuestion(
+        sessionId: String,
+        err: Throwable,
+    ): Boolean {
+        return err.message?.contains("not waiting for user input", ignoreCase = true) == true ||
+            !isSessionWaitingForUser(sessionId)
+    }
+
    private fun chooseSelectedSession(
        sessions: List<AgentSessionDetails>,
        focusedSessionId: String?,
--- a/android/app/src/main/java/com/openai/codexd/CodexAgentService.kt
+++ b/android/app/src/main/java/com/openai/codexd/CodexAgentService.kt
@@ -4,7 +4,6 @@ import android.app.agent.AgentManager
 import android.app.agent.AgentService
 import android.app.agent.AgentSessionEvent
 import android.app.agent.AgentSessionInfo
-import android.os.Process
 import android.util.Log
 import java.io.IOException
 import kotlin.concurrent.thread
@@ -12,16 +11,24 @@ import kotlin.concurrent.thread
 class CodexAgentService : AgentService() {
    companion object {
        private const val TAG = "CodexAgentService"
-        private const val BRIDGE_ANSWER_RETRY_COUNT = 10
-        private const val BRIDGE_ANSWER_RETRY_DELAY_MS = 50L
+        private const val AUTO_ANSWER_ESCALATE_PREFIX = "ESCALATE:"
        private const val AUTO_ANSWER_INSTRUCTIONS =
-            "You are Codex acting as the Android Agent supervising a Genie execution. Reply with the exact free-form answer that should be sent back to the Genie. Keep it short and actionable. If the Genie can proceed without extra constraints, reply with exactly: continue"
+            "You are Codex acting as the Android Agent supervising a Genie execution. If you can answer the current Genie question from the available session context, call the framework session tool `android.framework.sessions.answer_question` exactly once with a short free-form answer. You may inspect current framework state with `android.framework.sessions.list`. If user input is required, do not call any framework tool. Instead reply with `ESCALATE: ` followed by the exact question the Agent should ask the user."
        private const val MAX_AUTO_ANSWER_CONTEXT_CHARS = 800
    }

+    private sealed class AutoAnswerResult {
+        data object Answered : AutoAnswerResult()
+
+        data class Escalate(
+            val question: String,
+        ) : AutoAnswerResult()
+    }
+
    private val handledGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
    private val pendingGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
    private val agentManager by lazy { getSystemService(AgentManager::class.java) }
+    private val sessionController by lazy { AgentSessionController(this) }

    override fun onSessionChanged(session: AgentSessionInfo) {
        Log.i(TAG, "onSessionChanged $session")
@@ -36,37 +43,6 @@ class CodexAgentService : AgentService() {
        pendingGenieQuestions.removeIf { it.startsWith("$sessionId:") }
    }

-    private fun answerQuestionWithRetry(manager: AgentManager, sessionId: String, response: String) {
-        repeat(BRIDGE_ANSWER_RETRY_COUNT) { attempt ->
-            runCatching {
-                manager.answerQuestion(sessionId, response)
-            }.onSuccess {
-                return
-            }.onFailure { err ->
-                if (attempt == BRIDGE_ANSWER_RETRY_COUNT - 1 || !isBridgeQuestionPending(manager, sessionId, err)) {
-                    throw err
-                }
-                Thread.sleep(BRIDGE_ANSWER_RETRY_DELAY_MS)
-            }
-        }
-    }
-
-    private fun isSessionWaitingForUser(manager: AgentManager, sessionId: String): Boolean {
-        return manager.getSessions(Process.myUid() / 100000).any { session ->
-            session.sessionId == sessionId &&
-                session.state == AgentSessionInfo.STATE_WAITING_FOR_USER
-        }
-    }
-
-    private fun isBridgeQuestionPending(
-        manager: AgentManager,
-        sessionId: String,
-        err: Throwable,
-    ): Boolean {
-        return err.message?.contains("not waiting for user input", ignoreCase = true) == true ||
-            !isSessionWaitingForUser(manager, sessionId)
-    }
-
    private fun maybeAutoAnswerGenieQuestion(session: AgentSessionInfo) {
        if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
            return
@@ -81,14 +57,26 @@ class CodexAgentService : AgentService() {
        thread(name = "CodexAgentAutoAnswer-${session.sessionId}") {
            Log.i(TAG, "Attempting Agent auto-answer for ${session.sessionId}")
            runCatching {
-                val answer = requestGenieAutoAnswer(session, question, events)
-                answerQuestionWithRetry(manager, session.sessionId, answer)
-                handledGenieQuestions.add(questionKey)
-                AgentQuestionNotifier.cancel(this, session.sessionId)
-                Log.i(TAG, "Auto-answered Genie question for ${session.sessionId}")
+                when (val result = requestGenieAutoAnswer(session, question, events)) {
+                    AutoAnswerResult.Answered -> {
+                        handledGenieQuestions.add(questionKey)
+                        AgentQuestionNotifier.cancel(this, session.sessionId)
+                        Log.i(TAG, "Auto-answered Genie question for ${session.sessionId}")
+                    }
+                    is AutoAnswerResult.Escalate -> {
+                        if (sessionController.isSessionWaitingForUser(session.sessionId)) {
+                            AgentQuestionNotifier.showQuestion(
+                                context = this,
+                                sessionId = session.sessionId,
+                                targetPackage = session.targetPackage,
+                                question = result.question,
+                            )
+                        }
+                    }
+                }
            }.onFailure { err ->
                Log.i(TAG, "Agent auto-answer unavailable for ${session.sessionId}: ${err.message}")
-                if (isSessionWaitingForUser(manager, session.sessionId)) {
+                if (sessionController.isSessionWaitingForUser(session.sessionId)) {
                    AgentQuestionNotifier.showQuestion(
                        context = this,
                        sessionId = session.sessionId,
@@ -127,16 +115,56 @@ class CodexAgentService : AgentService() {
        session: AgentSessionInfo,
        question: String,
        events: List<AgentSessionEvent>,
-    ): String {
+    ): AutoAnswerResult {
        val runtimeStatus = AgentCodexAppServerClient.readRuntimeStatus(this)
        if (!runtimeStatus.authenticated) {
            throw IOException("Agent runtime is not authenticated")
        }
-        return AgentCodexAppServerClient.requestText(
+        val frameworkToolBridge = AgentFrameworkToolBridge(this, sessionController)
+        var answered = false
+        val response = AgentCodexAppServerClient.requestText(
            context = this,
            instructions = AUTO_ANSWER_INSTRUCTIONS,
            prompt = buildAutoAnswerPrompt(session, question, events),
-        )
+            dynamicTools = frameworkToolBridge.buildQuestionResolutionToolSpecs(),
+            toolCallHandler = { toolName, arguments ->
+                if (
+                    toolName == AgentFrameworkToolBridge.ANSWER_QUESTION_TOOL &&
+                    arguments.optString("sessionId").trim().isEmpty()
+                ) {
+                    arguments.put("sessionId", session.sessionId)
+                }
+                if (
+                    toolName == AgentFrameworkToolBridge.ANSWER_QUESTION_TOOL &&
+                    arguments.optString("parentSessionId").trim().isEmpty() &&
+                    !session.parentSessionId.isNullOrBlank()
+                ) {
+                    arguments.put("parentSessionId", session.parentSessionId)
+                }
+                val toolResult = frameworkToolBridge.handleToolCall(
+                    toolName = toolName,
+                    arguments = arguments,
+                    userObjective = question,
+                    focusedSessionId = session.sessionId,
+                )
+                if (toolName == AgentFrameworkToolBridge.ANSWER_QUESTION_TOOL) {
+                    answered = true
+                }
+                toolResult
+            },
+        ).trim()
+        if (answered) {
+            return AutoAnswerResult.Answered
+        }
+        if (response.startsWith(AUTO_ANSWER_ESCALATE_PREFIX, ignoreCase = true)) {
+            val escalateQuestion = response.substringAfter(':').trim().ifEmpty { question }
+            return AutoAnswerResult.Escalate(escalateQuestion)
+        }
+        if (response.isNotBlank()) {
+            sessionController.answerQuestion(session.sessionId, response, session.parentSessionId)
+            return AutoAnswerResult.Answered
+        }
+        throw IOException("Agent runtime did not return an answer")
    }

    private fun buildAutoAnswerPrompt(
@@ -165,6 +193,7 @@ class CodexAgentService : AgentService() {
        }
        return context.takeLast(MAX_AUTO_ANSWER_CONTEXT_CHARS)
    }
+
    private fun findVisibleQuestion(events: List<AgentSessionEvent>): String? {
        return events.lastOrNull { event ->
            event.type == AgentSessionEvent.TYPE_QUESTION &&
--- a/docs/android-agent-genie-refactor.md
+++ b/docs/android-agent-genie-refactor.md
@@ -49,7 +49,9 @@ The current repo now contains these implementation slices:
 - Non-bridge Genie questions surface through AgentSDK question flow by mapping
  `request_user_input` back into Agent-managed questions and answers.
 - The Agent also attempts to answer Genie questions through its hosted Codex
-  runtime before falling back to notification/UI escalation.
+  runtime before falling back to notification/UI escalation, and now submits
+  those answers through the same framework-session bridge instead of a separate
+  Kotlin-only path.
 - Runtime testing on the emulator shows that the exported Agent Binder service
  is reachable from Genie execution for the current bootstrap calls, while
  direct cross-app access to the Agent-owned abstract socket is not a valid
@@ -135,7 +137,7 @@ foreground-service auth/status surface while this refactor proceeds.
 - Direct session launcher in the Agent UI
 - Agent-side target-package planning with an optional package override
 - Hosted Agent planning via standard Android shell tools already available on-device
- Dedicated framework-session bridge tool for direct Genie-session launch
+- Dedicated framework-session bridge tools for direct Genie-session launch and question resolution
 - Framework session inspection UI in the Agent app
 - Question answering and detached-target attach controls
 - Exported Binder bridge request handling in `CodexAgentBridgeService`