Allow Android planner questions

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-04-05 20:45:08 -07:00
parent f4f9acccb5
commit 1bc1fce1a6
11 changed files with 193 additions and 16 deletions

View File

@@ -125,6 +125,15 @@ stub SDK docs and the local refactor doc:
- if delegated rendering is unavailable, the framework may post a generic
fallback notification, so app-side notification code must remain
token-aware and idempotent
- Direct AGENT planning can ask user-facing questions before any child Genie is
created:
- hosted planner `request_user_input` calls are stored by the Agent, the
parent session is moved to `WAITING_FOR_USER`, and the parent Agent icon /
notification becomes the user-facing question surface
- answering the parent question resolves the pending planner tool request so
the same planner turn can continue and produce the child-Genie plan
- child Genie questions remain separate child-session questions that roll up
through the parent session when user escalation is needed
- HOME icon / notification taps for question or final-result states should route
to `SessionPopupActivity`, which uses one dialog-style popup shape for both
question answering and result follow-up.

View File

@@ -221,6 +221,8 @@ internal class AgentPlannerDesktopSessionHost(
CodexCliBinaryLocator.resolve(context).absolutePath,
"-c",
"enable_request_compression=false",
"-c",
"features.default_mode_request_user_input=true",
"app-server",
"--listen",
"stdio://",
@@ -303,11 +305,23 @@ internal class AgentPlannerDesktopSessionHost(
val method = message.optString("method")
when (method) {
"item/tool/requestUserInput" -> {
sendError(
requestId = requestId,
code = -32601,
message = "Planner desktop attach does not support request_user_input yet",
)
val questions = message.optJSONObject("params")?.optJSONArray("questions") ?: JSONArray()
val result = runCatching {
AgentPlannerQuestionRegistry.requestUserInput(
context = context,
sessionController = sessionController,
sessionId = sessionId,
questions = questions,
)
}.getOrElse { err ->
sendError(
requestId = requestId,
code = -32000,
message = err.message ?: "Planner user input request failed",
)
return
}
sendResult(requestId, result)
}
else -> {
sendError(

View File

@@ -0,0 +1,93 @@
package com.openai.codex.agent
import android.app.agent.AgentManager
import android.app.agent.AgentSessionInfo
import android.content.Context
import android.util.Log
import java.io.IOException
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.LinkedBlockingQueue
import org.json.JSONArray
import org.json.JSONObject
/**
 * Process-wide registry of hosted planner `request_user_input` calls that are
 * blocked waiting for the user.
 *
 * At most one pending question is tracked per session id. The planner thread
 * parks inside [requestUserInput] until another thread delivers an answer via
 * [answerQuestion], aborts it via [cancelQuestion], or a newer question for the
 * same session supersedes it.
 */
object AgentPlannerQuestionRegistry {
    private const val TAG = "AgentPlannerQuestionRegistry"

    /**
     * One planner question awaiting a response.
     *
     * Deliberately a plain class rather than a data class: the registry removes
     * entries with `remove(key, value)`, which compares values with `equals`,
     * and that comparison must mean "this exact pending request".
     */
    private class PendingPlannerQuestion(
        val questions: JSONArray,
        val renderedQuestion: String,
    ) {
        // Capacity 1: the first response delivered wins; later offers are rejected.
        val responses = LinkedBlockingQueue<PendingPlannerQuestionResponse>(1)
    }

    /** Outcome handed to the waiting planner thread: an answer payload or an error. */
    private data class PendingPlannerQuestionResponse(
        val answer: JSONObject? = null,
        val error: IOException? = null,
    )

    private val pendingQuestions = ConcurrentHashMap<String, PendingPlannerQuestion>()

    /**
     * Registers [questions] as the pending planner question for [sessionId],
     * moves the session to `STATE_WAITING_FOR_USER`, and blocks the calling
     * thread until a response arrives.
     *
     * Any question already pending for the same session is failed with a
     * "Planner question superseded" error.
     *
     * @param context used to obtain the application context and [AgentManager].
     * @param sessionController consulted to decide whether the session state
     *   may be restored to running once the wait ends.
     * @param sessionId session the question belongs to.
     * @param questions question payload; a defensive copy is stored.
     * @return a JSON object holding the answers under the `"answers"` key.
     * @throws IOException if [AgentManager] is unavailable, the question is
     *   cancelled or superseded, no answer was supplied, or the waiting thread
     *   is interrupted.
     */
    fun requestUserInput(
        context: Context,
        sessionController: AgentSessionController,
        sessionId: String,
        questions: JSONArray,
    ): JSONObject {
        val appContext = context.applicationContext
        val manager = appContext.getSystemService(AgentManager::class.java)
            ?: throw IOException("AgentManager unavailable for planner question")
        val pendingQuestion = PendingPlannerQuestion(
            // Copy via a JSON round-trip so later caller-side mutation cannot
            // change what the answer is encoded against.
            questions = JSONArray(questions.toString()),
            renderedQuestion = AgentUserInputPrompter.renderQuestions(questions),
        )
        pendingQuestions.put(sessionId, pendingQuestion)?.responses?.offer(
            PendingPlannerQuestionResponse(error = IOException("Planner question superseded")),
        )
        return try {
            // Everything after registration runs inside the try so that a
            // failure here still removes the entry in `finally`.
            runCatching {
                manager.publishTrace(sessionId, "Planner requested user input before delegating to Genies.")
            }.onFailure { err ->
                Log.w(TAG, "Failed to publish planner question trace for $sessionId", err)
            }
            manager.updateSessionState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
            val response = pendingQuestion.responses.take()
            response.error?.let { throw it }
            response.answer ?: throw IOException("Planner question completed without an answer")
        } catch (err: InterruptedException) {
            // Preserve the interrupt flag for callers further up the stack.
            Thread.currentThread().interrupt()
            throw IOException("Interrupted while waiting for planner question answer", err)
        } finally {
            pendingQuestions.remove(sessionId, pendingQuestion)
            // If a newer question took over this session, it owns the
            // WAITING_FOR_USER state; flipping back to RUNNING here would race
            // with it and could hide the new question.
            val superseded = pendingQuestions.containsKey(sessionId)
            if (!superseded && !sessionController.isTerminalSession(sessionId)) {
                runCatching {
                    manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
                }.onFailure { err ->
                    Log.w(TAG, "Failed to restore planner session state for $sessionId", err)
                }
            }
        }
    }

    /**
     * Delivers [answer] to the planner question pending for [sessionId].
     *
     * @return `true` if a planner question is registered for the session (the
     *   caller should treat the answer as handled here), `false` if there is
     *   none.
     */
    fun answerQuestion(
        sessionId: String,
        answer: String,
    ): Boolean {
        val pendingQuestion = pendingQuestions[sessionId] ?: return false
        val answerJson = JSONObject().put(
            "answers",
            AgentUserInputPrompter.buildQuestionAnswers(pendingQuestion.questions, answer),
        )
        val delivered = pendingQuestion.responses.offer(PendingPlannerQuestionResponse(answer = answerJson))
        if (!delivered) {
            // The single response slot is already taken (an earlier answer,
            // cancellation, or supersede); this answer is dropped.
            Log.w(TAG, "Planner question for $sessionId already has a response; ignoring answer")
        }
        return true
    }

    /**
     * Removes the pending question for [sessionId], if any, and fails its
     * waiter with an [IOException] carrying [reason].
     */
    fun cancelQuestion(
        sessionId: String,
        reason: String,
    ) {
        pendingQuestions.remove(sessionId)?.responses?.offer(
            PendingPlannerQuestionResponse(error = IOException(reason)),
        )
    }

    /** Returns the rendered text of the question pending for [sessionId], or `null` if none. */
    fun latestQuestion(sessionId: String): String? = pendingQuestions[sessionId]?.renderedQuestion
}

View File

@@ -185,6 +185,8 @@ object AgentPlannerRuntimeManager {
CodexCliBinaryLocator.resolve(context).absolutePath,
"-c",
"enable_request_compression=false",
"-c",
"features.default_mode_request_user_input=true",
"app-server",
"--listen",
"stdio://",

View File

@@ -117,6 +117,14 @@ class AgentSessionController(context: Context) {
sessionDetails = sessionDetails.map { session ->
diagnosticsBySessionId[session.sessionId]?.let(session::withDiagnostics) ?: session
}
sessionDetails = sessionDetails.map { session ->
AgentPlannerQuestionRegistry.latestQuestion(session.sessionId)?.let { plannerQuestion ->
session.copy(
latestQuestion = plannerQuestion,
latestTrace = session.latestTrace ?: "Planner waiting for user input.",
)
} ?: session
}
sessionDetails = deriveDirectParentUiState(sessionDetails)
val selectedSession = chooseSelectedSession(sessionDetails, focusedSessionId)
val parentSession = findParentSession(sessionDetails, selectedSession)
@@ -531,6 +539,12 @@ class AgentSessionController(context: Context) {
fun answerQuestion(sessionId: String, answer: String, parentSessionId: String?) {
val manager = requireAgentManager()
if (AgentPlannerQuestionRegistry.answerQuestion(sessionId, answer)) {
manager.publishTrace(sessionId, "Answered planner question: $answer")
manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
AgentQuestionNotifier.cancel(appContext, sessionId)
return
}
repeat(QUESTION_ANSWER_RETRY_COUNT) { attempt ->
runCatching {
manager.answerQuestion(sessionId, answer)
@@ -562,6 +576,12 @@ class AgentSessionController(context: Context) {
parentSessionId: String?,
) {
val manager = requireAgentManager()
if (AgentPlannerQuestionRegistry.answerQuestion(sessionId, answer)) {
manager.publishTrace(sessionId, "Answered planner question from notification: $answer")
manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
runCatching { manager.ackSessionNotification(sessionId, notificationToken) }
return
}
manager.answerQuestionFromNotification(sessionId, notificationToken, answer)
if (parentSessionId != null) {
manager.publishTrace(parentSessionId, "Answered question for $sessionId: $answer")
@@ -767,6 +787,9 @@ class AgentSessionController(context: Context) {
if (!isDirectParentSession(session)) {
return@map session
}
if (!session.latestQuestion.isNullOrBlank()) {
return@map session
}
val childSessions = childrenByParent[session.sessionId].orEmpty()
if (childSessions.isEmpty()) {
return@map session

View File

@@ -144,12 +144,20 @@ object AgentSessionLauncher {
val applicationContext = context.applicationContext
thread(name = "CodexAgentPlanner-${pendingSession.parentSessionId}") {
runCatching {
val effectiveRequestUserInputHandler = requestUserInputHandler ?: { questions: JSONArray ->
AgentPlannerQuestionRegistry.requestUserInput(
context = applicationContext,
sessionController = sessionController,
sessionId = pendingSession.parentSessionId,
questions = questions,
)
}
AgentTaskPlanner.planSession(
context = applicationContext,
userObjective = prompt,
executionSettings = executionSettings,
sessionController = sessionController,
requestUserInputHandler = null,
requestUserInputHandler = effectiveRequestUserInputHandler,
frameworkSessionId = pendingSession.parentSessionId,
)
}.onFailure { err ->

View File

@@ -62,6 +62,7 @@ object AgentTaskPlanner {
- Verify each chosen package by inspecting focused query-activities or resolve-activity output before returning it.
- Only choose packages that directly own the requested app behavior. Never choose helper packages such as `com.android.shell`, `com.android.systemui`, or the Codex Agent/Genie packages unless the user explicitly asked for them.
- If the user objective already names a specific installed package, use it directly after verification.
- If the user objective is too ambiguous to choose target packages or delegated objectives safely, call request_user_input before returning JSON. Do not guess a task for vague prompts like "do something".
- `pm list packages PACKAGE_NAME` alone is not sufficient verification.
- Prefer focused verification commands such as `pm list packages clock`, `cmd package query-activities --brief -p PACKAGE -a android.intent.action.MAIN`, and `cmd package resolve-activity --brief -a RELEVANT_ACTION PACKAGE`.
- Do not enumerate every launcher activity on the device. Query specific candidate packages instead.
@@ -152,12 +153,20 @@ object AgentTaskPlanner {
executionSettings = executionSettings,
)
val sessionStartResult = try {
val effectiveRequestUserInputHandler = requestUserInputHandler ?: { questions: JSONArray ->
AgentPlannerQuestionRegistry.requestUserInput(
context = context,
sessionController = sessionController,
sessionId = pendingSession.parentSessionId,
questions = questions,
)
}
val request = planSession(
context = context,
userObjective = userObjective,
executionSettings = executionSettings,
sessionController = sessionController,
requestUserInputHandler = requestUserInputHandler,
requestUserInputHandler = effectiveRequestUserInputHandler,
frameworkSessionId = pendingSession.parentSessionId,
)
sessionController.startDirectSessionChildren(

View File

@@ -41,6 +41,10 @@ class CodexAgentService : AgentService() {
}
}
if (isTerminalSessionState(session.state) && !DesktopInspectionRegistry.isPlannerAttached(session.sessionId)) {
AgentPlannerQuestionRegistry.cancelQuestion(
sessionId = session.sessionId,
reason = "Planner session ended before the question was answered",
)
AgentPlannerRuntimeManager.closeSession(session.sessionId)
}
if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
@@ -63,6 +67,10 @@ class CodexAgentService : AgentService() {
AgentSessionBridgeServer.closeSession(sessionId)
AgentPlannerRuntimeManager.closeSession(sessionId)
DesktopInspectionRegistry.removeSession(sessionId)
AgentPlannerQuestionRegistry.cancelQuestion(
sessionId = sessionId,
reason = "Planner session was removed before the question was answered",
)
AgentQuestionNotifier.cancel(this, sessionId)
AgentQuestionNotifier.clearSessionState(sessionId)
presentationPolicyStore.removePolicy(sessionId)
@@ -283,12 +291,14 @@ class CodexAgentService : AgentService() {
}
return@thread
}
val effectiveNotificationText =
AgentPlannerQuestionRegistry.latestQuestion(session.sessionId) ?: notificationText
val posted = runCatching {
AgentQuestionNotifier.showOrUpdateDelegatedNotification(
context = this,
session = session,
notificationToken = notificationToken,
notificationText = notificationText,
notificationText = effectiveNotificationText,
)
}.onFailure { err ->
Log.w(

View File

@@ -335,9 +335,6 @@ class CreateSessionActivity : Activity() {
existingSessionId = existingSessionId,
),
sessionController = sessionController,
requestUserInputHandler = { questions ->
AgentUserInputPrompter.promptForAnswers(this, questions)
},
)
}.onFailure { err ->
runOnUiThread {

View File

@@ -207,9 +207,6 @@ class MainActivity : Activity() {
finalPresentationPolicyOverride = finalPresentationPolicy,
executionSettings = SessionExecutionSettings.default,
sessionController = agentSessionController,
requestUserInputHandler = { questions ->
AgentUserInputPrompter.promptForAnswers(this, questions)
},
)
}
}

View File

@@ -126,6 +126,16 @@ The current repo now contains these implementation slices:
- pressing `Done` on a direct-parent result consumes only the parent HOME icon
via `consumeHomeSessionPresentation(parentSessionId)` and leaves the parent
session inspectable in `Codex Manager`
- Planner `request_user_input` calls now pause the direct parent session before
any child Genie needs to exist:
- the Agent stores the pending planner request, moves the parent framework
session to `WAITING_FOR_USER`, and uses the parent Agent HOME icon /
notification as the question surface
- answering the parent question resolves the hosted planner tool request and
returns the parent session to `RUNNING` so the same planner turn can finish
selecting packages and delegated Genie objectives
- child Genie questions are still represented as child-session questions and
roll up to the parent only when they need user escalation
- Codex Agent still uses `cancelSession(sessionId)` for user-driven cancellation
because it is the AGENT-role app, not a HOME-role surface. The
HOME-only `cancelHomeSession(sessionId)` API is reserved for Launcher/HOME
@@ -317,7 +327,7 @@ the Android Agent/Genie flow.
- `android/app/src/main/java/com/openai/codex/agent/SessionPopupActivity.kt`
- dialog-style question/result surfaces for HOME icon and notification
entrypoints; question cancel only dismisses the dialog, while final results
render in a scrollable text view with an OK button that clears HOME
render in a scrollable text view with a Done button that clears HOME
presentation state for top-level HOME sessions
- `android/app/src/main/java/com/openai/codex/agent/AgentQuestionNotifier.kt`
- token-aware Agent-side renderer for delegated framework notifications,
@@ -325,7 +335,12 @@ the Android Agent/Genie flow.
- `android/app/src/main/java/com/openai/codex/agent/AgentNotificationReplyReceiver.kt`
- inline reply receiver for Agent-rendered question notifications
- `android/app/src/main/java/com/openai/codex/agent/AgentUserInputPrompter.kt`
- Android dialog bridge for hosted Agent `request_user_input` calls
- shared rendering and answer encoding helpers for hosted `request_user_input`
calls
- `android/app/src/main/java/com/openai/codex/agent/AgentPlannerQuestionRegistry.kt`
- parent-session question registry that pauses hosted planner
`request_user_input` calls until the user answers through Agent UI or
notification surfaces
- `android/genie/src/main/java/com/openai/codex/genie/CodexGenieService.kt`
- Genie lifecycle host for the embedded `codex app-server`
- `android/genie/src/main/java/com/openai/codex/genie/CodexAppServerHost.kt`