mirror of
https://github.com/openai/codex.git
synced 2026-04-24 06:35:50 +00:00
Allow Android planner questions
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -125,6 +125,15 @@ stub SDK docs and the local refactor doc:
|
||||
- if delegated rendering is unavailable, the framework may post a generic
|
||||
fallback notification, so app-side notification code must remain
|
||||
token-aware and idempotent
|
||||
- Direct AGENT planning can ask user-facing questions before any child Genie is
|
||||
created:
|
||||
- hosted planner `request_user_input` calls are stored by the Agent, the
|
||||
parent session is moved to `WAITING_FOR_USER`, and the parent Agent icon /
|
||||
notification becomes the user-facing question surface
|
||||
- answering the parent question resolves the pending planner tool request so
|
||||
the same planner turn can continue and produce the child-Genie plan
|
||||
- child Genie questions remain separate child-session questions that roll up
|
||||
through the parent session when user escalation is needed
|
||||
- HOME icon / notification taps for question or final-result states should route
|
||||
to `SessionPopupActivity`, which uses one dialog-style popup shape for both
|
||||
question answering and result follow-up.
|
||||
|
||||
@@ -221,6 +221,8 @@ internal class AgentPlannerDesktopSessionHost(
|
||||
CodexCliBinaryLocator.resolve(context).absolutePath,
|
||||
"-c",
|
||||
"enable_request_compression=false",
|
||||
"-c",
|
||||
"features.default_mode_request_user_input=true",
|
||||
"app-server",
|
||||
"--listen",
|
||||
"stdio://",
|
||||
@@ -303,11 +305,23 @@ internal class AgentPlannerDesktopSessionHost(
|
||||
val method = message.optString("method")
|
||||
when (method) {
|
||||
"item/tool/requestUserInput" -> {
|
||||
sendError(
|
||||
requestId = requestId,
|
||||
code = -32601,
|
||||
message = "Planner desktop attach does not support request_user_input yet",
|
||||
)
|
||||
val questions = message.optJSONObject("params")?.optJSONArray("questions") ?: JSONArray()
|
||||
val result = runCatching {
|
||||
AgentPlannerQuestionRegistry.requestUserInput(
|
||||
context = context,
|
||||
sessionController = sessionController,
|
||||
sessionId = sessionId,
|
||||
questions = questions,
|
||||
)
|
||||
}.getOrElse { err ->
|
||||
sendError(
|
||||
requestId = requestId,
|
||||
code = -32000,
|
||||
message = err.message ?: "Planner user input request failed",
|
||||
)
|
||||
return
|
||||
}
|
||||
sendResult(requestId, result)
|
||||
}
|
||||
else -> {
|
||||
sendError(
|
||||
|
||||
@@ -0,0 +1,93 @@
|
||||
package com.openai.codex.agent
|
||||
|
||||
import android.app.agent.AgentManager
|
||||
import android.app.agent.AgentSessionInfo
|
||||
import android.content.Context
|
||||
import android.util.Log
|
||||
import java.io.IOException
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
import org.json.JSONArray
|
||||
import org.json.JSONObject
|
||||
|
||||
/**
 * In-process registry of planner `request_user_input` calls that are waiting for the user.
 *
 * At most one question is tracked per session id. [requestUserInput] registers a question and
 * blocks its calling thread until [answerQuestion] or [cancelQuestion] delivers a response, or
 * until a newer question for the same session supersedes it.
 */
object AgentPlannerQuestionRegistry {
    private const val TAG = "AgentPlannerQuestionRegistry"

    /**
     * One outstanding question together with the single-slot queue used to hand the response to
     * the thread blocked in [requestUserInput].
     */
    private data class PendingPlannerQuestion(
        val questions: JSONArray,
        val renderedQuestion: String,
        val responses: LinkedBlockingQueue<PendingPlannerQuestionResponse> = LinkedBlockingQueue(1),
    )

    /** Outcome delivered to a waiter: either an [answer] payload or an [error] to rethrow. */
    private data class PendingPlannerQuestionResponse(
        val answer: JSONObject? = null,
        val error: IOException? = null,
    )

    private val pendingQuestions = ConcurrentHashMap<String, PendingPlannerQuestion>()

    /**
     * Registers [questions] for [sessionId], moves the session to `STATE_WAITING_FOR_USER`, and
     * blocks the calling thread until a response arrives.
     *
     * A question already pending for the same session is failed with
     * "Planner question superseded" before the new one starts waiting.
     *
     * @param context used only to resolve the application context and its [AgentManager].
     * @param sessionController consulted to decide whether the session may be restored to
     *   `STATE_RUNNING` once the wait ends.
     * @param sessionId session the question belongs to; also the registry key.
     * @param questions question payload; a copy is stored so later caller-side mutation does not
     *   affect answer encoding.
     * @return the JSON object produced by [answerQuestion] (an `"answers"` entry).
     * @throws IOException if [AgentManager] is unavailable, the question is cancelled or
     *   superseded, the response carries no answer, or the waiting thread is interrupted.
     */
    fun requestUserInput(
        context: Context,
        sessionController: AgentSessionController,
        sessionId: String,
        questions: JSONArray,
    ): JSONObject {
        val appContext = context.applicationContext
        val manager = appContext.getSystemService(AgentManager::class.java)
            ?: throw IOException("AgentManager unavailable for planner question")
        val pendingQuestion = PendingPlannerQuestion(
            questions = JSONArray(questions.toString()),
            renderedQuestion = AgentUserInputPrompter.renderQuestions(questions),
        )
        // Replace any earlier question for this session and wake its waiter with an error.
        pendingQuestions.put(sessionId, pendingQuestion)?.responses?.offer(
            PendingPlannerQuestionResponse(error = IOException("Planner question superseded")),
        )
        // Trace publishing is best-effort; a failure here must not fail the question.
        runCatching {
            manager.publishTrace(sessionId, "Planner requested user input before delegating to Genies.")
        }.onFailure { err ->
            Log.w(TAG, "Failed to publish planner question trace for $sessionId", err)
        }
        return try {
            // Inside the try so that a failing state update still unregisters the question in
            // the finally block instead of leaving a stale entry in the registry.
            manager.updateSessionState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
            val response = pendingQuestion.responses.take()
            response.error?.let { throw it }
            response.answer ?: throw IOException("Planner question completed without an answer")
        } catch (err: InterruptedException) {
            // Preserve the interrupt flag for callers further up the stack.
            Thread.currentThread().interrupt()
            throw IOException("Interrupted while waiting for planner question answer", err)
        } finally {
            // Conditional remove: never evict a newer question registered under the same id.
            pendingQuestions.remove(sessionId, pendingQuestion)
            // If a newer question now owns this session it is (or is about to be) in
            // WAITING_FOR_USER; flipping the session back to RUNNING here would clobber it.
            // This narrows the window rather than closing it completely.
            val supersededByNewerQuestion = pendingQuestions.containsKey(sessionId)
            if (!supersededByNewerQuestion && !sessionController.isTerminalSession(sessionId)) {
                runCatching {
                    manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
                }.onFailure { err ->
                    Log.w(TAG, "Failed to restore planner session state for $sessionId", err)
                }
            }
        }
    }

    /**
     * Delivers [answer] to the question pending for [sessionId], if any.
     *
     * @return `false` when no question is pending for the session; `true` otherwise. `true` is
     *   returned even when a response was already queued for that question, in which case this
     *   answer is dropped.
     */
    fun answerQuestion(
        sessionId: String,
        answer: String,
    ): Boolean {
        val pendingQuestion = pendingQuestions[sessionId] ?: return false
        val answerJson = JSONObject().put(
            "answers",
            AgentUserInputPrompter.buildQuestionAnswers(pendingQuestion.questions, answer),
        )
        pendingQuestion.responses.offer(PendingPlannerQuestionResponse(answer = answerJson))
        return true
    }

    /**
     * Removes the question pending for [sessionId], if any, and fails its waiter with an
     * [IOException] whose message is [reason].
     */
    fun cancelQuestion(
        sessionId: String,
        reason: String,
    ) {
        pendingQuestions.remove(sessionId)?.responses?.offer(
            PendingPlannerQuestionResponse(error = IOException(reason)),
        )
    }

    /** Returns the rendered text of the question pending for [sessionId], or `null` if none. */
    fun latestQuestion(sessionId: String): String? = pendingQuestions[sessionId]?.renderedQuestion
}
|
||||
@@ -185,6 +185,8 @@ object AgentPlannerRuntimeManager {
|
||||
CodexCliBinaryLocator.resolve(context).absolutePath,
|
||||
"-c",
|
||||
"enable_request_compression=false",
|
||||
"-c",
|
||||
"features.default_mode_request_user_input=true",
|
||||
"app-server",
|
||||
"--listen",
|
||||
"stdio://",
|
||||
|
||||
@@ -117,6 +117,14 @@ class AgentSessionController(context: Context) {
|
||||
sessionDetails = sessionDetails.map { session ->
|
||||
diagnosticsBySessionId[session.sessionId]?.let(session::withDiagnostics) ?: session
|
||||
}
|
||||
sessionDetails = sessionDetails.map { session ->
|
||||
AgentPlannerQuestionRegistry.latestQuestion(session.sessionId)?.let { plannerQuestion ->
|
||||
session.copy(
|
||||
latestQuestion = plannerQuestion,
|
||||
latestTrace = session.latestTrace ?: "Planner waiting for user input.",
|
||||
)
|
||||
} ?: session
|
||||
}
|
||||
sessionDetails = deriveDirectParentUiState(sessionDetails)
|
||||
val selectedSession = chooseSelectedSession(sessionDetails, focusedSessionId)
|
||||
val parentSession = findParentSession(sessionDetails, selectedSession)
|
||||
@@ -531,6 +539,12 @@ class AgentSessionController(context: Context) {
|
||||
|
||||
fun answerQuestion(sessionId: String, answer: String, parentSessionId: String?) {
|
||||
val manager = requireAgentManager()
|
||||
if (AgentPlannerQuestionRegistry.answerQuestion(sessionId, answer)) {
|
||||
manager.publishTrace(sessionId, "Answered planner question: $answer")
|
||||
manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
|
||||
AgentQuestionNotifier.cancel(appContext, sessionId)
|
||||
return
|
||||
}
|
||||
repeat(QUESTION_ANSWER_RETRY_COUNT) { attempt ->
|
||||
runCatching {
|
||||
manager.answerQuestion(sessionId, answer)
|
||||
@@ -562,6 +576,12 @@ class AgentSessionController(context: Context) {
|
||||
parentSessionId: String?,
|
||||
) {
|
||||
val manager = requireAgentManager()
|
||||
if (AgentPlannerQuestionRegistry.answerQuestion(sessionId, answer)) {
|
||||
manager.publishTrace(sessionId, "Answered planner question from notification: $answer")
|
||||
manager.updateSessionState(sessionId, AgentSessionInfo.STATE_RUNNING)
|
||||
runCatching { manager.ackSessionNotification(sessionId, notificationToken) }
|
||||
return
|
||||
}
|
||||
manager.answerQuestionFromNotification(sessionId, notificationToken, answer)
|
||||
if (parentSessionId != null) {
|
||||
manager.publishTrace(parentSessionId, "Answered question for $sessionId: $answer")
|
||||
@@ -767,6 +787,9 @@ class AgentSessionController(context: Context) {
|
||||
if (!isDirectParentSession(session)) {
|
||||
return@map session
|
||||
}
|
||||
if (!session.latestQuestion.isNullOrBlank()) {
|
||||
return@map session
|
||||
}
|
||||
val childSessions = childrenByParent[session.sessionId].orEmpty()
|
||||
if (childSessions.isEmpty()) {
|
||||
return@map session
|
||||
|
||||
@@ -144,12 +144,20 @@ object AgentSessionLauncher {
|
||||
val applicationContext = context.applicationContext
|
||||
thread(name = "CodexAgentPlanner-${pendingSession.parentSessionId}") {
|
||||
runCatching {
|
||||
val effectiveRequestUserInputHandler = requestUserInputHandler ?: { questions: JSONArray ->
|
||||
AgentPlannerQuestionRegistry.requestUserInput(
|
||||
context = applicationContext,
|
||||
sessionController = sessionController,
|
||||
sessionId = pendingSession.parentSessionId,
|
||||
questions = questions,
|
||||
)
|
||||
}
|
||||
AgentTaskPlanner.planSession(
|
||||
context = applicationContext,
|
||||
userObjective = prompt,
|
||||
executionSettings = executionSettings,
|
||||
sessionController = sessionController,
|
||||
requestUserInputHandler = null,
|
||||
requestUserInputHandler = effectiveRequestUserInputHandler,
|
||||
frameworkSessionId = pendingSession.parentSessionId,
|
||||
)
|
||||
}.onFailure { err ->
|
||||
|
||||
@@ -62,6 +62,7 @@ object AgentTaskPlanner {
|
||||
- Verify each chosen package by inspecting focused query-activities or resolve-activity output before returning it.
|
||||
- Only choose packages that directly own the requested app behavior. Never choose helper packages such as `com.android.shell`, `com.android.systemui`, or the Codex Agent/Genie packages unless the user explicitly asked for them.
|
||||
- If the user objective already names a specific installed package, use it directly after verification.
|
||||
- If the user objective is too ambiguous to choose target packages or delegated objectives safely, call request_user_input before returning JSON. Do not guess a task for vague prompts like "do something".
|
||||
- `pm list packages PACKAGE_NAME` alone is not sufficient verification.
|
||||
- Prefer focused verification commands such as `pm list packages clock`, `cmd package query-activities --brief -p PACKAGE -a android.intent.action.MAIN`, and `cmd package resolve-activity --brief -a RELEVANT_ACTION PACKAGE`.
|
||||
- Do not enumerate every launcher activity on the device. Query specific candidate packages instead.
|
||||
@@ -152,12 +153,20 @@ object AgentTaskPlanner {
|
||||
executionSettings = executionSettings,
|
||||
)
|
||||
val sessionStartResult = try {
|
||||
val effectiveRequestUserInputHandler = requestUserInputHandler ?: { questions: JSONArray ->
|
||||
AgentPlannerQuestionRegistry.requestUserInput(
|
||||
context = context,
|
||||
sessionController = sessionController,
|
||||
sessionId = pendingSession.parentSessionId,
|
||||
questions = questions,
|
||||
)
|
||||
}
|
||||
val request = planSession(
|
||||
context = context,
|
||||
userObjective = userObjective,
|
||||
executionSettings = executionSettings,
|
||||
sessionController = sessionController,
|
||||
requestUserInputHandler = requestUserInputHandler,
|
||||
requestUserInputHandler = effectiveRequestUserInputHandler,
|
||||
frameworkSessionId = pendingSession.parentSessionId,
|
||||
)
|
||||
sessionController.startDirectSessionChildren(
|
||||
|
||||
@@ -41,6 +41,10 @@ class CodexAgentService : AgentService() {
|
||||
}
|
||||
}
|
||||
if (isTerminalSessionState(session.state) && !DesktopInspectionRegistry.isPlannerAttached(session.sessionId)) {
|
||||
AgentPlannerQuestionRegistry.cancelQuestion(
|
||||
sessionId = session.sessionId,
|
||||
reason = "Planner session ended before the question was answered",
|
||||
)
|
||||
AgentPlannerRuntimeManager.closeSession(session.sessionId)
|
||||
}
|
||||
if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
|
||||
@@ -63,6 +67,10 @@ class CodexAgentService : AgentService() {
|
||||
AgentSessionBridgeServer.closeSession(sessionId)
|
||||
AgentPlannerRuntimeManager.closeSession(sessionId)
|
||||
DesktopInspectionRegistry.removeSession(sessionId)
|
||||
AgentPlannerQuestionRegistry.cancelQuestion(
|
||||
sessionId = sessionId,
|
||||
reason = "Planner session was removed before the question was answered",
|
||||
)
|
||||
AgentQuestionNotifier.cancel(this, sessionId)
|
||||
AgentQuestionNotifier.clearSessionState(sessionId)
|
||||
presentationPolicyStore.removePolicy(sessionId)
|
||||
@@ -283,12 +291,14 @@ class CodexAgentService : AgentService() {
|
||||
}
|
||||
return@thread
|
||||
}
|
||||
val effectiveNotificationText =
|
||||
AgentPlannerQuestionRegistry.latestQuestion(session.sessionId) ?: notificationText
|
||||
val posted = runCatching {
|
||||
AgentQuestionNotifier.showOrUpdateDelegatedNotification(
|
||||
context = this,
|
||||
session = session,
|
||||
notificationToken = notificationToken,
|
||||
notificationText = notificationText,
|
||||
notificationText = effectiveNotificationText,
|
||||
)
|
||||
}.onFailure { err ->
|
||||
Log.w(
|
||||
|
||||
@@ -335,9 +335,6 @@ class CreateSessionActivity : Activity() {
|
||||
existingSessionId = existingSessionId,
|
||||
),
|
||||
sessionController = sessionController,
|
||||
requestUserInputHandler = { questions ->
|
||||
AgentUserInputPrompter.promptForAnswers(this, questions)
|
||||
},
|
||||
)
|
||||
}.onFailure { err ->
|
||||
runOnUiThread {
|
||||
|
||||
@@ -207,9 +207,6 @@ class MainActivity : Activity() {
|
||||
finalPresentationPolicyOverride = finalPresentationPolicy,
|
||||
executionSettings = SessionExecutionSettings.default,
|
||||
sessionController = agentSessionController,
|
||||
requestUserInputHandler = { questions ->
|
||||
AgentUserInputPrompter.promptForAnswers(this, questions)
|
||||
},
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -126,6 +126,16 @@ The current repo now contains these implementation slices:
|
||||
- pressing `Done` on a direct-parent result consumes only the parent HOME icon
|
||||
via `consumeHomeSessionPresentation(parentSessionId)` and leaves the parent
|
||||
session inspectable in `Codex Manager`
|
||||
- Planner `request_user_input` calls now pause the direct parent session before
|
||||
any child Genie needs to exist:
|
||||
- the Agent stores the pending planner request, moves the parent framework
|
||||
session to `WAITING_FOR_USER`, and uses the parent Agent HOME icon /
|
||||
notification as the question surface
|
||||
- answering the parent question resolves the hosted planner tool request and
|
||||
returns the parent session to `RUNNING` so the same planner turn can finish
|
||||
selecting packages and delegated Genie objectives
|
||||
- child Genie questions are still represented as child-session questions and
|
||||
roll up to the parent only when they need user escalation
|
||||
- Codex Agent still uses `cancelSession(sessionId)` for user-driven cancellation
|
||||
because it is the AGENT-role app, not a HOME-role surface. The
|
||||
HOME-only `cancelHomeSession(sessionId)` API is reserved for Launcher/HOME
|
||||
@@ -317,7 +327,7 @@ the Android Agent/Genie flow.
|
||||
- `android/app/src/main/java/com/openai/codex/agent/SessionPopupActivity.kt`
|
||||
- dialog-style question/result surfaces for HOME icon and notification
|
||||
entrypoints; question cancel only dismisses the dialog, while final results
|
||||
render in a scrollable text view with an OK button that clears HOME
|
||||
render in a scrollable text view with a Done button that clears HOME
|
||||
presentation state for top-level HOME sessions
|
||||
- `android/app/src/main/java/com/openai/codex/agent/AgentQuestionNotifier.kt`
|
||||
- token-aware Agent-side renderer for delegated framework notifications,
|
||||
@@ -325,7 +335,12 @@ the Android Agent/Genie flow.
|
||||
- `android/app/src/main/java/com/openai/codex/agent/AgentNotificationReplyReceiver.kt`
|
||||
- inline reply receiver for Agent-rendered question notifications
|
||||
- `android/app/src/main/java/com/openai/codex/agent/AgentUserInputPrompter.kt`
|
||||
- Android dialog bridge for hosted Agent `request_user_input` calls
|
||||
- shared rendering and answer encoding helpers for hosted `request_user_input`
|
||||
calls
|
||||
- `android/app/src/main/java/com/openai/codex/agent/AgentPlannerQuestionRegistry.kt`
|
||||
- parent-session question registry that pauses hosted planner
|
||||
`request_user_input` calls until the user answers through Agent UI or
|
||||
notification surfaces
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/CodexGenieService.kt`
|
||||
- Genie lifecycle host for the embedded `codex app-server`
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/CodexAppServerHost.kt`
|
||||
|
||||
Reference in New Issue
Block a user