Let Agent answer Genie questions through codexd

Teach the Agent service to answer non-bridge Genie questions through the embedded codexd runtime, and upgrade the Genie scaffold to loop over QUESTION:/RESULT: turns instead of assuming a single user reply. The current bridge now sends Responses input in list form, which fixes the last observed backend validation error.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-03-19 08:39:18 -07:00
parent 6acd1f7473
commit 1340af08aa
5 changed files with 321 additions and 46 deletions

View File

@@ -6,7 +6,9 @@ import android.app.agent.AgentSessionEvent
import android.app.agent.AgentSessionInfo
import android.os.Process
import android.util.Log
import org.json.JSONArray
import org.json.JSONObject
import java.io.IOException
import java.util.concurrent.ConcurrentHashMap
import kotlin.concurrent.thread
@@ -19,20 +21,28 @@ class CodexAgentService : AgentService() {
private const val BRIDGE_RESPONSE_PREFIX = "__codex_bridge_result__ "
private const val METHOD_GET_AUTH_STATUS = "get_auth_status"
private const val METHOD_HTTP_REQUEST = "http_request"
// Value of the "instructions" field sent with every auto-answer request built by
// requestGenieAutoAnswer.
private const val AUTO_ANSWER_INSTRUCTIONS =
"You are Codex acting as the Android Agent supervising a Genie execution. Reply with the exact free-form answer that should be sent back to the Genie. Keep it short and actionable. If the Genie can proceed without extra constraints, reply with exactly: continue"
// Maximum number of characters of rendered session context kept by renderRecentContext;
// longer context keeps only its trailing characters.
private const val MAX_AUTO_ANSWER_CONTEXT_CHARS = 800
}
// Request keys tracked for internal bridge questions; an entry is removed again when
// answering that bridge question fails. (Where keys are added is outside this view.)
private val handledBridgeRequests = ConcurrentHashMap.newKeySet<String>()
// genieQuestionKey() values for Genie questions that were auto-answered successfully.
private val handledGenieQuestions = ConcurrentHashMap.newKeySet<String>()
// genieQuestionKey() values for Genie questions whose auto-answer attempt is still running;
// updateQuestionNotification skips showing a notification while a key is present here.
private val pendingGenieQuestions = ConcurrentHashMap.newKeySet<String>()
// AgentManager system service, looked up lazily on first access. Callers null-check the
// value before using it.
private val agentManager by lazy { getSystemService(AgentManager::class.java) }
/**
 * Handles a session update by running, in order: internal bridge question handling,
 * the Genie auto-answer attempt, and the user-facing question notification refresh.
 *
 * @param session the session whose state changed.
 */
override fun onSessionChanged(session: AgentSessionInfo) {
Log.i(TAG, "onSessionChanged $session")
handleInternalBridgeQuestion(session.sessionId)
// Must run before updateQuestionNotification: it registers the question as pending,
// which is what makes the notification update below hold back the notification.
maybeAutoAnswerGenieQuestion(session)
updateQuestionNotification(session)
}
/**
 * Cleans up per-session state once a session is gone: cancels its question notification
 * and drops every handled/pending Genie question key that belongs to it.
 *
 * @param sessionId identifier of the removed session.
 */
override fun onSessionRemoved(sessionId: String) {
    Log.i(TAG, "onSessionRemoved sessionId=$sessionId")
    AgentQuestionNotifier.cancel(this, sessionId)
    // Question keys are "<sessionId>:<question>", so the session's keys share this prefix.
    val sessionKeyPrefix = "$sessionId:"
    listOf(handledGenieQuestions, pendingGenieQuestions).forEach { trackedKeys ->
        trackedKeys.removeIf { key -> key.startsWith(sessionKeyPrefix) }
    }
}
private fun handleInternalBridgeQuestion(sessionId: String) {
@@ -115,7 +125,7 @@ class CodexAgentService : AgentService() {
}
runCatching {
answerBridgeQuestion(manager, sessionId, "$BRIDGE_RESPONSE_PREFIX$response")
answerQuestionWithRetry(manager, sessionId, "$BRIDGE_RESPONSE_PREFIX$response")
}.onFailure { err ->
handledBridgeRequests.remove(requestKey)
Log.w(TAG, "Failed to answer bridge question for $sessionId", err)
@@ -123,7 +133,7 @@ class CodexAgentService : AgentService() {
}
}
private fun answerBridgeQuestion(manager: AgentManager, sessionId: String, response: String) {
private fun answerQuestionWithRetry(manager: AgentManager, sessionId: String, response: String) {
repeat(BRIDGE_ANSWER_RETRY_COUNT) { attempt ->
runCatching {
manager.answerQuestion(sessionId, response)
@@ -169,22 +179,54 @@ class CodexAgentService : AgentService() {
!isSessionWaitingForUser(manager, sessionId)
}
/**
 * Tries to answer the session's current visible Genie question automatically.
 *
 * Does nothing unless the session is waiting for the user, an [AgentManager] is available,
 * and a visible (non-bridge) question exists that is neither already handled nor already
 * being processed. The actual request runs on a dedicated background thread. On success the
 * question is marked handled and its notification is cancelled; on failure the question
 * notification is shown if the session is still waiting for the user.
 *
 * @param session the session that just changed.
 */
private fun maybeAutoAnswerGenieQuestion(session: AgentSessionInfo) {
    if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
        return
    }
    val manager = agentManager ?: return
    val events = manager.getSessionEvents(session.sessionId)
    val question = findVisibleQuestion(events) ?: return
    val questionKey = genieQuestionKey(session.sessionId, question)
    // add() returning false means another attempt for the same question is in flight.
    if (handledGenieQuestions.contains(questionKey) || !pendingGenieQuestions.add(questionKey)) {
        return
    }
    thread(name = "CodexAgentAutoAnswer-${session.sessionId}") {
        try {
            Log.i(TAG, "Attempting Agent auto-answer for ${session.sessionId}")
            runCatching {
                val answer = requestGenieAutoAnswer(session, question, events)
                answerQuestionWithRetry(manager, session.sessionId, answer)
                handledGenieQuestions.add(questionKey)
                AgentQuestionNotifier.cancel(this, session.sessionId)
                Log.i(TAG, "Auto-answered Genie question for ${session.sessionId}")
            }.onFailure { err ->
                Log.i(TAG, "Agent auto-answer unavailable for ${session.sessionId}: ${err.message}")
                if (isSessionWaitingForUser(manager, session.sessionId)) {
                    AgentQuestionNotifier.showQuestion(
                        context = this,
                        sessionId = session.sessionId,
                        targetPackage = session.targetPackage,
                        question = question,
                    )
                }
            }
        } finally {
            // Always release the pending marker, even if the failure handler itself throws;
            // otherwise the notification for this question would stay suppressed forever.
            pendingGenieQuestions.remove(questionKey)
        }
    }
}
private fun updateQuestionNotification(session: AgentSessionInfo) {
if (session.state != AgentSessionInfo.STATE_WAITING_FOR_USER) {
AgentQuestionNotifier.cancel(this, session.sessionId)
return
}
val manager = agentManager ?: return
val question = manager.getSessionEvents(session.sessionId)
.lastOrNull { event ->
event.type == AgentSessionEvent.TYPE_QUESTION &&
event.message != null &&
!event.message.startsWith(BRIDGE_REQUEST_PREFIX)
}?.message
val question = findVisibleQuestion(manager.getSessionEvents(session.sessionId))
if (question.isNullOrBlank()) {
AgentQuestionNotifier.cancel(this, session.sessionId)
return
}
if (pendingGenieQuestions.contains(genieQuestionKey(session.sessionId, question))) {
return
}
AgentQuestionNotifier.showQuestion(
context = this,
sessionId = session.sessionId,
@@ -192,4 +234,129 @@ class CodexAgentService : AgentService() {
question = question,
)
}
/**
 * Asks the local codexd runtime for the answer to send back to the Genie.
 *
 * Builds a non-streaming, non-stored request with a single user message (input in list
 * form) and posts it to `/v1/responses`.
 *
 * @param session session the question belongs to.
 * @param question the visible Genie question being answered.
 * @param events session events used to render recent context for the prompt.
 * @return the output text extracted from the response body.
 * @throws IOException if codexd is not authenticated, has no effective model, replies with
 *   a non-200 status, or the response carries no output text.
 */
private fun requestGenieAutoAnswer(
    session: AgentSessionInfo,
    question: String,
    events: List<AgentSessionEvent>,
): String {
    val status = CodexdLocalClient.waitForRuntimeStatus(this)
    if (!status.authenticated) {
        throw IOException("codexd is not authenticated")
    }
    val model = status.effectiveModel ?: throw IOException("codexd effective model unavailable")

    // Assemble the payload inside-out: text part -> user message -> request object.
    val textPart = JSONObject()
        .put("type", "input_text")
        .put("text", buildAutoAnswerPrompt(session, question, events))
    val userMessage = JSONObject()
        .put("role", "user")
        .put("content", JSONArray().put(textPart))
    val payload = JSONObject()
        .put("model", model)
        .put("store", false)
        .put("stream", false)
        .put("instructions", AUTO_ANSWER_INSTRUCTIONS)
        .put("input", JSONArray().put(userMessage))
        .toString()

    val reply = CodexdLocalClient.waitForResponse(this, "POST", "/v1/responses", payload)
    if (reply.statusCode != 200) {
        throw IOException("HTTP ${reply.statusCode}: ${reply.body}")
    }
    return parseResponsesOutputText(reply.body)
}
/**
 * Builds the user prompt for an auto-answer request: target package, the current Genie
 * question, and the rendered recent session context, one section per line.
 *
 * The prompt is assembled by joining lines rather than with a raw string plus
 * `trimIndent()`: `trimIndent()` runs after interpolation, so multi-line question or context
 * text would take part in indent detection and could leave stray leading whitespace in (or
 * strip whitespace from) the prompt.
 *
 * @param session session the question belongs to; its target package may be absent.
 * @param question the visible Genie question.
 * @param events session events to summarise as context.
 * @return the prompt text.
 */
private fun buildAutoAnswerPrompt(
    session: AgentSessionInfo,
    question: String,
    events: List<AgentSessionEvent>,
): String {
    val targetPackage = session.targetPackage ?: "unknown"
    return listOf(
        "Target package: $targetPackage",
        "Current Genie question: $question",
        "Recent session context:",
        renderRecentContext(events),
    ).joinToString("\n")
}
/**
 * Renders the last six non-bridge events as "<Type>: <message>" lines.
 *
 * @param events all events of the session, oldest first as provided by the caller.
 * @return the rendered lines; a placeholder sentence when the rendering is blank; or, when
 *   the rendering exceeds [MAX_AUTO_ANSWER_CONTEXT_CHARS], only its trailing characters.
 */
private fun renderRecentContext(events: List<AgentSessionEvent>): String {
    val visibleEvents = events.filterNot { candidate -> isInternalBridgeEvent(candidate) }
    val rendered = visibleEvents.takeLast(6).joinToString(separator = "\n") { event ->
        eventTypeToString(event.type) + ": " + (event.message ?: "")
    }
    return when {
        // Keep the tail so the most recent events survive truncation.
        rendered.length > MAX_AUTO_ANSWER_CONTEXT_CHARS -> rendered.takeLast(MAX_AUTO_ANSWER_CONTEXT_CHARS)
        else -> rendered.ifBlank { "No prior Genie context." }
    }
}
/**
 * Extracts the answer text from a Responses payload.
 *
 * Prefers a top-level `output_text` string; otherwise concatenates the `text` of every
 * `output_text` part found under `output[*].content[*]`.
 *
 * Explicit JSON `null` values are treated as absent. NOTE(review): Android's
 * `JSONObject.optString` is known to coerce a JSON null into the literal string "null",
 * which would otherwise be sent to the Genie as the answer; `parseRuntimeStatus` in
 * `CodexdLocalClient` guards its nullable fields the same way.
 *
 * @param body raw JSON response body.
 * @return the non-blank output text.
 * @throws IOException if the payload has no `output` array or yields no output text.
 */
private fun parseResponsesOutputText(body: String): String {
    val data = JSONObject(body)
    val directOutput = if (data.isNull("output_text")) "" else data.optString("output_text")
    if (directOutput.isNotBlank()) {
        return directOutput
    }
    val output = data.optJSONArray("output")
        ?: throw IOException("Responses payload missing output")
    val combined = buildString {
        for (outputIndex in 0 until output.length()) {
            // Entries that are not objects, or carry no content array, are skipped.
            val item = output.optJSONObject(outputIndex) ?: continue
            val content = item.optJSONArray("content") ?: continue
            for (contentIndex in 0 until content.length()) {
                val part = content.optJSONObject(contentIndex) ?: continue
                if (part.optString("type") == "output_text" && !part.isNull("text")) {
                    append(part.optString("text"))
                }
            }
        }
    }
    if (combined.isBlank()) {
        throw IOException("Responses payload missing output_text content")
    }
    return combined
}
/**
 * Finds the most recent question that is not an internal bridge request.
 *
 * @param events session events, scanned from newest to oldest.
 * @return the message of the latest question event with a non-blank message that is not a
 *   bridge request, or `null` when there is none.
 */
private fun findVisibleQuestion(events: List<AgentSessionEvent>): String? {
    for (index in events.indices.reversed()) {
        val candidate = events[index]
        if (candidate.type != AgentSessionEvent.TYPE_QUESTION) continue
        if (candidate.message.isNullOrBlank()) continue
        if (isInternalBridgeEvent(candidate)) continue
        return candidate.message
    }
    return null
}
/**
 * Tells whether an event belongs to the internal bridge exchange.
 *
 * @param event the event to classify.
 * @return `true` for a question starting with [BRIDGE_REQUEST_PREFIX] or an answer starting
 *   with [BRIDGE_RESPONSE_PREFIX]; `false` for every other type or a missing message.
 */
private fun isInternalBridgeEvent(event: AgentSessionEvent): Boolean {
    val text = event.message ?: return false
    // Only questions and answers can carry a bridge prefix.
    val bridgePrefix = when (event.type) {
        AgentSessionEvent.TYPE_QUESTION -> BRIDGE_REQUEST_PREFIX
        AgentSessionEvent.TYPE_ANSWER -> BRIDGE_RESPONSE_PREFIX
        else -> return false
    }
    return text.startsWith(bridgePrefix)
}
/**
 * Maps an [AgentSessionEvent] type constant to the label used when rendering context.
 *
 * @param type the event type constant.
 * @return a fixed label for known types, or `Event(<type>)` for anything else.
 */
private fun eventTypeToString(type: Int): String = when (type) {
    AgentSessionEvent.TYPE_TRACE -> "Trace"
    AgentSessionEvent.TYPE_QUESTION -> "Question"
    AgentSessionEvent.TYPE_RESULT -> "Result"
    AgentSessionEvent.TYPE_ERROR -> "Error"
    AgentSessionEvent.TYPE_POLICY -> "Policy"
    AgentSessionEvent.TYPE_ANSWER -> "Answer"
    else -> "Event($type)"
}
/**
 * Builds the key under which a Genie question is tracked as handled or pending.
 *
 * @param sessionId session the question belongs to.
 * @param question the question text.
 * @return the session id and question joined by a colon.
 */
private fun genieQuestionKey(sessionId: String, question: String): String =
    sessionId + ":" + question
}

View File

@@ -20,6 +20,16 @@ object CodexdLocalClient {
val clientCount: Int,
)
/**
 * Runtime status as parsed by parseRuntimeStatus from the body of
 * `GET /internal/runtime/status`.
 *
 * @property authenticated value of the `authenticated` field; `false` when absent.
 * @property accountEmail value of `accountEmail`, or `null` when absent or JSON null.
 * @property clientCount value of `clientCount`; `0` when absent.
 * @property modelProviderId value of `modelProviderId`; `"unknown"` when absent.
 * @property configuredModel value of `configuredModel`, or `null` when absent or JSON null.
 * @property effectiveModel value of `effectiveModel`, or `null` when absent or JSON null.
 * @property upstreamBaseUrl value of `upstreamBaseUrl`; `"unknown"` when absent.
 */
data class RuntimeStatus(
val authenticated: Boolean,
val accountEmail: String?,
val clientCount: Int,
val modelProviderId: String,
val configuredModel: String?,
val effectiveModel: String?,
val upstreamBaseUrl: String,
)
fun waitForResponse(
context: Context,
method: String,
@@ -52,6 +62,14 @@ object CodexdLocalClient {
return parseAuthStatus(response.body)
}
/**
 * Requests `GET /internal/runtime/status` through [waitForResponse] and parses the result.
 *
 * @param context context forwarded to [waitForResponse].
 * @return the parsed [RuntimeStatus].
 * @throws IOException if the response status is not 200.
 */
fun waitForRuntimeStatus(context: Context): RuntimeStatus {
    val statusResponse = waitForResponse(context, "GET", "/internal/runtime/status", null)
    return if (statusResponse.statusCode == 200) {
        parseRuntimeStatus(statusResponse.body)
    } else {
        throw IOException("HTTP ${statusResponse.statusCode}: ${statusResponse.body}")
    }
}
fun fetchAuthStatus(socketPath: String): AuthStatus? {
return try {
val response = executeRequest(socketPath, "GET", "/internal/auth/status", null)
@@ -124,4 +142,17 @@ object CodexdLocalClient {
clientCount = clientCount,
)
}
/**
 * Parses a runtime status JSON body into a [RuntimeStatus].
 *
 * Missing scalar fields fall back to defaults (`false`, `0`, `"unknown"`); the three
 * optional string fields become `null` when absent or JSON null.
 *
 * @param body raw JSON response body.
 * @return the parsed status.
 */
private fun parseRuntimeStatus(body: String): RuntimeStatus {
    val json = JSONObject(body)

    // Reads an optional string field, mapping an absent or JSON-null value to null.
    fun nullableString(key: String): String? =
        if (json.isNull(key)) null else json.optString(key)

    return RuntimeStatus(
        authenticated = json.optBoolean("authenticated", false),
        accountEmail = nullableString("accountEmail"),
        clientCount = json.optInt("clientCount", 0),
        modelProviderId = json.optString("modelProviderId", "unknown"),
        configuredModel = nullableString("configuredModel"),
        effectiveModel = nullableString("effectiveModel"),
        upstreamBaseUrl = json.optString("upstreamBaseUrl", "unknown"),
    )
}
}