Host Genie sessions with codex app-server

Package the Android codex binary into the Genie APK and run hosted codex app-server inside Genie sessions, with Android dynamic tools and AgentSDK question bridging replacing the live TOOL/QUESTION/RESULT scaffold.

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-03-19 11:33:15 -07:00
parent 3a5ab674f0
commit f212204fa4
8 changed files with 880 additions and 344 deletions

View File

@@ -22,6 +22,12 @@ val agentPlatformStubSdkZip = providers
val extractedAgentPlatformJar = layout.buildDirectory.file(
"generated/agent-platform/android-agent-platform-stub-sdk.jar"
)
val repoRoot = rootProject.projectDir.parentFile
val codexTargets = mapOf(
"arm64-v8a" to "aarch64-linux-android",
"x86_64" to "x86_64-linux-android",
)
val codexJniDir = layout.buildDirectory.dir("generated/codex-jni")
android {
namespace = "com.openai.codex.genie"
@@ -49,6 +55,10 @@ android {
sourceCompatibility = androidJavaVersion
targetCompatibility = androidJavaVersion
}
packaging {
jniLibs.useLegacyPackaging = true
}
}
val extractAgentPlatformStubSdk = tasks.register<Sync>("extractAgentPlatformStubSdk") {
@@ -65,8 +75,35 @@ val extractAgentPlatformStubSdk = tasks.register<Sync>("extractAgentPlatformStub
into(outputDir)
}
// Prebuilt codex binary expected for each packaged Android ABI, keyed by ABI name.
val codexCliBinaries = codexTargets.mapValues { (_, triple) ->
    repoRoot.resolve("codex-rs/target/android/${triple}/release/codex")
}

// Fails the build with an actionable message when a prebuilt binary is missing.
// This is a separate task because a Sync task whose sources are all missing is skipped as
// NO-SOURCE, in which case a doFirst check on the Sync task itself would never run.
val verifyCodexCliBinaries = tasks.register("verifyCodexCliBinaries") {
    doLast {
        codexCliBinaries.forEach { (abi, binary) ->
            if (!binary.exists()) {
                throw GradleException(
                    "Missing codex binary for ${abi} at ${binary}. Run `just android-build` from the repo root."
                )
            }
        }
    }
}

// Copies each ABI's codex binary into <codexJniDir>/<abi>/libcodex.so.
val syncCodexCliJniLibs = tasks.register<Sync>("syncCodexCliJniLibs") {
    dependsOn(verifyCodexCliBinaries)
    into(codexJniDir)
    codexCliBinaries.forEach { (abi, binary) ->
        from(binary) {
            into(abi)
            rename { "libcodex.so" }
        }
    }
}
android.sourceSets["main"].jniLibs.srcDir(codexJniDir.get().asFile)
tasks.named("preBuild").configure {
dependsOn(extractAgentPlatformStubSdk)
dependsOn(syncCodexCliJniLibs)
}
dependencies {

View File

@@ -25,10 +25,13 @@ class AndroidGenieToolExecutor(
private const val MAX_UI_XML_CHARS = 8_000
}
fun execute(toolCall: GenieModelTurn.ToolCall): GenieToolObservation {
return when (toolCall.name) {
"android.package.inspect" -> inspectPackage(toolCall.arguments)
"android.intent.launch" -> launchIntent(toolCall.arguments)
fun execute(
toolName: String,
arguments: JSONObject,
): GenieToolObservation {
return when (toolName) {
"android.package.inspect" -> inspectPackage(arguments)
"android.intent.launch" -> launchIntent(arguments)
"android.target.show" -> requestTargetVisibility(
action = "show",
request = callback::requestShowDetachedTarget,
@@ -47,11 +50,11 @@ class AndroidGenieToolExecutor(
)
"android.target.capture_frame" -> captureDetachedTargetFrame()
"android.ui.dump" -> dumpUiHierarchy()
"android.input.tap" -> tap(toolCall.arguments)
"android.input.text" -> inputText(toolCall.arguments)
"android.input.key" -> inputKey(toolCall.arguments)
"android.wait" -> waitFor(toolCall.arguments)
else -> throw IOException("Unknown tool: ${toolCall.name}")
"android.input.tap" -> tap(arguments)
"android.input.text" -> inputText(arguments)
"android.input.key" -> inputKey(arguments)
"android.wait" -> waitFor(arguments)
else -> throw IOException("Unknown tool: $toolName")
}
}

View File

@@ -0,0 +1,681 @@
package com.openai.codex.genie
import android.app.agent.AgentSessionInfo
import android.app.agent.GenieRequest
import android.app.agent.GenieService
import android.content.Context
import android.util.Log
import java.io.BufferedWriter
import java.io.Closeable
import java.io.File
import java.io.IOException
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.TimeUnit
import java.util.concurrent.atomic.AtomicInteger
import org.json.JSONArray
import org.json.JSONObject
class CodexAppServerHost(
private val context: Context,
private val request: GenieRequest,
private val callback: GenieService.Callback,
private val control: GenieSessionControl,
private val runtimeStatus: CodexAgentBridge.RuntimeStatus,
private val targetAppContext: TargetAppContext?,
) : Closeable {
companion object {
private const val TAG = "CodexAppServerHost"
private const val AGENT_SOCKET_PATH = "@com.openai.codexd.codexd"
private const val REQUEST_TIMEOUT_MS = 30_000L
private const val POLL_TIMEOUT_MS = 250L
}
private val requestIdSequence = AtomicInteger(1)
private val pendingResponses = ConcurrentHashMap<String, LinkedBlockingQueue<JSONObject>>()
private val inboundMessages = LinkedBlockingQueue<JSONObject>()
private val writerLock = Any()
private val streamedAgentMessages = mutableMapOf<String, StringBuilder>()
private lateinit var process: Process
private lateinit var writer: BufferedWriter
private var stdoutThread: Thread? = null
private var stderrThread: Thread? = null
private var finalAgentMessage: String? = null
private var resultPublished = false
/**
 * Runs one hosted session: spawns the app-server process, performs the initialize handshake,
 * starts a thread and a turn, publishes a trace line, then blocks in [eventLoop].
 */
fun run() {
    startProcess()
    initialize()
    val hostedThreadId = startThread()
    startTurn(hostedThreadId)
    val traceLine = "Hosted codex app-server thread $hostedThreadId for ${request.targetPackage}."
    callback.publishTrace(request.sessionId, traceLine)
    eventLoop()
}
/**
 * Interrupts both pump threads, closes the writer, destroys the child process if it was
 * started, and clears the process reference held by [control].
 */
override fun close() {
    listOf(stdoutThread, stderrThread).forEach { pump -> pump?.interrupt() }
    synchronized(writerLock) {
        // runCatching also absorbs the failure raised when `writer` was never assigned.
        runCatching { writer.close() }
    }
    if (::process.isInitialized) process.destroy()
    control.process = null
}
/**
 * Launches `codex app-server --listen stdio://` with a private CODEX_HOME under the app's
 * files directory, records the process on [control], and starts the stdout/stderr pumps.
 */
private fun startProcess() {
    val codexHome = File(context.filesDir, "codex-home")
    codexHome.mkdirs()
    val command = listOf(
        CodexBinaryLocator.resolve(context).absolutePath,
        "app-server",
        "--listen",
        "stdio://",
    )
    val builder = ProcessBuilder(command)
    builder.environment().apply {
        put("CODEX_HOME", codexHome.absolutePath)
        put("CODEX_OPENAI_UNIX_SOCKET", AGENT_SOCKET_PATH)
        put("OPENAI_BASE_URL", "http://localhost/v1")
        put("RUST_LOG", "info")
    }
    val started = builder.start()
    process = started
    control.process = started
    writer = started.outputStream.bufferedWriter()
    startStdoutPump()
    startStderrPump()
}
/**
 * Starts a thread that reads the child's stdout line by line, parses each non-blank line as
 * JSON, and hands it to [routeInbound]. Lines that fail to parse are logged and skipped.
 */
private fun startStdoutPump() {
    stdoutThread = Thread {
        try {
            process.inputStream.bufferedReader().useLines { lines ->
                lines.forEach { line ->
                    if (line.isBlank()) {
                        return@forEach
                    }
                    val message = runCatching { JSONObject(line) }
                        .getOrElse { err ->
                            Log.w(TAG, "Failed to parse codex app-server stdout line", err)
                            return@forEach
                        }
                    routeInbound(message)
                }
            }
        } catch (err: IOException) {
            // The pipe can fail under a blocked read when the process is destroyed or closed.
            // Letting that escape would be an uncaught exception on this thread.
            Log.i(TAG, "codex app-server stdout pump stopped", err)
        }
    }.also {
        it.name = "CodexAppServerStdout-${request.sessionId}"
        it.start()
    }
}
/**
 * Starts a thread that forwards every non-blank stderr line of the child process to logcat.
 */
private fun startStderrPump() {
    stderrThread = Thread {
        try {
            process.errorStream.bufferedReader().useLines { lines ->
                lines.forEach { line ->
                    if (line.isNotBlank()) {
                        Log.i(TAG, line)
                    }
                }
            }
        } catch (err: IOException) {
            // The pipe can fail under a blocked read when the process is destroyed or closed.
            // Letting that escape would be an uncaught exception on this thread.
            Log.i(TAG, "codex app-server stderr pump stopped", err)
        }
    }.also {
        it.name = "CodexAppServerStderr-${request.sessionId}"
        it.start()
    }
}
/**
 * Dispatches one parsed stdout message. A message with an "id" and no "method" is treated as
 * a response and offered to the matching pending-request queue (dropped when none is
 * registered); everything else is queued for [eventLoop].
 */
private fun routeInbound(message: JSONObject) {
    val isResponse = message.has("id") && !message.has("method")
    if (!isResponse) {
        inboundMessages.offer(message)
        return
    }
    val responseKey = message.get("id").toString()
    pendingResponses[responseKey]?.offer(message)
}
/**
 * Sends the "initialize" request with this client's identity and capabilities, then the
 * "initialized" notification.
 */
private fun initialize() {
    val clientInfo = JSONObject().apply {
        put("name", "android_genie")
        put("title", "Android Genie")
        put("version", "0.1.0")
    }
    val capabilities = JSONObject().put("experimentalApi", true)
    val handshake = JSONObject().apply {
        put("clientInfo", clientInfo)
        put("capabilities", capabilities)
    }
    request(method = "initialize", params = handshake)
    notify("initialized", JSONObject())
}
/**
 * Issues "thread/start" with the session's model, policy settings, base instructions and
 * dynamic tool specs.
 *
 * @return the id found at `thread.id` in the response.
 */
private fun startThread(): String {
    val threadParams = JSONObject().apply {
        put("model", runtimeStatus.effectiveModel)
        put("approvalPolicy", "never")
        put("sandbox", "read-only")
        put("ephemeral", true)
        put("cwd", context.filesDir.absolutePath)
        put("serviceName", "android_genie")
        put("baseInstructions", buildBaseInstructions())
        put("dynamicTools", buildDynamicToolSpecs())
    }
    val reply = request(method = "thread/start", params = threadParams)
    val thread = reply.getJSONObject("thread")
    return thread.getString("id")
}
/**
 * Issues "turn/start" on [threadId] with the delegated prompt as a single text input item.
 */
private fun startTurn(threadId: String) {
    val promptItem = JSONObject().apply {
        put("type", "text")
        put("text", buildDelegatedPrompt())
    }
    val turnParams = JSONObject().apply {
        put("threadId", threadId)
        put("input", JSONArray().put(promptItem))
    }
    request(method = "turn/start", params = turnParams)
}
/**
 * Polls the inbound queue until a notification handler reports that the turn is finished.
 *
 * Server requests (messages with both "method" and "id") go to [handleServerRequest]; other
 * messages with a "method" go to [handleNotification].
 *
 * @throws IOException when the session is cancelled, or when the queue is idle and the child
 *   process is no longer alive.
 */
private fun eventLoop() {
    while (true) {
        if (control.cancelled) {
            throw IOException("Cancelled")
        }
        val message = inboundMessages.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS)
        when {
            message == null -> {
                // Idle tick: only give up once the child process has actually exited.
                if (!process.isAlive) {
                    throw IOException("codex app-server exited with code ${process.exitValue()}")
                }
            }
            message.has("method") && message.has("id") -> handleServerRequest(message)
            message.has("method") && handleNotification(message) -> return
        }
    }
}
/**
 * Handles a request sent by the app-server. Tool calls and user-input requests are delegated
 * to their handlers; any other method is traced and answered with error code -32601.
 */
private fun handleServerRequest(message: JSONObject) {
    val method = message.getString("method")
    val requestId = message.get("id")
    val params = message.optJSONObject("params") ?: JSONObject()
    if (method == "item/tool/call") {
        handleDynamicToolCall(requestId, params)
        return
    }
    if (method == "item/tool/requestUserInput") {
        handleRequestUserInput(requestId, params)
        return
    }
    callback.publishTrace(request.sessionId, "Unsupported codex app-server request: $method")
    sendError(
        requestId = requestId,
        code = -32601,
        message = "Unsupported app-server request: $method",
    )
}
/**
 * Executes the Android tool named in [params] and replies to [requestId] with the outcome.
 *
 * A throwing tool is converted into a failure observation rather than propagated. The
 * observation summary is traced, and the reply carries a `success` flag plus content items.
 *
 * @param requestId id of the server request being answered.
 * @param params request params; reads "tool" and the optional "arguments" object.
 */
private fun handleDynamicToolCall(
    requestId: Any,
    params: JSONObject,
) {
    val toolName = params.optString("tool").trim()
    val arguments = params.optJSONObject("arguments") ?: JSONObject()
    val toolExecutor = AndroidGenieToolExecutor(
        context = context,
        callback = callback,
        sessionId = request.sessionId,
        defaultTargetPackage = request.targetPackage,
    )
    val observation = runCatching {
        toolExecutor.execute(toolName, arguments)
    }.getOrElse { err ->
        // Use the same fallback for both texts so a message-less exception never shows "null".
        val detail = err.message ?: err::class.java.simpleName
        GenieToolObservation(
            name = toolName.ifBlank { "unknown" },
            summary = "Tool $toolName failed: $detail",
            promptDetails = "Tool $toolName failed.\nError: $detail",
        )
    }
    callback.publishTrace(request.sessionId, observation.summary)
    sendResult(
        requestId = requestId,
        result = JSONObject()
            // NOTE(review): success is inferred from the summary text; an explicit flag on the
            // observation would be more robust. Confirm against GenieToolObservation.
            .put("success", !observation.summary.contains(" failed:"))
            .put("contentItems", buildDynamicToolContentItems(observation)),
    )
}
/**
 * Publishes the server's questions as one Agent question, waits for the reply through
 * [control], and answers [requestId] with the per-question answers.
 *
 * The session state is WAITING_FOR_USER while blocked and RUNNING again afterwards.
 */
private fun handleRequestUserInput(
    requestId: Any,
    params: JSONObject,
) {
    val sessionId = request.sessionId
    val questions = params.optJSONArray("questions") ?: JSONArray()
    callback.publishQuestion(sessionId, renderAgentQuestion(questions))
    callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
    val answer = control.waitForUserResponse()
    callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
    callback.publishTrace(sessionId, "Received Agent answer for ${request.targetPackage}.")
    val answers = buildQuestionAnswers(questions, answer)
    sendResult(requestId = requestId, result = JSONObject().put("answers", answers))
}
/**
 * Processes one app-server notification.
 *
 * @return true only for "turn/completed" (after [finishTurn] has run), which tells the
 *   event loop to stop; false for every other method, including unknown ones.
 */
private fun handleNotification(message: JSONObject): Boolean {
    val method = message.getString("method")
    val params = message.optJSONObject("params") ?: JSONObject()
    when (method) {
        "turn/completed" -> {
            finishTurn(params)
            return true
        }
        "turn/started" ->
            callback.publishTrace(request.sessionId, "codex turn started for ${request.targetPackage}.")
        "item/agentMessage/delta" -> {
            // Accumulate streamed text per item id; deltas without an id are ignored.
            val itemId = params.optString("itemId")
            if (itemId.isNotBlank()) {
                val buffer = streamedAgentMessages.getOrPut(itemId, ::StringBuilder)
                buffer.append(params.optString("delta"))
            }
        }
        "item/started" -> publishItemStartedTrace(params.optJSONObject("item"))
        "item/completed" -> captureCompletedItem(params.optJSONObject("item"))
    }
    return false
}
/**
 * Publishes a trace line when a dynamic tool call or command execution item starts.
 * Other item types, and a null [item], are ignored.
 */
private fun publishItemStartedTrace(item: JSONObject?) {
    if (item == null) {
        return
    }
    when (item.optString("type")) {
        "dynamicToolCall" -> {
            val tool = item.optString("tool")
            callback.publishTrace(request.sessionId, "Codex requested dynamic tool $tool.")
        }
        "commandExecution" -> {
            // Accept either an argv array or a plain string. Array elements are joined manually
            // because JSONArray.join() JSON-quotes string values.
            val argv = item.optJSONArray("command")
            val command = when {
                argv != null -> (0 until argv.length()).joinToString(" ") { index -> argv.optString(index) }
                item.isNull("command") -> ""
                else -> item.optString("command")
            }.ifBlank { "command" }
            callback.publishTrace(request.sessionId, "Codex started command execution: $command")
        }
    }
}
/**
 * Records or traces a completed item.
 *
 * - "agentMessage": stores its text (or the streamed deltas for that item id when the text
 *   is blank) as the latest final agent message.
 * - "commandExecution" / "dynamicToolCall": publishes a completion trace line.
 *
 * Other item types, and a null [item], are ignored.
 */
private fun captureCompletedItem(item: JSONObject?) {
    if (item == null) {
        return
    }
    when (item.optString("type")) {
        "agentMessage" -> {
            val itemId = item.optString("id")
            val text = item.optString("text").ifBlank {
                streamedAgentMessages[itemId]?.toString().orEmpty()
            }
            if (text.isNotBlank()) {
                finalAgentMessage = text
            }
        }
        "commandExecution" -> {
            val status = item.optString("status")
            // isNull() covers both a missing key and an explicit JSON null; opt() alone would
            // hand back JSONObject.NULL, which prints as "null" and skips the fallback.
            val exitCode = if (item.isNull("exitCode")) null else item.opt("exitCode")
            callback.publishTrace(
                request.sessionId,
                "Command execution completed with status=$status exitCode=${exitCode ?: "unknown"}.",
            )
        }
        "dynamicToolCall" -> {
            val tool = item.optString("tool")
            val status = item.optString("status")
            callback.publishTrace(request.sessionId, "Dynamic tool $tool completed with status=$status.")
        }
    }
}
/**
 * Translates the final turn status into session callbacks.
 *
 * - "completed": publishes the final agent message (or a placeholder) and marks COMPLETED.
 * - "interrupted": publishes an error and marks CANCELLED.
 * - anything else: publishes the turn's error (or a status-based fallback) and marks FAILED.
 */
private fun finishTurn(params: JSONObject) {
    val turn = params.optJSONObject("turn") ?: JSONObject()
    when (turn.optString("status")) {
        "completed" -> {
            val resultText = finalAgentMessage?.takeIf(String::isNotBlank)
                ?: "Genie completed without a final assistant message."
            publishResultOnce(resultText)
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_COMPLETED)
        }
        "interrupted" -> {
            callback.publishError(request.sessionId, "Genie turn interrupted")
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_CANCELLED)
        }
        else -> {
            // isNull() treats an explicit JSON null like a missing key, so the fallback text
            // is used instead of publishing the literal string "null".
            val reportedError = if (turn.isNull("error")) null else turn.opt("error")?.toString()
            val errorDetail = reportedError
                ?: "Genie turn failed with status ${turn.optString("status", "unknown")}"
            callback.publishError(request.sessionId, errorDetail)
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_FAILED)
        }
    }
}
/** Publishes [text] as the session result the first time it is called; later calls do nothing. */
private fun publishResultOnce(text: String) {
    if (!resultPublished) {
        resultPublished = true
        callback.publishResult(request.sessionId, text)
    }
}
/**
 * Sends a request to the app-server and blocks until its response arrives.
 *
 * @param method request method name.
 * @param params request params object.
 * @return the response's "result" object, or an empty object when there is none.
 * @throws IOException when no response arrives within REQUEST_TIMEOUT_MS or the response
 *   carries an "error" object.
 */
private fun request(
    method: String,
    params: JSONObject,
): JSONObject {
    val key = requestIdSequence.getAndIncrement().toString()
    val mailbox = LinkedBlockingQueue<JSONObject>(1)
    // Register before sending so the stdout pump can always find the queue.
    pendingResponses[key] = mailbox
    try {
        val envelope = JSONObject().apply {
            put("id", key)
            put("method", method)
            put("params", params)
        }
        sendMessage(envelope)
        val reply = mailbox.poll(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
        if (reply == null) {
            throw IOException("Timed out waiting for $method response")
        }
        reply.optJSONObject("error")?.let { failure ->
            throw IOException("$method failed: ${failure.optString("message", failure.toString())}")
        }
        return reply.optJSONObject("result") ?: JSONObject()
    } finally {
        pendingResponses.remove(key)
    }
}
/** Sends a notification (a message with "method" and "params" but no "id"). */
private fun notify(
    method: String,
    params: JSONObject,
) {
    val envelope = JSONObject().apply {
        put("method", method)
        put("params", params)
    }
    sendMessage(envelope)
}
/** Replies to the server request [requestId] with a successful [result] payload. */
private fun sendResult(
    requestId: Any,
    result: JSONObject,
) {
    val envelope = JSONObject().apply {
        put("id", requestId)
        put("result", result)
    }
    sendMessage(envelope)
}
/** Replies to the server request [requestId] with an error object holding [code] and [message]. */
private fun sendError(
    requestId: Any,
    code: Int,
    message: String,
) {
    val errorBody = JSONObject().apply {
        put("code", code)
        put("message", message)
    }
    val envelope = JSONObject().apply {
        put("id", requestId)
        put("error", errorBody)
    }
    sendMessage(envelope)
}
/**
 * Writes [message] as a single line to the child's stdin and flushes. The write is done
 * under writerLock so lines from different threads do not interleave.
 */
private fun sendMessage(message: JSONObject) {
    synchronized(writerLock) {
        with(writer) {
            write(message.toString())
            newLine()
            flush()
        }
    }
}
/**
 * Builds the base instructions passed to "thread/start": fixed guidance lines plus the target
 * package, the detached-mode flag, and the runtime provider id.
 */
private fun buildBaseInstructions(): String {
    val instructionLines = listOf(
        "You are Codex acting as a child Android Genie bound to ${request.targetPackage}.",
        "The user interacts only with the supervising Agent.",
        "Decide your own local plan and choose tools yourself.",
        "Prefer the Android dynamic tools for observing and driving the target app.",
        "If you need clarification or a decision from the supervising Agent, call request_user_input with concise free-form question text.",
        "Do not use hidden control protocols.",
        "Finish with a normal assistant message describing what you accomplished or what blocked you.",
        "Detached target mode allowed: ${request.isDetachedModeAllowed}.",
        "Agent-owned runtime provider: ${runtimeStatus.modelProviderId}.",
    )
    return instructionLines.joinToString(separator = "\n")
}
/**
 * Builds the text of the first turn: the delegated objective followed by the target app
 * section (or an "unavailable" placeholder when no target context was captured).
 *
 * The lines are joined explicitly rather than with a raw string and `trimIndent()`.
 * `trimIndent()` runs on the already-interpolated text, so a multi-line prompt or target
 * section with unindented lines would stop the template's own indentation being removed.
 */
private fun buildDelegatedPrompt(): String {
    val targetSection = targetAppContext?.renderPromptSection()
        ?: "Target app inspection:\n- unavailable"
    return listOf(
        "Delegated objective:",
        request.prompt,
        targetSection,
    ).joinToString(separator = "\n")
}
/**
 * Builds the array of dynamic tool specs sent with "thread/start", one entry per Android tool,
 * in a fixed order.
 */
private fun buildDynamicToolSpecs(): JSONArray {
    val inspectPackage = dynamicToolSpec(
        name = "android.package.inspect",
        description = "Inspect package metadata for the paired Android target app.",
        inputSchema = objectSchema(
            properties = mapOf(
                "packageName" to stringSchema("Optional package name override."),
            ),
        ),
    )
    val launchIntent = dynamicToolSpec(
        name = "android.intent.launch",
        description = "Launch the target app or an explicit target activity/intent.",
        inputSchema = objectSchema(
            properties = mapOf(
                "packageName" to stringSchema("Optional package name override."),
                "action" to stringSchema("Optional Android intent action."),
                "component" to stringSchema("Optional flattened component name."),
            ),
        ),
    )
    // Tools that take no arguments share the empty object schema shape.
    val argumentFreeTools = listOf(
        "android.target.show" to "Show the detached target window.",
        "android.target.hide" to "Hide the detached target window.",
        "android.target.attach" to "Reattach the detached target back to the main display.",
        "android.target.close" to "Close the detached target window.",
        "android.target.capture_frame" to "Capture the detached target window as an image.",
        "android.ui.dump" to "Dump the current UI hierarchy via uiautomator.",
    ).map { (toolName, toolDescription) ->
        dynamicToolSpec(toolName, toolDescription, emptyObjectSchema())
    }
    val tap = dynamicToolSpec(
        name = "android.input.tap",
        description = "Inject a tap at absolute screen coordinates.",
        inputSchema = objectSchema(
            properties = mapOf(
                "x" to numberSchema("Absolute X coordinate."),
                "y" to numberSchema("Absolute Y coordinate."),
            ),
            required = listOf("x", "y"),
        ),
    )
    val typeText = dynamicToolSpec(
        name = "android.input.text",
        description = "Inject text into the focused field.",
        inputSchema = objectSchema(
            properties = mapOf(
                "text" to stringSchema("Text to type."),
            ),
            required = listOf("text"),
        ),
    )
    val pressKey = dynamicToolSpec(
        name = "android.input.key",
        description = "Inject an Android keyevent by name or keycode token.",
        inputSchema = objectSchema(
            properties = mapOf(
                "key" to stringSchema("Android keyevent token, for example ENTER or BACK."),
            ),
            required = listOf("key"),
        ),
    )
    val pause = dynamicToolSpec(
        name = "android.wait",
        description = "Pause briefly to let the UI settle.",
        inputSchema = objectSchema(
            properties = mapOf(
                "millis" to numberSchema("Milliseconds to sleep (1-10000)."),
            ),
            required = listOf("millis"),
        ),
    )
    val orderedSpecs = listOf(inspectPackage, launchIntent) +
        argumentFreeTools +
        listOf(tap, typeText, pressKey, pause)
    val specs = JSONArray()
    orderedSpecs.forEach { spec -> specs.put(spec) }
    return specs
}
/** Builds one tool spec object with "name", "description" and "inputSchema" keys. */
private fun dynamicToolSpec(
    name: String,
    description: String,
    inputSchema: JSONObject,
): JSONObject = JSONObject().apply {
    put("name", name)
    put("description", description)
    put("inputSchema", inputSchema)
}
/** Object schema with no properties, used by tools that take no arguments. */
private fun emptyObjectSchema(): JSONObject = objectSchema(properties = emptyMap())
/**
 * Builds an object schema that rejects additional properties.
 *
 * @param properties property name to property schema.
 * @param required names of required properties; empty by default.
 */
private fun objectSchema(
    properties: Map<String, JSONObject>,
    required: List<String> = emptyList(),
): JSONObject {
    val propertiesJson = JSONObject()
    for ((propertyName, propertySchema) in properties) {
        propertiesJson.put(propertyName, propertySchema)
    }
    return JSONObject().apply {
        put("type", "object")
        put("properties", propertiesJson)
        put("required", JSONArray(required))
        put("additionalProperties", false)
    }
}
/** Schema for a string-typed property carrying [description]. */
private fun stringSchema(description: String): JSONObject = JSONObject().apply {
    put("type", "string")
    put("description", description)
}
/** Schema for a number-typed property carrying [description]. */
private fun numberSchema(description: String): JSONObject = JSONObject().apply {
    put("type", "number")
    put("description", description)
}
/**
 * Converts a tool observation into reply content items: one "inputText" item holding the
 * prompt details, followed by one "inputImage" item per image data URL.
 */
private fun buildDynamicToolContentItems(observation: GenieToolObservation): JSONArray {
    val textItem = JSONObject().apply {
        put("type", "inputText")
        put("text", observation.promptDetails)
    }
    val contentItems = JSONArray().put(textItem)
    for (imageUrl in observation.imageDataUrls) {
        val imageItem = JSONObject().apply {
            put("type", "inputImage")
            put("imageUrl", imageUrl)
        }
        contentItems.put(imageItem)
    }
    return contentItems
}
/**
 * Renders the server's question list as one plain-text question for the Agent.
 *
 * Each question contributes its optional header, its text and an optional bulleted option
 * list; questions are separated by a blank line. When there is more than one question, a
 * line asking for one answer per question is appended.
 *
 * @return the rendered text, or a placeholder sentence when [questions] is empty.
 */
private fun renderAgentQuestion(questions: JSONArray): String {
    val total = questions.length()
    if (total == 0) {
        return "Genie requested input but did not provide a question."
    }
    val body = StringBuilder()
    for (position in 0 until total) {
        val entry = questions.optJSONObject(position) ?: continue
        // Separate from earlier output only when something has already been written.
        if (body.isNotEmpty()) {
            body.append("\n\n")
        }
        val header = entry.optString("header")
        if (header.isNotBlank()) {
            body.append(header).append(":\n")
        }
        body.append(entry.optString("question"))
        val choices = entry.optJSONArray("options")
        if (choices == null || choices.length() == 0) {
            continue
        }
        body.append("\nOptions:")
        for (choicePosition in 0 until choices.length()) {
            val choice = choices.optJSONObject(choicePosition) ?: continue
            body.append("\n- ").append(choice.optString("label"))
            val detail = choice.optString("description")
            if (detail.isNotBlank()) {
                body.append(": ").append(detail)
            }
        }
    }
    val rendered = body.toString()
    if (total == 1) {
        return rendered
    }
    return "$rendered\n\nReply with one answer per question, separated by a blank line."
}
/**
 * Maps the Agent's free-form [answer] onto the question ids in [questions].
 *
 * With several questions the answer is split on blank lines and paragraph N answers
 * question N; questions without a matching paragraph get an empty answer, except the first,
 * which falls back to the whole answer. With exactly one question the whole trimmed answer
 * is used, so a multi-paragraph reply is not cut down to its first paragraph.
 *
 * @return an object keyed by question id, each value shaped as `{"answers": [text]}`.
 *   Questions without an id are skipped.
 */
private fun buildQuestionAnswers(
    questions: JSONArray,
    answer: String,
): JSONObject {
    val wholeAnswer = answer.trim()
    val splitAnswers = if (questions.length() == 1) {
        // The blank-line convention is only requested for multi-question prompts.
        listOf(wholeAnswer).filter(String::isNotEmpty)
    } else {
        answer
            .split(Regex("\\n\\s*\\n"))
            .map(String::trim)
            .filter(String::isNotEmpty)
    }
    val answersJson = JSONObject()
    for (index in 0 until questions.length()) {
        val question = questions.optJSONObject(index) ?: continue
        val questionId = question.optString("id")
        if (questionId.isBlank()) {
            continue
        }
        val responseText = splitAnswers.getOrNull(index)
            ?: if (index == 0) wholeAnswer else ""
        answersJson.put(
            questionId,
            JSONObject().put(
                "answers",
                JSONArray().put(responseText),
            ),
        )
    }
    return answersJson
}
}

View File

@@ -0,0 +1,15 @@
package com.openai.codex.genie
import android.content.Context
import java.io.File
import java.io.IOException
/** Locates the codex executable that is packaged as a native library named `libcodex.so`. */
object CodexBinaryLocator {
    /**
     * Returns the `libcodex.so` file inside the app's native library directory.
     *
     * @throws IOException when the file does not exist.
     */
    fun resolve(context: Context): File =
        File(context.applicationInfo.nativeLibraryDir, "libcodex.so").also { candidate ->
            if (!candidate.exists()) {
                throw IOException("codex binary missing at ${candidate.absolutePath}")
            }
        }
}

View File

@@ -5,47 +5,17 @@ import android.app.agent.GenieRequest
import android.app.agent.GenieService
import android.util.Log
import java.io.IOException
import java.util.ArrayDeque
import java.util.concurrent.ConcurrentHashMap
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.TimeUnit
class CodexGenieService : GenieService() {
companion object {
private const val TAG = "CodexGenieService"
private const val MAX_MODEL_TURNS = 12
private const val MAX_OBJECTIVE_PROMPT_CHARS = 240
private const val MAX_AGENT_ANSWER_CHARS = 120
private const val MAX_TOOL_OBSERVATIONS = 6
private val GENIE_RESPONSE_INSTRUCTIONS =
"""
You are Codex acting as an Android Genie.
Reply with exactly one line that starts with TOOL:, QUESTION:, or RESULT:.
Use TOOL: with a single JSON object on the same line, for example:
TOOL: {"name":"android.intent.launch","arguments":{"packageName":"com.android.deskclock"}}
Available tools:
- android.package.inspect {packageName?}
- android.intent.launch {packageName?, action?, component?}
- android.target.show {}
- android.target.hide {}
- android.target.attach {}
- android.target.close {}
- android.target.capture_frame {}
- android.ui.dump {}
- android.input.tap {x, y}
- android.input.text {text}
- android.input.key {key}
- android.wait {millis}
Use QUESTION: only when you need another free-form answer from the Agent.
Use RESULT: when you are ready to report the next concrete step or final outcome.
Do not emit markdown or extra lines.
""".trimIndent()
}
private val sessionControls = ConcurrentHashMap<String, SessionControl>()
private val sessionControls = ConcurrentHashMap<String, GenieSessionControl>()
override fun onStartGenieSession(request: GenieRequest, callback: Callback) {
val control = SessionControl()
val control = GenieSessionControl()
sessionControls[request.sessionId] = control
Thread {
runSession(request, callback, control)
@@ -56,16 +26,20 @@ class CodexGenieService : GenieService() {
}
override fun onCancelGenieSession(sessionId: String) {
sessionControls.remove(sessionId)?.cancelled = true
sessionControls.remove(sessionId)?.cancel()
Log.i(TAG, "Cancelled session $sessionId")
}
override fun onUserResponse(sessionId: String, response: String) {
sessionControls[sessionId]?.userResponses?.offer(response)
Log.i(TAG, "Received user response for $sessionId")
Log.i(TAG, "Received Agent response for $sessionId")
}
private fun runSession(request: GenieRequest, callback: Callback, control: SessionControl) {
private fun runSession(
request: GenieRequest,
callback: Callback,
control: GenieSessionControl,
) {
val sessionId = request.sessionId
try {
callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
@@ -75,8 +49,9 @@ class CodexGenieService : GenieService() {
)
callback.publishTrace(
sessionId,
"Genie is headless, routes control/data traffic through the Agent-owned Binder bridge, and uses structured Android tools locally.",
"Genie is headless. It hosts codex app-server locally, routes model traffic through the Agent-owned codexd socket, and exposes Android tooling as dynamic tools.",
)
val targetAppContext = runCatching { TargetAppInspector.inspect(this, request.targetPackage) }
targetAppContext.onSuccess { targetApp ->
callback.publishTrace(
@@ -91,275 +66,56 @@ class CodexGenieService : GenieService() {
)
}
if (request.isDetachedModeAllowed) {
callback.requestLaunchDetachedTargetHidden(sessionId)
callback.publishTrace(sessionId, "Requested detached target launch for ${request.targetPackage}.")
}
AgentBridgeClient(this).use { bridgeClient ->
val toolExecutor = AndroidGenieToolExecutor(
context = this,
callback = callback,
sessionId = sessionId,
defaultTargetPackage = request.targetPackage,
)
val runtimeStatus = runCatching { bridgeClient.getRuntimeStatus() }
runtimeStatus.onSuccess { status ->
val accountSuffix = status.accountEmail?.let { " (${it})" } ?: ""
callback.publishTrace(
sessionId,
"Reached Agent Binder bridge; authenticated=${status.authenticated}${accountSuffix}, provider=${status.modelProviderId}, model=${status.effectiveModel ?: "unknown"}, clients=${status.clientCount}.",
)
}
runtimeStatus.onFailure { err ->
callback.publishTrace(
sessionId,
"Agent Binder bridge probe failed: ${err.message}",
)
}
if (request.isDetachedModeAllowed) {
callback.requestLaunchDetachedTargetHidden(sessionId)
callback.publishTrace(sessionId, "Requested detached target launch for ${request.targetPackage}.")
}
callback.publishQuestion(
val runtimeStatus = bridgeClient.getRuntimeStatus()
val accountSuffix = runtimeStatus.accountEmail?.let { " ($it)" } ?: ""
callback.publishTrace(
sessionId,
buildAgentQuestion(
request = request,
targetAppContext = targetAppContext.getOrNull(),
),
"Reached Agent Binder bridge; authenticated=${runtimeStatus.authenticated}${accountSuffix}, provider=${runtimeStatus.modelProviderId}, model=${runtimeStatus.effectiveModel ?: "unknown"}, clients=${runtimeStatus.clientCount}.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
if (control.cancelled) {
callback.publishError(sessionId, "Cancelled")
callback.updateState(sessionId, AgentSessionInfo.STATE_CANCELLED)
if (!runtimeStatus.authenticated || runtimeStatus.effectiveModel.isNullOrBlank()) {
callback.publishResult(
sessionId,
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
var runtime = runtimeStatus.getOrNull()
val toolObservations = ArrayDeque<GenieToolObservation>()
var answer = waitForAgentAnswer(
sessionId = sessionId,
CodexAppServerHost(
context = this,
request = request,
callback = callback,
control = control,
)
Log.i(TAG, "Received Agent answer for $sessionId")
callback.publishTrace(sessionId, "Received Agent answer: $answer")
repeat(MAX_MODEL_TURNS) {
if (control.cancelled) {
return@repeat
}
if (runtime == null || !runtime.authenticated || runtime.effectiveModel.isNullOrBlank()) {
runtime = runCatching { bridgeClient.getRuntimeStatus() }
.onFailure { err ->
callback.publishTrace(
sessionId,
"Agent Binder runtime refresh failed: ${err.message}",
)
}
.getOrNull()
}
if (runtime == null) {
callback.publishResult(
sessionId,
"Reached the Agent bridge, but runtime status was unavailable. Replace this scaffold with a real Codex-driven Genie executor.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
if (!runtime.authenticated || runtime.effectiveModel.isNullOrBlank()) {
callback.publishResult(
sessionId,
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
val activeRuntime = requireNotNull(runtime)
callback.publishTrace(
sessionId,
"Requesting a streaming /v1/responses call through the Agent using ${activeRuntime.effectiveModel}.",
)
val modelResponse = runCatching {
requestModelNextStep(
request = request,
answer = answer,
runtimeStatus = activeRuntime,
targetAppContext = targetAppContext.getOrNull(),
toolObservations = toolObservations.toList(),
bridgeClient = bridgeClient,
)
}
if (modelResponse.isFailure) {
callback.publishTrace(
sessionId,
"Agent-mediated /v1/responses request failed: ${modelResponse.exceptionOrNull()?.message}",
)
callback.publishResult(
sessionId,
"Reached the Agent bridge for ${request.targetPackage}, but the proxied model request failed. Replace this scaffold with a real Codex-driven Genie executor.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
when (val turn = GenieModelTurnParser.parse(modelResponse.getOrThrow())) {
is GenieModelTurn.Result -> {
Log.i(TAG, "Publishing Genie result for $sessionId")
callback.publishResult(sessionId, turn.text)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
is GenieModelTurn.Question -> {
Log.i(TAG, "Publishing Genie follow-up question for $sessionId")
callback.publishTrace(sessionId, "Genie follow-up question: ${turn.text}")
callback.publishQuestion(sessionId, turn.text)
callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
answer = waitForAgentAnswer(
sessionId = sessionId,
callback = callback,
control = control,
)
Log.i(TAG, "Received follow-up Agent answer for $sessionId")
callback.publishTrace(sessionId, "Received Agent answer: $answer")
}
is GenieModelTurn.ToolCall -> {
val observation = runCatching {
toolExecutor.execute(turn)
}.getOrElse { err ->
GenieToolObservation(
name = turn.name,
summary = "Tool ${turn.name} failed: ${err.message}",
promptDetails = "Tool ${turn.name} failed.\nError: ${err.message ?: err::class.java.simpleName}",
)
}
rememberToolObservation(toolObservations, observation)
callback.publishTrace(sessionId, observation.summary)
}
}
runtimeStatus = runtimeStatus,
targetAppContext = targetAppContext.getOrNull(),
).use { host ->
host.run()
}
callback.publishResult(
sessionId,
"Genie stopped after reaching the current tool/model turn limit. Continue the session with more guidance or increase the loop budget in code.",
)
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
return
}
} catch (err: InterruptedException) {
Thread.currentThread().interrupt()
callback.publishError(sessionId, "Interrupted: ${err.message}")
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
} catch (err: IOException) {
if (control.cancelled) {
callback.publishError(sessionId, "Cancelled")
callback.updateState(sessionId, AgentSessionInfo.STATE_CANCELLED)
} else {
callback.publishError(sessionId, err.message ?: err::class.java.simpleName)
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
}
} catch (err: RuntimeException) {
callback.publishError(sessionId, "${err::class.java.simpleName}: ${err.message}")
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
} finally {
sessionControls.remove(sessionId)
control.cancel()
}
}
/**
 * Sends one `/v1/responses` request through the Agent bridge and returns the parsed output
 * text. Only the most recent image from the tool observations is attached.
 *
 * @throws IllegalStateException when the runtime status has no effective model.
 */
private fun requestModelNextStep(
    request: GenieRequest,
    answer: String,
    runtimeStatus: CodexAgentBridge.RuntimeStatus,
    targetAppContext: TargetAppContext?,
    toolObservations: List<GenieToolObservation>,
    bridgeClient: AgentBridgeClient,
): String {
    val model = checkNotNull(runtimeStatus.effectiveModel) { "missing effective model" }
    val latestImage = toolObservations
        .flatMap { observation -> observation.imageDataUrls }
        .takeLast(1)
    val prompt = buildModelPrompt(
        request = request,
        answer = answer,
        targetAppContext = targetAppContext,
        toolObservations = toolObservations,
    )
    val requestBody = CodexAgentBridge.buildResponsesRequest(
        model = model,
        instructions = GENIE_RESPONSE_INSTRUCTIONS,
        prompt = prompt,
        imageDataUrls = latestImage,
    ).toString()
    val response = bridgeClient.sendHttpRequest(
        method = "POST",
        path = "/v1/responses",
        body = requestBody,
    )
    return CodexAgentBridge.parseResponsesOutputText(response)
}
/**
 * Blocks until a user response is available on [control], then reports the
 * session as running again through [callback] and returns that response.
 *
 * The state update happens only after a response has been received.
 */
private fun waitForAgentAnswer(
    sessionId: String,
    callback: Callback,
    control: SessionControl,
): String =
    waitForUserResponse(control).also {
        callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
    }
/**
 * Waits for the next queued user response on [control].
 *
 * The queue is polled in 100 ms slices so that the cancellation flag is
 * re-checked between polls.
 *
 * @throws IOException once [control] is marked cancelled.
 */
private fun waitForUserResponse(control: SessionControl): String {
    while (!control.cancelled) {
        control.userResponses.poll(100, TimeUnit.MILLISECONDS)?.let { queued ->
            return queued
        }
    }
    throw IOException("Cancelled while waiting for user response")
}
/**
 * Assembles the text prompt sent to the model for the next step.
 *
 * The prompt contains, in order: the target package, the (abbreviated) original
 * objective, the (abbreviated) latest answer, a target-app section, the rendered
 * tool observations, and a closing instruction to emit exactly one line starting
 * with `TOOL:`, `QUESTION:`, or `RESULT:`.
 *
 * @param request supplies the target package and the original objective prompt.
 * @param answer the latest answer text; truncated to MAX_AGENT_ANSWER_CHARS.
 * @param targetAppContext optional target-app details; a fixed "unavailable"
 *   section is used when no rendered section is available.
 * @param toolObservations observations rendered via `renderForPrompt()` and
 *   separated by blank lines; a placeholder sentence is used when the result is blank.
 * @return the assembled prompt string.
 */
private fun buildModelPrompt(
request: GenieRequest,
answer: String,
targetAppContext: TargetAppContext?,
toolObservations: List<GenieToolObservation>,
): String {
// Cap the free-form inputs so they cannot grow the prompt without bound.
val objective = abbreviate(request.prompt, MAX_OBJECTIVE_PROMPT_CHARS)
val agentAnswer = abbreviate(answer, MAX_AGENT_ANSWER_CHARS)
// Fall back to a fixed "unavailable" section when there is nothing to render.
val targetSummary = targetAppContext?.renderPromptSection()
?: "Target app inspection:\n- unavailable"
val toolSummary = toolObservations.joinToString(separator = "\n\n") { it.renderForPrompt() }
.ifBlank { "No tool observations yet." }
// NOTE(review): trimIndent() runs on the already-interpolated string, so the
// multi-line summaries take part in the common-indent calculation — confirm the
// rendered layout is what the model is expected to see.
return """
You are Codex acting as an Android Genie for the target package ${request.targetPackage}.
Original objective: $objective
The Agent answered your latest question with: $agentAnswer
$targetSummary
Recent tool observations:
$toolSummary
Emit exactly one line starting with TOOL:, QUESTION:, or RESULT:.
""".trimIndent()
}
/**
 * Produces the opening question shown before Genie starts driving the target.
 *
 * The target is named by [targetAppContext]'s display name when one is
 * available, otherwise by the request's target package.
 */
private fun buildAgentQuestion(
    request: GenieRequest,
    targetAppContext: TargetAppContext?,
): String {
    val targetLabel = when (val label = targetAppContext?.displayName()) {
        null -> request.targetPackage
        else -> label
    }
    return "Codex Genie is ready to drive $targetLabel. Reply with any extra constraints or answer 'continue' to let Genie proceed."
}
/**
 * Shortens [value] so that the result is at most [maxChars] characters long.
 *
 * Strings that already fit are returned unchanged. Longer strings are cut to
 * `maxChars - 1` characters and terminated with a single ellipsis character
 * (U+2026), so truncation is visible to the reader and the total length is
 * exactly [maxChars].
 *
 * @param value the text to shorten.
 * @param maxChars the maximum length of the returned string.
 * @return [value] itself, a truncated copy ending in an ellipsis, or the empty
 *   string when [maxChars] leaves no room for any character.
 */
private fun abbreviate(value: String, maxChars: Int): String {
    if (value.length <= maxChars) {
        return value
    }
    if (maxChars <= 0) {
        // No room even for the ellipsis; take() would reject the negative count.
        return ""
    }
    // Reserve the final slot for the ellipsis marker.
    return value.take(maxChars - 1) + "\u2026"
}
/**
 * Appends [observation] to [toolObservations] and evicts entries from the
 * front until at most MAX_TOOL_OBSERVATIONS remain.
 */
private fun rememberToolObservation(
    toolObservations: ArrayDeque<GenieToolObservation>,
    observation: GenieToolObservation,
) {
    toolObservations.addLast(observation)
    // Number of oldest entries that no longer fit in the window.
    val overflow = toolObservations.size - MAX_TOOL_OBSERVATIONS
    repeat(overflow.coerceAtLeast(0)) {
        toolObservations.removeFirst()
    }
}
/**
 * Mutable per-session state: a cancellation flag and the queue of user
 * responses waiting to be consumed.
 */
private class SessionControl {
    /** Set to true to make waiters stop polling [userResponses]. */
    @Volatile
    var cancelled: Boolean = false

    /** Responses delivered for this session, in arrival order. */
    val userResponses: LinkedBlockingQueue<String> = LinkedBlockingQueue()
}
}

View File

@@ -0,0 +1,31 @@
package com.openai.codex.genie
import java.io.IOException
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.TimeUnit
/**
 * Per-session control state for a Genie session: a cancellation flag, the
 * currently attached [Process] (if any), and the queue of responses waiting to
 * be consumed by [waitForUserResponse].
 */
class GenieSessionControl {
    /** True once [cancel] has been called (or the flag was set directly). */
    @Volatile
    var cancelled: Boolean = false

    /** Process associated with this session; destroyed and cleared by [cancel]. */
    @Volatile
    var process: Process? = null

    /** Responses delivered for this session, in arrival order. */
    val userResponses: LinkedBlockingQueue<String> = LinkedBlockingQueue()

    /** Marks the session cancelled, destroys the attached process, and drops the reference. */
    fun cancel() {
        cancelled = true
        process?.destroy()
        process = null
    }

    /**
     * Waits for the next queued response.
     *
     * The queue is polled in 100 ms slices so that [cancelled] is re-checked
     * between polls.
     *
     * @throws IOException once the session is cancelled.
     */
    fun waitForUserResponse(): String {
        while (!cancelled) {
            userResponses.poll(100, TimeUnit.MILLISECONDS)?.let { queued ->
                return queued
            }
        }
        throw IOException("Cancelled while waiting for Agent response")
    }
}

View File

@@ -6,7 +6,7 @@ This document tracks the Android Agent Platform refactor that moves Codex from a
single `codexd` foreground-service wrapper to a framework-native Agent/Genie
architecture.
The current repo now contains the first implementation slice:
The current repo now contains these implementation slices:
- `android/app` is a buildable **Agent** app scaffold.
- `android/genie` is a buildable **Genie** app scaffold.
@@ -14,33 +14,32 @@ The current repo now contains the first implementation slice:
- register `AgentService`
- plan target packages for a user objective
- launch direct parent + child framework sessions
- start a Genie session for a target package
- start one Genie session per selected target package
- display framework session state and event timelines
- answer waiting Genie questions
- attach detached targets
- The Genie app currently validates framework lifecycle, detached-target
requests, question flow, and result publication with a placeholder executor.
- The first internal Agent<->Genie control plane now uses an exported
- The Genie app now hosts a real `codex app-server` subprocess, packaged inside
the Genie APK as `libcodex.so`.
- The first internal Agent<->Genie control plane uses an exported
**Binder/AIDL service** in the Agent app, not framework question/answer
events.
- The current Binder bridge exposes small fixed-form calls, and the Genie
runtime already uses it to fetch Agent-owned runtime metadata from the
embedded `codexd`, including auth status and the effective model/provider.
- The Genie runtime now also inspects the paired target package from inside the
- The Genie runtime inspects the paired target package from inside the
target-app sandbox and feeds package metadata plus launcher intent details
into the bridged model prompt.
- The Genie scaffold now issues one real **streaming `/v1/responses`** request
through that bridge after the user answer, proving that model traffic can
stay Agent-owned even while the Genie runs inside the target-app sandbox.
- The Genie runtime now supports a first generic structured tool loop with
reusable Android capabilities instead of app-specific hardcoded behavior.
- Non-bridge Genie questions now surface through an Agent-owned notification,
which gives the Agent a concrete user-escalation path without making the
Genie the user-facing surface.
- The Agent now also attempts to answer non-bridge Genie questions through the
embedded `codexd` runtime before falling back to that notification path.
- Runtime testing on the emulator now shows that the exported Agent Binder
service is reachable from Genie execution for the current control-plane calls.
into the delegated Codex prompt.
- The hosted `codex app-server` process routes model traffic through the
Agent-owned `codexd` abstract Unix socket, keeping network/auth Agent-owned
even while the Genie runs inside the target-app sandbox.
- The Genie runtime exposes reusable Android capabilities to Codex as
**dynamic tools**, not via a custom `TOOL:` text protocol.
- Non-bridge Genie questions surface through AgentSDK question flow by mapping
`request_user_input` back into Agent-managed questions and answers.
- The Agent also attempts to answer Genie questions through the embedded
`codexd` runtime before falling back to notification/UI escalation.
- Runtime testing on the emulator shows that the exported Agent Binder service
is reachable from Genie execution for the current bootstrap calls.
The Rust `codexd` service/client split remains in place and is still the
existing network/auth bridge while this refactor proceeds.
@@ -71,6 +70,7 @@ existing network/auth bridge while this refactor proceeds.
- Internal Agent<->Genie coordination now splits into:
- Binder/AIDL for fixed-form control/data RPC
- AgentSDK session events for free-form product dialogue
- hosted `codex app-server` inside Genie for the actual Codex execution loop
## Runtime Model
@@ -98,12 +98,12 @@ existing network/auth bridge while this refactor proceeds.
- driving the paired target app
- publishing trace, question, result, and error events
- requesting detached target actions when appropriate
- The current implementation is a placeholder executor. It proves:
- framework start/cancel behavior
- question/answer flow
- detached-target requests
- result publication
- Agent-mediated Binder bridge requests across the Agent/Genie boundary
- The current implementation hosts `codex app-server` inside the Genie sandbox.
- Kotlin is now only the host/bridge layer for:
- framework lifecycle and result publication
- Android dynamic tool execution
- Agent escalation via `request_user_input`
- runtime bootstrap from the Agent-owned Binder bridge
## First Milestone Scope
@@ -120,14 +120,12 @@ existing network/auth bridge while this refactor proceeds.
- Question answering and detached-target attach controls
- Exported Binder bridge request handling in `CodexAgentBridgeService`
- Binder bridge request issuance in `CodexGenieService`
- Generic small HTTP request/response envelopes over the Binder bridge, with
the Genie using the real `codexd` HTTP response bodies
- Agent-owned `/internal/runtime/status` metadata for Genie bootstrap
- Target-app package metadata and launcher-intent inspection from the Genie
sandbox, with that context included in the bridged model prompt
- One real streaming proxied `/v1/responses` request from Genie through the
Agent-owned bridge after the user answer
- A generic structured tool loop inside Genie with:
sandbox, with that context included in the delegated Codex prompt
- Hosted `codex app-server` inside Genie, with model traffic routed through the
Agent-owned `codexd` abstract socket
- Android dynamic tools registered on the Genie Codex thread with:
- `android.package.inspect`
- `android.intent.launch`
- detached target show/hide/attach/close
@@ -135,28 +133,31 @@ existing network/auth bridge while this refactor proceeds.
- UI hierarchy dump
- shell-backed input injection helpers (`tap`, `text`, `key`)
- bounded waits
- Agent-owned question notifications for non-bridge Genie questions
- Agent-mediated free-form answers for non-bridge Genie questions, using the
current embedded `codexd` runtime as the temporary answer engine
- `request_user_input` bridged from hosted Codex back into AgentSDK questions
- Agent-owned question notifications for Genie questions that need user input
- Agent-mediated free-form answers for Genie questions, using the current
embedded `codexd` runtime as the temporary answer engine
- Abstract-unix-socket support in the legacy Rust bridge via `@name` or
`abstract:name`, so the compatibility transport can move off app-private
filesystem sockets when Agent<->Genie traffic is introduced
### Not done yet
- Replacing the placeholder Genie executor with a real Codex runtime
- Moving network/auth mediation from `codexd` into the Agent runtime
- Expanding the Binder control plane beyond the current fixed-form runtime-status
and proxied-HTTP calls
- Wiring Android-native target-driving tools into the Genie runtime
- Expanding the Binder control plane beyond the current fixed-form runtime
bootstrap/status calls
- Making the Agent the default product surface instead of the legacy service app
- Replacing the remaining Agent-side use of embedded `codexd` for planning and
auto-answering with a first-class Agent runtime
- Adding more Android-native tool surfaces and richer observation types to the
hosted Genie runtime
## Current Code Layout
- `android/app`
- Agent scaffold and legacy `codexd` wrapper live together for now
- `android/genie`
- standalone Genie scaffold APK
- standalone Genie scaffold APK with hosted `codex app-server`
- `android/app/src/main/java/com/openai/codexd/CodexAgentService.kt`
- framework `AgentService`
- `android/app/src/main/java/com/openai/codexd/AgentSessionController.kt`
@@ -164,13 +165,14 @@ existing network/auth bridge while this refactor proceeds.
- `android/app/src/main/java/com/openai/codexd/MainActivity.kt`
- Agent session UI plus existing `codexd` bridge controls
- `android/genie/src/main/java/com/openai/codex/genie/CodexGenieService.kt`
- placeholder Genie executor
- Genie lifecycle host for the embedded `codex app-server`
- `android/genie/src/main/java/com/openai/codex/genie/CodexAppServerHost.kt`
- stdio JSON-RPC host for `codex app-server`, dynamic tools, and
`request_user_input` bridging
- `android/app/src/main/java/com/openai/codexd/CodexAgentBridgeService.kt`
- exported Binder/AIDL bridge for Genie control-plane calls
- `android/genie/src/main/java/com/openai/codex/genie/AgentBridgeClient.kt`
- Genie-side Binder client for the Agent bridge service
- `android/genie/src/main/java/com/openai/codex/genie/CodexAgentBridge.kt`
- Genie-side request/response helpers for bridged model traffic
- `android/app/src/main/java/com/openai/codexd/CodexdLocalClient.kt`
- Agent-local client for the embedded `codexd` bridge
@@ -182,6 +184,13 @@ Set the Agent Platform stub SDK zip path:
export ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP=/path/to/android-agent-platform-stub-sdk.zip
```
Build both Android binaries first:
```bash
just android-build
just android-service-build
```
Build both Android apps:
```bash
@@ -189,13 +198,14 @@ cd android
./gradlew :genie:assembleDebug :app:assembleDebug
```
The Agent app still depends on `just android-service-build` for the packaged
`codexd` JNI binaries.
The Agent app depends on `just android-service-build` for the packaged
`codexd` JNI binaries. The Genie app now also depends on `just android-build`
for the packaged `codex` JNI binaries.
## Next Implementation Steps
1. Move the placeholder Genie session executor to a real Codex runtime role.
2. Expand the Binder control plane into a fuller Agent<->Genie runtime API.
3. Split the legacy `codexd` concerns out of the Agent UI once the Agent owns
1. Expand the Binder control plane into a fuller Agent<->Genie runtime API.
2. Split the legacy `codexd` concerns out of the Agent UI once the Agent owns
auth and transport directly.
4. Add Android-native tool surfaces to Genie for target inspection and control.
3. Add more Android-native tool surfaces and richer observation types to the
hosted Genie runtime.

View File

@@ -32,13 +32,16 @@ stub SDK:
```bash
export ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP=/path/to/android-agent-platform-stub-sdk.zip
just android-build
just android-service-build
cd android
./gradlew :genie:assembleDebug :app:assembleDebug
```
The Agent/Genie prototype modules require
`ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP` (or `-PagentPlatformStubSdkZip=...`) so
Gradle can compile against the stub SDK jar.
Gradle can compile against the stub SDK jar. The Genie APK now also packages
the Android `codex` binary as `libcodex.so`, so `just android-build` must run
before `:genie:assembleDebug`.
If `cargo-ndk` cannot find your NDK, set:
```bash