mirror of
https://github.com/openai/codex.git
synced 2026-04-24 14:45:27 +00:00
Host Genie sessions with codex app-server
Package the Android codex binary into the Genie APK and run hosted codex app-server inside Genie sessions, with Android dynamic tools and AgentSDK question bridging replacing the live TOOL/QUESTION/RESULT scaffold. Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -22,6 +22,12 @@ val agentPlatformStubSdkZip = providers
|
||||
val extractedAgentPlatformJar = layout.buildDirectory.file(
|
||||
"generated/agent-platform/android-agent-platform-stub-sdk.jar"
|
||||
)
|
||||
// Parent directory of the Gradle root project; codex binaries are looked up beneath it.
val repoRoot = rootProject.projectDir.parentFile

// Android ABI directory name -> target triple directory used under codex-rs/target/android.
val codexTargets: Map<String, String> = mapOf(
    "arm64-v8a" to "aarch64-linux-android",
    "x86_64" to "x86_64-linux-android",
)

// Staging directory inside the build directory for the per-ABI codex binaries.
val codexJniDir = layout.buildDirectory.dir("generated/codex-jni")
|
||||
|
||||
android {
|
||||
namespace = "com.openai.codex.genie"
|
||||
@@ -49,6 +55,10 @@ android {
|
||||
sourceCompatibility = androidJavaVersion
|
||||
targetCompatibility = androidJavaVersion
|
||||
}
|
||||
|
||||
packaging {
|
||||
jniLibs.useLegacyPackaging = true
|
||||
}
|
||||
}
|
||||
|
||||
val extractAgentPlatformStubSdk = tasks.register<Sync>("extractAgentPlatformStubSdk") {
|
||||
@@ -65,8 +75,35 @@ val extractAgentPlatformStubSdk = tasks.register<Sync>("extractAgentPlatformStub
|
||||
into(outputDir)
|
||||
}
|
||||
|
||||
// Stages the prebuilt codex binary for every ABI in `codexTargets` into `codexJniDir`,
// renamed to libcodex.so inside a directory named after the ABI.
val syncCodexCliJniLibs = tasks.register<Sync>("syncCodexCliJniLibs") {
    // ABI -> expected location of the prebuilt binary for that ABI.
    val binariesByAbi = codexTargets.mapValues { (_, triple) ->
        file("${repoRoot}/codex-rs/target/android/${triple}/release/codex")
    }

    into(codexJniDir)
    binariesByAbi.forEach { (abi, binary) ->
        from(binary) {
            into(abi)
            rename { "libcodex.so" }
        }
    }

    // Fail with an actionable message before syncing if any binary has not been built.
    doFirst {
        binariesByAbi.forEach { (abi, binary) ->
            if (!binary.exists()) {
                throw GradleException(
                    "Missing codex binary for ${abi} at ${binary}. Run `just android-build` from the repo root."
                )
            }
        }
    }
}
|
||||
|
||||
// Register the staged codex binaries as an additional JNI library source directory.
android.sourceSets.getByName("main").jniLibs.srcDir(codexJniDir.get().asFile)

// Both generated inputs must be in place before the Android build starts.
tasks.named("preBuild") {
    dependsOn(extractAgentPlatformStubSdk, syncCodexCliJniLibs)
}
|
||||
|
||||
dependencies {
|
||||
|
||||
@@ -25,10 +25,13 @@ class AndroidGenieToolExecutor(
|
||||
private const val MAX_UI_XML_CHARS = 8_000
|
||||
}
|
||||
|
||||
fun execute(toolCall: GenieModelTurn.ToolCall): GenieToolObservation {
|
||||
return when (toolCall.name) {
|
||||
"android.package.inspect" -> inspectPackage(toolCall.arguments)
|
||||
"android.intent.launch" -> launchIntent(toolCall.arguments)
|
||||
fun execute(
|
||||
toolName: String,
|
||||
arguments: JSONObject,
|
||||
): GenieToolObservation {
|
||||
return when (toolName) {
|
||||
"android.package.inspect" -> inspectPackage(arguments)
|
||||
"android.intent.launch" -> launchIntent(arguments)
|
||||
"android.target.show" -> requestTargetVisibility(
|
||||
action = "show",
|
||||
request = callback::requestShowDetachedTarget,
|
||||
@@ -47,11 +50,11 @@ class AndroidGenieToolExecutor(
|
||||
)
|
||||
"android.target.capture_frame" -> captureDetachedTargetFrame()
|
||||
"android.ui.dump" -> dumpUiHierarchy()
|
||||
"android.input.tap" -> tap(toolCall.arguments)
|
||||
"android.input.text" -> inputText(toolCall.arguments)
|
||||
"android.input.key" -> inputKey(toolCall.arguments)
|
||||
"android.wait" -> waitFor(toolCall.arguments)
|
||||
else -> throw IOException("Unknown tool: ${toolCall.name}")
|
||||
"android.input.tap" -> tap(arguments)
|
||||
"android.input.text" -> inputText(arguments)
|
||||
"android.input.key" -> inputKey(arguments)
|
||||
"android.wait" -> waitFor(arguments)
|
||||
else -> throw IOException("Unknown tool: $toolName")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,681 @@
|
||||
package com.openai.codex.genie
|
||||
|
||||
import android.app.agent.AgentSessionInfo
|
||||
import android.app.agent.GenieRequest
|
||||
import android.app.agent.GenieService
|
||||
import android.content.Context
|
||||
import android.util.Log
|
||||
import java.io.BufferedWriter
|
||||
import java.io.Closeable
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
import java.util.concurrent.TimeUnit
|
||||
import java.util.concurrent.atomic.AtomicInteger
|
||||
import org.json.JSONArray
|
||||
import org.json.JSONObject
|
||||
|
||||
class CodexAppServerHost(
|
||||
private val context: Context,
|
||||
private val request: GenieRequest,
|
||||
private val callback: GenieService.Callback,
|
||||
private val control: GenieSessionControl,
|
||||
private val runtimeStatus: CodexAgentBridge.RuntimeStatus,
|
||||
private val targetAppContext: TargetAppContext?,
|
||||
) : Closeable {
|
||||
companion object {
|
||||
private const val TAG = "CodexAppServerHost"
|
||||
private const val AGENT_SOCKET_PATH = "@com.openai.codexd.codexd"
|
||||
private const val REQUEST_TIMEOUT_MS = 30_000L
|
||||
private const val POLL_TIMEOUT_MS = 250L
|
||||
}
|
||||
|
||||
private val requestIdSequence = AtomicInteger(1)
|
||||
private val pendingResponses = ConcurrentHashMap<String, LinkedBlockingQueue<JSONObject>>()
|
||||
private val inboundMessages = LinkedBlockingQueue<JSONObject>()
|
||||
private val writerLock = Any()
|
||||
private val streamedAgentMessages = mutableMapOf<String, StringBuilder>()
|
||||
|
||||
private lateinit var process: Process
|
||||
private lateinit var writer: BufferedWriter
|
||||
private var stdoutThread: Thread? = null
|
||||
private var stderrThread: Thread? = null
|
||||
private var finalAgentMessage: String? = null
|
||||
private var resultPublished = false
|
||||
|
||||
/**
 * Drives one hosted session end to end: starts the app-server process, performs the
 * initialize handshake, starts a thread and a turn, publishes a trace naming the thread,
 * and then processes inbound messages until [eventLoop] returns or throws.
 */
fun run() {
    startProcess()
    initialize()
    startThread().also { hostedThreadId ->
        startTurn(hostedThreadId)
        callback.publishTrace(
            request.sessionId,
            "Hosted codex app-server thread $hostedThreadId for ${request.targetPackage}.",
        )
    }
    eventLoop()
}
|
||||
|
||||
/**
 * Tears the host down: interrupts both pump threads, closes the stdin writer, destroys the
 * child process if one was started, and clears `control.process`.
 */
override fun close() {
    listOf(stdoutThread, stderrThread).forEach { pump -> pump?.interrupt() }
    synchronized(writerLock) {
        // runCatching also absorbs the failure raised when `writer` was never initialized.
        runCatching { writer.close() }
    }
    if (::process.isInitialized) {
        process.destroy()
    }
    control.process = null
}
|
||||
|
||||
/**
 * Launches `codex app-server --listen stdio://` with CODEX_HOME pointing at a `codex-home`
 * directory under the app's files dir, records the process on `control`, opens the stdin
 * writer, and starts the stdout and stderr pump threads.
 */
private fun startProcess() {
    val codexHome = File(context.filesDir, "codex-home")
    codexHome.mkdirs()

    val command = listOf(
        CodexBinaryLocator.resolve(context).absolutePath,
        "app-server",
        "--listen",
        "stdio://",
    )
    val builder = ProcessBuilder(command)
    builder.environment().apply {
        put("CODEX_HOME", codexHome.absolutePath)
        put("CODEX_OPENAI_UNIX_SOCKET", AGENT_SOCKET_PATH)
        put("OPENAI_BASE_URL", "http://localhost/v1")
        put("RUST_LOG", "info")
    }

    process = builder.start()
    control.process = process
    writer = process.outputStream.bufferedWriter()
    startStdoutPump()
    startStderrPump()
}
|
||||
|
||||
/**
 * Starts the thread that reads the app-server's stdout line by line, parses each non-blank
 * line as a JSON object, and hands it to [routeInbound]. Unparseable lines are logged and
 * skipped.
 *
 * A read failure (for example the stream being closed while [close] destroys the process)
 * ends the pump quietly instead of escaping the thread as an uncaught exception.
 */
private fun startStdoutPump() {
    stdoutThread = Thread {
        try {
            process.inputStream.bufferedReader().useLines { lines ->
                for (line in lines) {
                    if (line.isBlank()) {
                        continue
                    }
                    val message = runCatching { JSONObject(line) }
                        .onFailure { err ->
                            Log.w(TAG, "Failed to parse codex app-server stdout line", err)
                        }
                        .getOrNull()
                        ?: continue
                    routeInbound(message)
                }
            }
        } catch (err: IOException) {
            // Expected during teardown; the event loop detects process exit on its own.
            Log.i(TAG, "codex app-server stdout pump stopped", err)
        }
    }.also {
        it.name = "CodexAppServerStdout-${request.sessionId}"
        it.start()
    }
}
|
||||
|
||||
/**
 * Starts the thread that forwards every non-blank stderr line of the app-server to the
 * Android log at info level.
 *
 * A read failure (for example the stream being closed while [close] destroys the process)
 * ends the pump quietly instead of escaping the thread as an uncaught exception.
 */
private fun startStderrPump() {
    stderrThread = Thread {
        try {
            process.errorStream.bufferedReader().useLines { lines ->
                lines.filter(String::isNotBlank).forEach { line -> Log.i(TAG, line) }
            }
        } catch (err: IOException) {
            // Expected during teardown; nothing else depends on stderr being drained.
            Log.i(TAG, "codex app-server stderr pump stopped", err)
        }
    }.also {
        it.name = "CodexAppServerStderr-${request.sessionId}"
        it.start()
    }
}
|
||||
|
||||
/**
 * Dispatches one parsed stdout message. A message with an `id` but no `method` is treated
 * as a response and offered to the queue registered for that id (dropped when no request
 * is waiting); everything else is queued for the event loop.
 */
private fun routeInbound(message: JSONObject) {
    val isResponse = message.has("id") && !message.has("method")
    if (!isResponse) {
        inboundMessages.offer(message)
        return
    }
    val responseId = message.get("id").toString()
    pendingResponses[responseId]?.offer(message)
}
|
||||
|
||||
/**
 * Performs the handshake: sends an `initialize` request carrying the client info and the
 * `experimentalApi` capability, then sends the `initialized` notification.
 */
private fun initialize() {
    val clientInfo = JSONObject().apply {
        put("name", "android_genie")
        put("title", "Android Genie")
        put("version", "0.1.0")
    }
    val capabilities = JSONObject().put("experimentalApi", true)
    val handshake = JSONObject().apply {
        put("clientInfo", clientInfo)
        put("capabilities", capabilities)
    }
    request(method = "initialize", params = handshake)
    notify("initialized", JSONObject())
}
|
||||
|
||||
/**
 * Sends `thread/start` configured with the runtime's effective model, approval policy
 * `never`, a `read-only` sandbox, the base instructions, and the Android dynamic tool specs.
 *
 * @return the `thread.id` value from the response.
 */
private fun startThread(): String {
    val threadParams = JSONObject().apply {
        put("model", runtimeStatus.effectiveModel)
        put("approvalPolicy", "never")
        put("sandbox", "read-only")
        put("ephemeral", true)
        put("cwd", context.filesDir.absolutePath)
        put("serviceName", "android_genie")
        put("baseInstructions", buildBaseInstructions())
        put("dynamicTools", buildDynamicToolSpecs())
    }
    val started = request(method = "thread/start", params = threadParams)
    return started.getJSONObject("thread").getString("id")
}
|
||||
|
||||
/**
 * Sends `turn/start` for [threadId] with a single text input item containing the
 * delegated prompt.
 */
private fun startTurn(threadId: String) {
    val promptItem = JSONObject().apply {
        put("type", "text")
        put("text", buildDelegatedPrompt())
    }
    val turnParams = JSONObject().apply {
        put("threadId", threadId)
        put("input", JSONArray().put(promptItem))
    }
    request(method = "turn/start", params = turnParams)
}
|
||||
|
||||
/**
 * Processes queued inbound messages until the turn finishes.
 *
 * Messages carrying both `method` and `id` are server requests; messages carrying only
 * `method` are notifications, and the loop returns once [handleNotification] reports the
 * turn as finished.
 *
 * @throws IOException with message "Cancelled" once `control.cancelled` is observed, or
 * when the queue is idle and the app-server process is no longer alive.
 */
private fun eventLoop() {
    while (true) {
        if (control.cancelled) {
            throw IOException("Cancelled")
        }
        val inbound = inboundMessages.poll(POLL_TIMEOUT_MS, TimeUnit.MILLISECONDS)
        when {
            inbound == null -> {
                // Idle poll: only give up when the child process has actually exited.
                if (!process.isAlive) {
                    throw IOException("codex app-server exited with code ${process.exitValue()}")
                }
            }
            inbound.has("method") && inbound.has("id") -> handleServerRequest(inbound)
            inbound.has("method") -> {
                if (handleNotification(inbound)) {
                    return
                }
            }
        }
    }
}
|
||||
|
||||
/**
 * Answers a request initiated by the app-server. Dynamic tool calls and user-input
 * requests are delegated to their handlers; any other method is traced and rejected with
 * JSON-RPC error code -32601.
 */
private fun handleServerRequest(message: JSONObject) {
    val requestId = message.get("id")
    val params = message.optJSONObject("params") ?: JSONObject()
    when (val method = message.getString("method")) {
        "item/tool/call" -> handleDynamicToolCall(requestId, params)
        "item/tool/requestUserInput" -> handleRequestUserInput(requestId, params)
        else -> {
            callback.publishTrace(request.sessionId, "Unsupported codex app-server request: $method")
            sendError(
                requestId = requestId,
                code = -32601,
                message = "Unsupported app-server request: $method",
            )
        }
    }
}
|
||||
|
||||
/**
 * Executes the Android tool named by `params.tool` with `params.arguments` and replies to
 * [requestId] with a `success` flag and the observation rendered as content items.
 *
 * A tool that throws is converted into a failure observation rather than aborting the
 * session. The failure text always carries a usable detail: the exception message, or the
 * exception class name when the message is null, and a blank tool name is shown as
 * "unknown".
 */
private fun handleDynamicToolCall(
    requestId: Any,
    params: JSONObject,
) {
    val toolName = params.optString("tool").trim()
    val toolLabel = toolName.ifBlank { "unknown" }
    val arguments = params.optJSONObject("arguments") ?: JSONObject()
    val toolExecutor = AndroidGenieToolExecutor(
        context = context,
        callback = callback,
        sessionId = request.sessionId,
        defaultTargetPackage = request.targetPackage,
    )
    val observation = runCatching {
        toolExecutor.execute(toolName, arguments)
    }.getOrElse { err ->
        val detail = err.message ?: err::class.java.simpleName
        GenieToolObservation(
            name = toolLabel,
            summary = "Tool $toolLabel failed: $detail",
            promptDetails = "Tool $toolLabel failed.\nError: $detail",
        )
    }
    callback.publishTrace(request.sessionId, observation.summary)
    sendResult(
        requestId = requestId,
        result = JSONObject()
            // Failure is recognized by the " failed:" marker used in the summary above.
            .put("success", !observation.summary.contains(" failed:"))
            .put("contentItems", buildDynamicToolContentItems(observation)),
    )
}
|
||||
|
||||
/**
 * Bridges an app-server user-input request to the supervising Agent: publishes the
 * rendered questions, marks the session as waiting for the user, obtains the answer from
 * `control.waitForUserResponse()`, marks the session running again, and replies to
 * [requestId] with the answers keyed by question id.
 */
private fun handleRequestUserInput(
    requestId: Any,
    params: JSONObject,
) {
    val sessionId = request.sessionId
    val questions = params.optJSONArray("questions") ?: JSONArray()

    callback.publishQuestion(sessionId, renderAgentQuestion(questions))
    callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
    val agentAnswer = control.waitForUserResponse()
    callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
    callback.publishTrace(sessionId, "Received Agent answer for ${request.targetPackage}.")

    val reply = JSONObject().put("answers", buildQuestionAnswers(questions, agentAnswer))
    sendResult(requestId = requestId, result = reply)
}
|
||||
|
||||
/**
 * Applies the side effects of one app-server notification: traces turn start, accumulates
 * streamed agent-message deltas per item id, and forwards item start/completion and turn
 * completion to their handlers. Unknown methods are ignored.
 *
 * @return true only for `turn/completed`, which tells the event loop to stop.
 */
private fun handleNotification(message: JSONObject): Boolean {
    val method = message.getString("method")
    val params = message.optJSONObject("params") ?: JSONObject()
    when (method) {
        "turn/started" ->
            callback.publishTrace(request.sessionId, "codex turn started for ${request.targetPackage}.")
        "item/agentMessage/delta" -> {
            val itemId = params.optString("itemId")
            if (itemId.isNotBlank()) {
                val buffer = streamedAgentMessages.getOrPut(itemId) { StringBuilder() }
                buffer.append(params.optString("delta"))
            }
        }
        "item/started" -> publishItemStartedTrace(params.optJSONObject("item"))
        "item/completed" -> captureCompletedItem(params.optJSONObject("item"))
        "turn/completed" -> finishTurn(params)
    }
    return method == "turn/completed"
}
|
||||
|
||||
/**
 * Publishes a trace line when a dynamic tool call or a command execution item starts.
 * Other item types, and a null [item], are ignored.
 *
 * The command is rendered from the raw argument strings. `JSONArray.join` is avoided
 * because it JSON-quotes and escapes every element, which made the trace show
 * `"ls" "-la"` instead of `ls -la`.
 */
private fun publishItemStartedTrace(item: JSONObject?) {
    if (item == null) {
        return
    }
    when (item.optString("type")) {
        "dynamicToolCall" -> {
            val tool = item.optString("tool")
            callback.publishTrace(request.sessionId, "Codex requested dynamic tool $tool.")
        }
        "commandExecution" -> {
            val command = when (val raw = item.opt("command")) {
                is JSONArray -> (0 until raw.length()).joinToString(" ") { raw.optString(it) }
                // NOTE(review): also accepts a plain string in case the server sends the
                // command pre-joined — confirm against the app-server protocol.
                is String -> raw
                else -> ""
            }.ifBlank { "command" }
            callback.publishTrace(request.sessionId, "Codex started command execution: $command")
        }
    }
}
|
||||
|
||||
/**
 * Records the outcome of a completed item. A null [item] is ignored.
 *
 * - `agentMessage`: remembers the message text as the candidate final result, falling back
 *   to the text accumulated from streamed deltas when the item carries no text. The delta
 *   buffer for the item is released either way.
 * - `commandExecution`: traces status and exit code; a missing or JSON-null exit code is
 *   reported as "unknown".
 * - `dynamicToolCall`: traces the tool name and status.
 */
private fun captureCompletedItem(item: JSONObject?) {
    if (item == null) {
        return
    }
    when (item.optString("type")) {
        "agentMessage" -> {
            val itemId = item.optString("id")
            // Drop the buffer once the item is complete so it does not linger for the session.
            val streamedText = streamedAgentMessages.remove(itemId)?.toString().orEmpty()
            val text = item.optString("text").ifBlank { streamedText }
            if (text.isNotBlank()) {
                finalAgentMessage = text
            }
        }
        "commandExecution" -> {
            val status = item.optString("status")
            // isNull covers both an absent key and an explicit JSON null, which `has` + `opt`
            // would surface as the literal text "null".
            val exitCode = if (item.isNull("exitCode")) null else item.opt("exitCode")
            callback.publishTrace(
                request.sessionId,
                "Command execution completed with status=$status exitCode=${exitCode ?: "unknown"}.",
            )
        }
        "dynamicToolCall" -> {
            val tool = item.optString("tool")
            val status = item.optString("status")
            callback.publishTrace(request.sessionId, "Dynamic tool $tool completed with status=$status.")
        }
    }
}
|
||||
|
||||
/**
 * Publishes the terminal outcome of the turn described by `params.turn`.
 *
 * - `completed`: publishes the final agent message (or a placeholder when none was
 *   captured) and moves the session to COMPLETED.
 * - `interrupted`: publishes an error and moves the session to CANCELLED.
 * - anything else: publishes the turn's `error` value, or a status-based fallback when the
 *   error is absent or JSON null, and moves the session to FAILED.
 */
private fun finishTurn(params: JSONObject) {
    val turn = params.optJSONObject("turn") ?: JSONObject()
    when (turn.optString("status")) {
        "completed" -> {
            val resultText = finalAgentMessage?.takeIf(String::isNotBlank)
                ?: "Genie completed without a final assistant message."
            publishResultOnce(resultText)
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_COMPLETED)
        }
        "interrupted" -> {
            callback.publishError(request.sessionId, "Genie turn interrupted")
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_CANCELLED)
        }
        else -> {
            // `opt` returns the JSONObject.NULL sentinel for an explicit JSON null, which
            // would otherwise be published as the literal text "null".
            val reportedError = if (turn.isNull("error")) null else turn.opt("error")?.toString()
            val errorDetail = reportedError
                ?: "Genie turn failed with status ${turn.optString("status", "unknown")}"
            callback.publishError(request.sessionId, errorDetail)
            callback.updateState(request.sessionId, AgentSessionInfo.STATE_FAILED)
        }
    }
}
|
||||
|
||||
/** Publishes [text] as the session result the first time it is called; later calls do nothing. */
private fun publishResultOnce(text: String) {
    if (!resultPublished) {
        resultPublished = true
        callback.publishResult(request.sessionId, text)
    }
}
|
||||
|
||||
/**
 * Sends a request with a fresh id and blocks until the matching response arrives.
 *
 * @param method method name to invoke on the app-server.
 * @param params parameters object sent with the request.
 * @return the response's `result` object, or an empty object when it has none.
 * @throws IOException when no response arrives within REQUEST_TIMEOUT_MS or the response
 * carries an `error` object.
 */
private fun request(
    method: String,
    params: JSONObject,
): JSONObject {
    val id = requestIdSequence.getAndIncrement().toString()
    val mailbox = LinkedBlockingQueue<JSONObject>(1)
    // Register before sending so the stdout pump can never see the reply first.
    pendingResponses[id] = mailbox
    try {
        val envelope = JSONObject().apply {
            put("id", id)
            put("method", method)
            put("params", params)
        }
        sendMessage(envelope)

        val reply = mailbox.poll(REQUEST_TIMEOUT_MS, TimeUnit.MILLISECONDS)
        if (reply == null) {
            throw IOException("Timed out waiting for $method response")
        }
        reply.optJSONObject("error")?.let { failure ->
            throw IOException("$method failed: ${failure.optString("message", failure.toString())}")
        }
        return reply.optJSONObject("result") ?: JSONObject()
    } finally {
        pendingResponses.remove(id)
    }
}
|
||||
|
||||
/** Sends a notification (a message with `method` and `params` but no `id`). */
private fun notify(
    method: String,
    params: JSONObject,
) {
    val notification = JSONObject().apply {
        put("method", method)
        put("params", params)
    }
    sendMessage(notification)
}
|
||||
|
||||
/** Replies to the server request identified by [requestId] with a successful [result]. */
private fun sendResult(
    requestId: Any,
    result: JSONObject,
) {
    val response = JSONObject().apply {
        put("id", requestId)
        put("result", result)
    }
    sendMessage(response)
}
|
||||
|
||||
/**
 * Replies to the server request identified by [requestId] with an `error` object holding
 * [code] and [message].
 */
private fun sendError(
    requestId: Any,
    code: Int,
    message: String,
) {
    val errorBody = JSONObject().apply {
        put("code", code)
        put("message", message)
    }
    val response = JSONObject().apply {
        put("id", requestId)
        put("error", errorBody)
    }
    sendMessage(response)
}
|
||||
|
||||
/**
 * Writes [message] to the app-server's stdin as a single line and flushes. Writes are
 * serialized on `writerLock` so concurrent senders cannot interleave lines.
 */
private fun sendMessage(message: JSONObject) {
    val line = message.toString()
    synchronized(writerLock) {
        with(writer) {
            write(line)
            newLine()
            flush()
        }
    }
}
|
||||
|
||||
/**
 * Builds the base instructions for the hosted thread: one instruction per line, naming the
 * target package, whether detached mode is allowed, and the runtime's model provider id.
 */
private fun buildBaseInstructions(): String {
    val instructionLines = listOf(
        "You are Codex acting as a child Android Genie bound to ${request.targetPackage}.",
        "The user interacts only with the supervising Agent.",
        "Decide your own local plan and choose tools yourself.",
        "Prefer the Android dynamic tools for observing and driving the target app.",
        "If you need clarification or a decision from the supervising Agent, call request_user_input with concise free-form question text.",
        "Do not use hidden control protocols.",
        "Finish with a normal assistant message describing what you accomplished or what blocked you.",
        "Detached target mode allowed: ${request.isDetachedModeAllowed}.",
        "Agent-owned runtime provider: ${runtimeStatus.modelProviderId}.",
    )
    return instructionLines.joinToString(separator = "\n")
}
|
||||
|
||||
/**
 * Builds the text of the first turn: the delegated objective followed by a blank line and
 * the target-app section (or an "unavailable" placeholder when no inspection result exists).
 *
 * The text is assembled directly instead of via `trimIndent()` on an interpolated raw
 * string. `trimIndent()` runs after interpolation, so a multi-line prompt or target section
 * would decide how much indentation is stripped: a column-0 line inside either value left
 * the template's source indentation in the output, and a uniformly indented prompt could
 * lose its own indentation.
 */
private fun buildDelegatedPrompt(): String {
    val targetSection = targetAppContext?.renderPromptSection()
        ?: "Target app inspection:\n- unavailable"
    return buildString {
        append("Delegated objective:\n")
        append(request.prompt)
        append("\n\n")
        append(targetSection)
    }
}
|
||||
|
||||
/**
 * Builds the array of dynamic tool specifications sent with `thread/start`, in a fixed
 * order: package inspection, intent launch, the argument-less target/UI tools, then the
 * input and wait tools with their required arguments.
 */
private fun buildDynamicToolSpecs(): JSONArray {
    // Tools that take no arguments share the empty object schema.
    fun argumentlessTool(name: String, description: String): JSONObject =
        dynamicToolSpec(name, description, emptyObjectSchema())

    val specs = listOf(
        dynamicToolSpec(
            name = "android.package.inspect",
            description = "Inspect package metadata for the paired Android target app.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "packageName" to stringSchema("Optional package name override."),
                ),
            ),
        ),
        dynamicToolSpec(
            name = "android.intent.launch",
            description = "Launch the target app or an explicit target activity/intent.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "packageName" to stringSchema("Optional package name override."),
                    "action" to stringSchema("Optional Android intent action."),
                    "component" to stringSchema("Optional flattened component name."),
                ),
            ),
        ),
        argumentlessTool("android.target.show", "Show the detached target window."),
        argumentlessTool("android.target.hide", "Hide the detached target window."),
        argumentlessTool("android.target.attach", "Reattach the detached target back to the main display."),
        argumentlessTool("android.target.close", "Close the detached target window."),
        argumentlessTool("android.target.capture_frame", "Capture the detached target window as an image."),
        argumentlessTool("android.ui.dump", "Dump the current UI hierarchy via uiautomator."),
        dynamicToolSpec(
            name = "android.input.tap",
            description = "Inject a tap at absolute screen coordinates.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "x" to numberSchema("Absolute X coordinate."),
                    "y" to numberSchema("Absolute Y coordinate."),
                ),
                required = listOf("x", "y"),
            ),
        ),
        dynamicToolSpec(
            name = "android.input.text",
            description = "Inject text into the focused field.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "text" to stringSchema("Text to type."),
                ),
                required = listOf("text"),
            ),
        ),
        dynamicToolSpec(
            name = "android.input.key",
            description = "Inject an Android keyevent by name or keycode token.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "key" to stringSchema("Android keyevent token, for example ENTER or BACK."),
                ),
                required = listOf("key"),
            ),
        ),
        dynamicToolSpec(
            name = "android.wait",
            description = "Pause briefly to let the UI settle.",
            inputSchema = objectSchema(
                properties = mapOf(
                    "millis" to numberSchema("Milliseconds to sleep (1-10000)."),
                ),
                required = listOf("millis"),
            ),
        ),
    )
    return specs.fold(JSONArray()) { array, spec -> array.put(spec) }
}
|
||||
|
||||
/** Wraps a tool's [name], [description] and [inputSchema] into one specification object. */
private fun dynamicToolSpec(
    name: String,
    description: String,
    inputSchema: JSONObject,
): JSONObject = JSONObject().apply {
    put("name", name)
    put("description", description)
    put("inputSchema", inputSchema)
}
|
||||
|
||||
/** Object schema with no properties, for tools that take no arguments. */
private fun emptyObjectSchema(): JSONObject = objectSchema(properties = emptyMap())
|
||||
|
||||
/**
 * Builds an object schema that forbids additional properties.
 *
 * @param properties property name to schema, emitted in the map's iteration order.
 * @param required names of the properties that must be present; empty by default.
 */
private fun objectSchema(
    properties: Map<String, JSONObject>,
    required: List<String> = emptyList(),
): JSONObject {
    val propertySchemas = properties.entries.fold(JSONObject()) { acc, (propertyName, schema) ->
        acc.put(propertyName, schema)
    }
    return JSONObject().apply {
        put("type", "object")
        put("properties", propertySchemas)
        put("required", JSONArray(required))
        put("additionalProperties", false)
    }
}
|
||||
|
||||
/** Schema for a string-typed property carrying [description]. */
private fun stringSchema(description: String): JSONObject = JSONObject().apply {
    put("type", "string")
    put("description", description)
}
|
||||
|
||||
/** Schema for a number-typed property carrying [description]. */
private fun numberSchema(description: String): JSONObject = JSONObject().apply {
    put("type", "number")
    put("description", description)
}
|
||||
|
||||
/**
 * Converts a tool observation into content items: one `inputText` item holding the prompt
 * details, followed by one `inputImage` item per image data URL.
 */
private fun buildDynamicToolContentItems(observation: GenieToolObservation): JSONArray {
    val textItem = JSONObject().apply {
        put("type", "inputText")
        put("text", observation.promptDetails)
    }
    val contentItems = JSONArray().put(textItem)
    for (imageUrl in observation.imageDataUrls) {
        val imageItem = JSONObject().apply {
            put("type", "inputImage")
            put("imageUrl", imageUrl)
        }
        contentItems.put(imageItem)
    }
    return contentItems
}
|
||||
|
||||
/**
 * Renders the app-server's questions as plain text for the supervising Agent.
 *
 * Each question becomes an optional `header:` line, the question text, and an optional
 * bulleted `Options:` list; questions are separated by a blank line. When more than one
 * question is present, a closing line asks for one answer per question separated by a
 * blank line. An empty array yields a fixed placeholder sentence.
 */
private fun renderAgentQuestion(questions: JSONArray): String {
    val questionCount = questions.length()
    if (questionCount == 0) {
        return "Genie requested input but did not provide a question."
    }

    // Text for a single question entry, without any separator.
    fun renderOne(question: JSONObject): String = buildString {
        val header = question.optString("header")
        if (header.isNotBlank()) {
            append(header).append(":\n")
        }
        append(question.optString("question"))

        val options = question.optJSONArray("options")
        if (options != null && options.length() > 0) {
            append("\nOptions:")
            for (optionIndex in 0 until options.length()) {
                val option = options.optJSONObject(optionIndex) ?: continue
                append("\n- ").append(option.optString("label"))
                val description = option.optString("description")
                if (description.isNotBlank()) {
                    append(": ").append(description)
                }
            }
        }
    }

    val rendered = StringBuilder()
    for (position in 0 until questionCount) {
        val question = questions.optJSONObject(position) ?: continue
        // Separator depends on what has been emitted so far, not on the question index.
        if (rendered.isNotEmpty()) {
            rendered.append("\n\n")
        }
        rendered.append(renderOne(question))
    }

    val body = rendered.toString()
    return if (questionCount == 1) {
        body
    } else {
        "$body\n\nReply with one answer per question, separated by a blank line."
    }
}
|
||||
|
||||
/**
 * Maps the Agent's free-form [answer] onto the ids of [questions].
 *
 * With a single question the whole trimmed answer is used, so an answer containing blank
 * lines is no longer cut down to its first paragraph. With several questions the answer is
 * split on blank lines and assigned by position; paragraphs beyond the number of questions
 * are appended to the last question instead of being dropped. Questions without a matching
 * paragraph receive an empty string, except the first, which falls back to the whole answer.
 *
 * @return an object keyed by question id, each value shaped as `{"answers": [text]}`.
 * Entries that are not objects or have a blank `id` are skipped.
 */
private fun buildQuestionAnswers(
    questions: JSONArray,
    answer: String,
): JSONObject {
    val trimmedAnswer = answer.trim()
    val questionCount = questions.length()
    val paragraphs = if (questionCount <= 1) {
        // No splitting was requested from the Agent for a single question.
        listOf(trimmedAnswer)
    } else {
        trimmedAnswer
            .split(Regex("\\n\\s*\\n"))
            .map(String::trim)
            .filter(String::isNotEmpty)
    }
    val lastIndex = questionCount - 1

    val answersJson = JSONObject()
    for (index in 0 until questionCount) {
        val question = questions.optJSONObject(index) ?: continue
        val questionId = question.optString("id")
        if (questionId.isBlank()) {
            continue
        }
        val responseText = when {
            index >= paragraphs.size -> if (index == 0) trimmedAnswer else ""
            // The final question absorbs any surplus paragraphs.
            index == lastIndex -> paragraphs.drop(index).joinToString("\n\n")
            else -> paragraphs[index]
        }
        answersJson.put(
            questionId,
            JSONObject().put("answers", JSONArray().put(responseText)),
        )
    }
    return answersJson
}
|
||||
}
|
||||
@@ -0,0 +1,15 @@
|
||||
package com.openai.codex.genie
|
||||
|
||||
import android.content.Context
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
|
||||
/** Locates the codex executable that is packaged with the app as a native library. */
object CodexBinaryLocator {
    /**
     * Returns `libcodex.so` from the application's native library directory.
     *
     * @throws IOException when the file does not exist at that location.
     */
    fun resolve(context: Context): File {
        val candidate = File(context.applicationInfo.nativeLibraryDir, "libcodex.so")
        return candidate.takeIf(File::exists)
            ?: throw IOException("codex binary missing at ${candidate.absolutePath}")
    }
}
|
||||
@@ -5,47 +5,17 @@ import android.app.agent.GenieRequest
|
||||
import android.app.agent.GenieService
|
||||
import android.util.Log
|
||||
import java.io.IOException
|
||||
import java.util.ArrayDeque
|
||||
import java.util.concurrent.ConcurrentHashMap
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
class CodexGenieService : GenieService() {
|
||||
companion object {
|
||||
private const val TAG = "CodexGenieService"
|
||||
private const val MAX_MODEL_TURNS = 12
|
||||
private const val MAX_OBJECTIVE_PROMPT_CHARS = 240
|
||||
private const val MAX_AGENT_ANSWER_CHARS = 120
|
||||
private const val MAX_TOOL_OBSERVATIONS = 6
|
||||
private val GENIE_RESPONSE_INSTRUCTIONS =
|
||||
"""
|
||||
You are Codex acting as an Android Genie.
|
||||
Reply with exactly one line that starts with TOOL:, QUESTION:, or RESULT:.
|
||||
Use TOOL: with a single JSON object on the same line, for example:
|
||||
TOOL: {"name":"android.intent.launch","arguments":{"packageName":"com.android.deskclock"}}
|
||||
Available tools:
|
||||
- android.package.inspect {packageName?}
|
||||
- android.intent.launch {packageName?, action?, component?}
|
||||
- android.target.show {}
|
||||
- android.target.hide {}
|
||||
- android.target.attach {}
|
||||
- android.target.close {}
|
||||
- android.target.capture_frame {}
|
||||
- android.ui.dump {}
|
||||
- android.input.tap {x, y}
|
||||
- android.input.text {text}
|
||||
- android.input.key {key}
|
||||
- android.wait {millis}
|
||||
Use QUESTION: only when you need another free-form answer from the Agent.
|
||||
Use RESULT: when you are ready to report the next concrete step or final outcome.
|
||||
Do not emit markdown or extra lines.
|
||||
""".trimIndent()
|
||||
}
|
||||
|
||||
private val sessionControls = ConcurrentHashMap<String, SessionControl>()
|
||||
private val sessionControls = ConcurrentHashMap<String, GenieSessionControl>()
|
||||
|
||||
override fun onStartGenieSession(request: GenieRequest, callback: Callback) {
|
||||
val control = SessionControl()
|
||||
val control = GenieSessionControl()
|
||||
sessionControls[request.sessionId] = control
|
||||
Thread {
|
||||
runSession(request, callback, control)
|
||||
@@ -56,16 +26,20 @@ class CodexGenieService : GenieService() {
|
||||
}
|
||||
|
||||
override fun onCancelGenieSession(sessionId: String) {
|
||||
sessionControls.remove(sessionId)?.cancelled = true
|
||||
sessionControls.remove(sessionId)?.cancel()
|
||||
Log.i(TAG, "Cancelled session $sessionId")
|
||||
}
|
||||
|
||||
override fun onUserResponse(sessionId: String, response: String) {
|
||||
sessionControls[sessionId]?.userResponses?.offer(response)
|
||||
Log.i(TAG, "Received user response for $sessionId")
|
||||
Log.i(TAG, "Received Agent response for $sessionId")
|
||||
}
|
||||
|
||||
private fun runSession(request: GenieRequest, callback: Callback, control: SessionControl) {
|
||||
private fun runSession(
|
||||
request: GenieRequest,
|
||||
callback: Callback,
|
||||
control: GenieSessionControl,
|
||||
) {
|
||||
val sessionId = request.sessionId
|
||||
try {
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
|
||||
@@ -75,8 +49,9 @@ class CodexGenieService : GenieService() {
|
||||
)
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Genie is headless, routes control/data traffic through the Agent-owned Binder bridge, and uses structured Android tools locally.",
|
||||
"Genie is headless. It hosts codex app-server locally, routes model traffic through the Agent-owned codexd socket, and exposes Android tooling as dynamic tools.",
|
||||
)
|
||||
|
||||
val targetAppContext = runCatching { TargetAppInspector.inspect(this, request.targetPackage) }
|
||||
targetAppContext.onSuccess { targetApp ->
|
||||
callback.publishTrace(
|
||||
@@ -91,275 +66,56 @@ class CodexGenieService : GenieService() {
|
||||
)
|
||||
}
|
||||
|
||||
if (request.isDetachedModeAllowed) {
|
||||
callback.requestLaunchDetachedTargetHidden(sessionId)
|
||||
callback.publishTrace(sessionId, "Requested detached target launch for ${request.targetPackage}.")
|
||||
}
|
||||
|
||||
AgentBridgeClient(this).use { bridgeClient ->
|
||||
val toolExecutor = AndroidGenieToolExecutor(
|
||||
context = this,
|
||||
callback = callback,
|
||||
sessionId = sessionId,
|
||||
defaultTargetPackage = request.targetPackage,
|
||||
)
|
||||
val runtimeStatus = runCatching { bridgeClient.getRuntimeStatus() }
|
||||
runtimeStatus.onSuccess { status ->
|
||||
val accountSuffix = status.accountEmail?.let { " (${it})" } ?: ""
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Reached Agent Binder bridge; authenticated=${status.authenticated}${accountSuffix}, provider=${status.modelProviderId}, model=${status.effectiveModel ?: "unknown"}, clients=${status.clientCount}.",
|
||||
)
|
||||
}
|
||||
runtimeStatus.onFailure { err ->
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Agent Binder bridge probe failed: ${err.message}",
|
||||
)
|
||||
}
|
||||
|
||||
if (request.isDetachedModeAllowed) {
|
||||
callback.requestLaunchDetachedTargetHidden(sessionId)
|
||||
callback.publishTrace(sessionId, "Requested detached target launch for ${request.targetPackage}.")
|
||||
}
|
||||
|
||||
callback.publishQuestion(
|
||||
val runtimeStatus = bridgeClient.getRuntimeStatus()
|
||||
val accountSuffix = runtimeStatus.accountEmail?.let { " ($it)" } ?: ""
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
buildAgentQuestion(
|
||||
request = request,
|
||||
targetAppContext = targetAppContext.getOrNull(),
|
||||
),
|
||||
"Reached Agent Binder bridge; authenticated=${runtimeStatus.authenticated}${accountSuffix}, provider=${runtimeStatus.modelProviderId}, model=${runtimeStatus.effectiveModel ?: "unknown"}, clients=${runtimeStatus.clientCount}.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
|
||||
|
||||
if (control.cancelled) {
|
||||
callback.publishError(sessionId, "Cancelled")
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_CANCELLED)
|
||||
if (!runtimeStatus.authenticated || runtimeStatus.effectiveModel.isNullOrBlank()) {
|
||||
callback.publishResult(
|
||||
sessionId,
|
||||
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
|
||||
var runtime = runtimeStatus.getOrNull()
|
||||
val toolObservations = ArrayDeque<GenieToolObservation>()
|
||||
|
||||
var answer = waitForAgentAnswer(
|
||||
sessionId = sessionId,
|
||||
CodexAppServerHost(
|
||||
context = this,
|
||||
request = request,
|
||||
callback = callback,
|
||||
control = control,
|
||||
)
|
||||
Log.i(TAG, "Received Agent answer for $sessionId")
|
||||
callback.publishTrace(sessionId, "Received Agent answer: $answer")
|
||||
|
||||
repeat(MAX_MODEL_TURNS) {
|
||||
if (control.cancelled) {
|
||||
return@repeat
|
||||
}
|
||||
if (runtime == null || !runtime.authenticated || runtime.effectiveModel.isNullOrBlank()) {
|
||||
runtime = runCatching { bridgeClient.getRuntimeStatus() }
|
||||
.onFailure { err ->
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Agent Binder runtime refresh failed: ${err.message}",
|
||||
)
|
||||
}
|
||||
.getOrNull()
|
||||
}
|
||||
if (runtime == null) {
|
||||
callback.publishResult(
|
||||
sessionId,
|
||||
"Reached the Agent bridge, but runtime status was unavailable. Replace this scaffold with a real Codex-driven Genie executor.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
if (!runtime.authenticated || runtime.effectiveModel.isNullOrBlank()) {
|
||||
callback.publishResult(
|
||||
sessionId,
|
||||
"Reached the Agent bridge, but the Agent runtime was not authenticated or did not expose an effective model for ${request.targetPackage}.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
val activeRuntime = requireNotNull(runtime)
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Requesting a streaming /v1/responses call through the Agent using ${activeRuntime.effectiveModel}.",
|
||||
)
|
||||
val modelResponse = runCatching {
|
||||
requestModelNextStep(
|
||||
request = request,
|
||||
answer = answer,
|
||||
runtimeStatus = activeRuntime,
|
||||
targetAppContext = targetAppContext.getOrNull(),
|
||||
toolObservations = toolObservations.toList(),
|
||||
bridgeClient = bridgeClient,
|
||||
)
|
||||
}
|
||||
if (modelResponse.isFailure) {
|
||||
callback.publishTrace(
|
||||
sessionId,
|
||||
"Agent-mediated /v1/responses request failed: ${modelResponse.exceptionOrNull()?.message}",
|
||||
)
|
||||
callback.publishResult(
|
||||
sessionId,
|
||||
"Reached the Agent bridge for ${request.targetPackage}, but the proxied model request failed. Replace this scaffold with a real Codex-driven Genie executor.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
|
||||
when (val turn = GenieModelTurnParser.parse(modelResponse.getOrThrow())) {
|
||||
is GenieModelTurn.Result -> {
|
||||
Log.i(TAG, "Publishing Genie result for $sessionId")
|
||||
callback.publishResult(sessionId, turn.text)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
is GenieModelTurn.Question -> {
|
||||
Log.i(TAG, "Publishing Genie follow-up question for $sessionId")
|
||||
callback.publishTrace(sessionId, "Genie follow-up question: ${turn.text}")
|
||||
callback.publishQuestion(sessionId, turn.text)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_WAITING_FOR_USER)
|
||||
answer = waitForAgentAnswer(
|
||||
sessionId = sessionId,
|
||||
callback = callback,
|
||||
control = control,
|
||||
)
|
||||
Log.i(TAG, "Received follow-up Agent answer for $sessionId")
|
||||
callback.publishTrace(sessionId, "Received Agent answer: $answer")
|
||||
}
|
||||
is GenieModelTurn.ToolCall -> {
|
||||
val observation = runCatching {
|
||||
toolExecutor.execute(turn)
|
||||
}.getOrElse { err ->
|
||||
GenieToolObservation(
|
||||
name = turn.name,
|
||||
summary = "Tool ${turn.name} failed: ${err.message}",
|
||||
promptDetails = "Tool ${turn.name} failed.\nError: ${err.message ?: err::class.java.simpleName}",
|
||||
)
|
||||
}
|
||||
rememberToolObservation(toolObservations, observation)
|
||||
callback.publishTrace(sessionId, observation.summary)
|
||||
}
|
||||
}
|
||||
runtimeStatus = runtimeStatus,
|
||||
targetAppContext = targetAppContext.getOrNull(),
|
||||
).use { host ->
|
||||
host.run()
|
||||
}
|
||||
|
||||
callback.publishResult(
|
||||
sessionId,
|
||||
"Genie stopped after reaching the current tool/model turn limit. Continue the session with more guidance or increase the loop budget in code.",
|
||||
)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_COMPLETED)
|
||||
return
|
||||
}
|
||||
} catch (err: InterruptedException) {
|
||||
Thread.currentThread().interrupt()
|
||||
callback.publishError(sessionId, "Interrupted: ${err.message}")
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
|
||||
} catch (err: IOException) {
|
||||
if (control.cancelled) {
|
||||
callback.publishError(sessionId, "Cancelled")
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_CANCELLED)
|
||||
} else {
|
||||
callback.publishError(sessionId, err.message ?: err::class.java.simpleName)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
|
||||
}
|
||||
} catch (err: RuntimeException) {
|
||||
callback.publishError(sessionId, "${err::class.java.simpleName}: ${err.message}")
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_FAILED)
|
||||
} finally {
|
||||
sessionControls.remove(sessionId)
|
||||
control.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
private fun requestModelNextStep(
|
||||
request: GenieRequest,
|
||||
answer: String,
|
||||
runtimeStatus: CodexAgentBridge.RuntimeStatus,
|
||||
targetAppContext: TargetAppContext?,
|
||||
toolObservations: List<GenieToolObservation>,
|
||||
bridgeClient: AgentBridgeClient,
|
||||
): String {
|
||||
val model = checkNotNull(runtimeStatus.effectiveModel) { "missing effective model" }
|
||||
val recentImageInputs = toolObservations
|
||||
.flatMap(GenieToolObservation::imageDataUrls)
|
||||
.takeLast(1)
|
||||
val response = bridgeClient.sendHttpRequest(
|
||||
method = "POST",
|
||||
path = "/v1/responses",
|
||||
body = CodexAgentBridge.buildResponsesRequest(
|
||||
model = model,
|
||||
instructions = GENIE_RESPONSE_INSTRUCTIONS,
|
||||
prompt = buildModelPrompt(
|
||||
request = request,
|
||||
answer = answer,
|
||||
targetAppContext = targetAppContext,
|
||||
toolObservations = toolObservations,
|
||||
),
|
||||
imageDataUrls = recentImageInputs,
|
||||
).toString(),
|
||||
)
|
||||
return CodexAgentBridge.parseResponsesOutputText(response)
|
||||
}
|
||||
|
||||
private fun waitForAgentAnswer(
|
||||
sessionId: String,
|
||||
callback: Callback,
|
||||
control: SessionControl,
|
||||
): String {
|
||||
val answer = waitForUserResponse(control)
|
||||
callback.updateState(sessionId, AgentSessionInfo.STATE_RUNNING)
|
||||
return answer
|
||||
}
|
||||
|
||||
private fun waitForUserResponse(control: SessionControl): String {
|
||||
while (!control.cancelled) {
|
||||
val response = control.userResponses.poll(100, TimeUnit.MILLISECONDS)
|
||||
if (response != null) {
|
||||
return response
|
||||
}
|
||||
}
|
||||
throw IOException("Cancelled while waiting for user response")
|
||||
}
|
||||
|
||||
private fun buildModelPrompt(
|
||||
request: GenieRequest,
|
||||
answer: String,
|
||||
targetAppContext: TargetAppContext?,
|
||||
toolObservations: List<GenieToolObservation>,
|
||||
): String {
|
||||
val objective = abbreviate(request.prompt, MAX_OBJECTIVE_PROMPT_CHARS)
|
||||
val agentAnswer = abbreviate(answer, MAX_AGENT_ANSWER_CHARS)
|
||||
val targetSummary = targetAppContext?.renderPromptSection()
|
||||
?: "Target app inspection:\n- unavailable"
|
||||
val toolSummary = toolObservations.joinToString(separator = "\n\n") { it.renderForPrompt() }
|
||||
.ifBlank { "No tool observations yet." }
|
||||
return """
|
||||
You are Codex acting as an Android Genie for the target package ${request.targetPackage}.
|
||||
Original objective: $objective
|
||||
The Agent answered your latest question with: $agentAnswer
|
||||
|
||||
$targetSummary
|
||||
|
||||
Recent tool observations:
|
||||
$toolSummary
|
||||
|
||||
Emit exactly one line starting with TOOL:, QUESTION:, or RESULT:.
|
||||
""".trimIndent()
|
||||
}
|
||||
|
||||
private fun buildAgentQuestion(
|
||||
request: GenieRequest,
|
||||
targetAppContext: TargetAppContext?,
|
||||
): String {
|
||||
val displayName = targetAppContext?.displayName() ?: request.targetPackage
|
||||
return "Codex Genie is ready to drive $displayName. Reply with any extra constraints or answer 'continue' to let Genie proceed."
|
||||
}
|
||||
|
||||
private fun abbreviate(value: String, maxChars: Int): String {
|
||||
if (value.length <= maxChars) {
|
||||
return value
|
||||
}
|
||||
return value.take(maxChars - 1) + "…"
|
||||
}
|
||||
|
||||
private fun rememberToolObservation(
|
||||
toolObservations: ArrayDeque<GenieToolObservation>,
|
||||
observation: GenieToolObservation,
|
||||
) {
|
||||
toolObservations.addLast(observation)
|
||||
while (toolObservations.size > MAX_TOOL_OBSERVATIONS) {
|
||||
toolObservations.removeFirst()
|
||||
}
|
||||
}
|
||||
|
||||
private class SessionControl {
|
||||
@Volatile var cancelled = false
|
||||
val userResponses = LinkedBlockingQueue<String>()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
package com.openai.codex.genie
|
||||
|
||||
import java.io.IOException
|
||||
import java.util.concurrent.LinkedBlockingQueue
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
/**
 * Shared cancellation and response state for one Genie session.
 *
 * One side attaches the hosted subprocess via [process] and blocks in
 * [waitForUserResponse]; another side delivers answers through [userResponses]
 * and stops the session with [cancel].
 */
class GenieSessionControl {
    /** Set to `true` by [cancel]; checked by [waitForUserResponse] between polls. */
    @Volatile
    var cancelled = false

    /**
     * Subprocess to tear down when the session is cancelled, or `null` when none is attached.
     *
     * Assigning a process after [cancel] has already run destroys it immediately;
     * otherwise it would outlive the cancelled session, because [cancel] only
     * destroys the process that is attached at the moment it runs.
     */
    @Volatile
    var process: Process? = null
        set(value) {
            field = value
            // Write the field first, then read the flag: cancel() writes the flag
            // first, then reads the field. With both volatile, at least one side
            // observes the other, so a concurrently attached process is destroyed.
            if (cancelled) {
                value?.destroy()
            }
        }

    /** Answers waiting to be consumed by [waitForUserResponse]. */
    val userResponses = LinkedBlockingQueue<String>()

    /**
     * Marks the session cancelled, destroys the attached process (if any) and detaches it.
     *
     * Calling this more than once is harmless: later calls find no attached process.
     */
    fun cancel() {
        cancelled = true
        // Snapshot once so the instance that gets destroyed is exactly the one
        // that was attached, instead of reading the volatile property twice.
        val attached = process
        process = null
        attached?.destroy()
    }

    /**
     * Blocks until a response is available in [userResponses] and returns it.
     *
     * The queue is polled in 100 ms slices so that cancellation is noticed promptly.
     *
     * @return the next queued response.
     * @throws IOException if the session is (or becomes) cancelled before a response is taken.
     * @throws InterruptedException if the calling thread is interrupted while polling.
     */
    fun waitForUserResponse(): String {
        while (!cancelled) {
            val response = userResponses.poll(100, TimeUnit.MILLISECONDS)
            if (response != null) {
                return response
            }
        }
        throw IOException("Cancelled while waiting for Agent response")
    }
}
|
||||
@@ -6,7 +6,7 @@ This document tracks the Android Agent Platform refactor that moves Codex from a
|
||||
single `codexd` foreground-service wrapper to a framework-native Agent/Genie
|
||||
architecture.
|
||||
|
||||
The current repo now contains the first implementation slice:
|
||||
The current repo now contains these implementation slices:
|
||||
|
||||
- `android/app` is a buildable **Agent** app scaffold.
|
||||
- `android/genie` is a buildable **Genie** app scaffold.
|
||||
@@ -14,33 +14,32 @@ The current repo now contains the first implementation slice:
|
||||
- register `AgentService`
|
||||
- plan target packages for a user objective
|
||||
- launch direct parent + child framework sessions
|
||||
- start a Genie session for a target package
|
||||
- start one Genie session per selected target package
|
||||
- display framework session state and event timelines
|
||||
- answer waiting Genie questions
|
||||
- attach detached targets
|
||||
- The Genie app currently validates framework lifecycle, detached-target
|
||||
requests, question flow, and result publication with a placeholder executor.
|
||||
- The first internal Agent<->Genie control plane now uses an exported
|
||||
- The Genie app now hosts a real `codex app-server` subprocess, packaged inside
|
||||
the Genie APK as `libcodex.so`.
|
||||
- The first internal Agent<->Genie control plane uses an exported
|
||||
**Binder/AIDL service** in the Agent app, not framework question/answer
|
||||
events.
|
||||
- The current Binder bridge exposes small fixed-form calls, and the Genie
|
||||
runtime already uses it to fetch Agent-owned runtime metadata from the
|
||||
embedded `codexd`, including auth status and the effective model/provider.
|
||||
- The Genie runtime now also inspects the paired target package from inside the
|
||||
- The Genie runtime inspects the paired target package from inside the
|
||||
target-app sandbox and feeds package metadata plus launcher intent details
|
||||
into the bridged model prompt.
|
||||
- The Genie scaffold now issues one real **streaming `/v1/responses`** request
|
||||
through that bridge after the user answer, proving that model traffic can
|
||||
stay Agent-owned even while the Genie runs inside the target-app sandbox.
|
||||
- The Genie runtime now supports a first generic structured tool loop with
|
||||
reusable Android capabilities instead of app-specific hardcoded behavior.
|
||||
- Non-bridge Genie questions now surface through an Agent-owned notification,
|
||||
which gives the Agent a concrete user-escalation path without making the
|
||||
Genie the user-facing surface.
|
||||
- The Agent now also attempts to answer non-bridge Genie questions through the
|
||||
embedded `codexd` runtime before falling back to that notification path.
|
||||
- Runtime testing on the emulator now shows that the exported Agent Binder
|
||||
service is reachable from Genie execution for the current control-plane calls.
|
||||
into the delegated Codex prompt.
|
||||
- The hosted `codex app-server` process routes model traffic through the
|
||||
Agent-owned `codexd` abstract Unix socket, keeping network/auth Agent-owned
|
||||
even while the Genie runs inside the target-app sandbox.
|
||||
- The Genie runtime exposes reusable Android capabilities to Codex as
|
||||
**dynamic tools**, not via a custom `TOOL:` text protocol.
|
||||
- Non-bridge Genie questions surface through AgentSDK question flow by mapping
|
||||
`request_user_input` back into Agent-managed questions and answers.
|
||||
- The Agent also attempts to answer Genie questions through the embedded
|
||||
`codexd` runtime before falling back to notification/UI escalation.
|
||||
- Runtime testing on the emulator shows that the exported Agent Binder service
|
||||
is reachable from Genie execution for the current bootstrap calls.
|
||||
|
||||
The Rust `codexd` service/client split remains in place and is still the
|
||||
existing network/auth bridge while this refactor proceeds.
|
||||
@@ -71,6 +70,7 @@ existing network/auth bridge while this refactor proceeds.
|
||||
- Internal Agent<->Genie coordination now splits into:
|
||||
- Binder/AIDL for fixed-form control/data RPC
|
||||
- AgentSDK session events for free-form product dialogue
|
||||
- hosted `codex app-server` inside Genie for the actual Codex execution loop
|
||||
|
||||
## Runtime Model
|
||||
|
||||
@@ -98,12 +98,12 @@ existing network/auth bridge while this refactor proceeds.
|
||||
- driving the paired target app
|
||||
- publishing trace, question, result, and error events
|
||||
- requesting detached target actions when appropriate
|
||||
- The current implementation is a placeholder executor. It proves:
|
||||
- framework start/cancel behavior
|
||||
- question/answer flow
|
||||
- detached-target requests
|
||||
- result publication
|
||||
- Agent-mediated Binder bridge requests across the Agent/Genie boundary
|
||||
- The current implementation hosts `codex app-server` inside the Genie sandbox.
|
||||
- Kotlin is now only the host/bridge layer for:
|
||||
- framework lifecycle and result publication
|
||||
- Android dynamic tool execution
|
||||
- Agent escalation via `request_user_input`
|
||||
- runtime bootstrap from the Agent-owned Binder bridge
|
||||
|
||||
## First Milestone Scope
|
||||
|
||||
@@ -120,14 +120,12 @@ existing network/auth bridge while this refactor proceeds.
|
||||
- Question answering and detached-target attach controls
|
||||
- Exported Binder bridge request handling in `CodexAgentBridgeService`
|
||||
- Binder bridge request issuance in `CodexGenieService`
|
||||
- Generic small HTTP request/response envelopes over the Binder bridge, with
|
||||
the Genie using the real `codexd` HTTP response bodies
|
||||
- Agent-owned `/internal/runtime/status` metadata for Genie bootstrap
|
||||
- Target-app package metadata and launcher-intent inspection from the Genie
|
||||
sandbox, with that context included in the bridged model prompt
|
||||
- One real streaming proxied `/v1/responses` request from Genie through the
|
||||
Agent-owned bridge after the user answer
|
||||
- A generic structured tool loop inside Genie with:
|
||||
sandbox, with that context included in the delegated Codex prompt
|
||||
- Hosted `codex app-server` inside Genie, with model traffic routed through the
|
||||
Agent-owned `codexd` abstract socket
|
||||
- Android dynamic tools registered on the Genie Codex thread with:
|
||||
- `android.package.inspect`
|
||||
- `android.intent.launch`
|
||||
- detached target show/hide/attach/close
|
||||
@@ -135,28 +133,31 @@ existing network/auth bridge while this refactor proceeds.
|
||||
- UI hierarchy dump
|
||||
- shell-backed input injection helpers (`tap`, `text`, `key`)
|
||||
- bounded waits
|
||||
- Agent-owned question notifications for non-bridge Genie questions
|
||||
- Agent-mediated free-form answers for non-bridge Genie questions, using the
|
||||
current embedded `codexd` runtime as the temporary answer engine
|
||||
- `request_user_input` bridged from hosted Codex back into AgentSDK questions
|
||||
- Agent-owned question notifications for Genie questions that need user input
|
||||
- Agent-mediated free-form answers for Genie questions, using the current
|
||||
embedded `codexd` runtime as the temporary answer engine
|
||||
- Abstract-unix-socket support in the legacy Rust bridge via `@name` or
|
||||
`abstract:name`, so the compatibility transport can move off app-private
|
||||
filesystem sockets when Agent<->Genie traffic is introduced
|
||||
|
||||
### Not done yet
|
||||
|
||||
- Replacing the placeholder Genie executor with a real Codex runtime
|
||||
- Moving network/auth mediation from `codexd` into the Agent runtime
|
||||
- Expanding the Binder control plane beyond the current fixed-form runtime-status
|
||||
and proxied-HTTP calls
|
||||
- Wiring Android-native target-driving tools into the Genie runtime
|
||||
- Expanding the Binder control plane beyond the current fixed-form runtime
|
||||
bootstrap/status calls
|
||||
- Making the Agent the default product surface instead of the legacy service app
|
||||
- Replacing the remaining Agent-side use of embedded `codexd` for planning and
|
||||
auto-answering with a first-class Agent runtime
|
||||
- Adding more Android-native tool surfaces and richer observation types to the
|
||||
hosted Genie runtime
|
||||
|
||||
## Current Code Layout
|
||||
|
||||
- `android/app`
|
||||
- Agent scaffold and legacy `codexd` wrapper live together for now
|
||||
- `android/genie`
|
||||
- standalone Genie scaffold APK
|
||||
- standalone Genie scaffold APK with hosted `codex app-server`
|
||||
- `android/app/src/main/java/com/openai/codexd/CodexAgentService.kt`
|
||||
- framework `AgentService`
|
||||
- `android/app/src/main/java/com/openai/codexd/AgentSessionController.kt`
|
||||
@@ -164,13 +165,14 @@ existing network/auth bridge while this refactor proceeds.
|
||||
- `android/app/src/main/java/com/openai/codexd/MainActivity.kt`
|
||||
- Agent session UI plus existing `codexd` bridge controls
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/CodexGenieService.kt`
|
||||
- placeholder Genie executor
|
||||
- Genie lifecycle host for the embedded `codex app-server`
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/CodexAppServerHost.kt`
|
||||
- stdio JSON-RPC host for `codex app-server`, dynamic tools, and
|
||||
`request_user_input` bridging
|
||||
- `android/app/src/main/java/com/openai/codexd/CodexAgentBridgeService.kt`
|
||||
- exported Binder/AIDL bridge for Genie control-plane calls
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/AgentBridgeClient.kt`
|
||||
- Genie-side Binder client for the Agent bridge service
|
||||
- `android/genie/src/main/java/com/openai/codex/genie/CodexAgentBridge.kt`
|
||||
- Genie-side request/response helpers for bridged model traffic
|
||||
- `android/app/src/main/java/com/openai/codexd/CodexdLocalClient.kt`
|
||||
- Agent-local client for the embedded `codexd` bridge
|
||||
|
||||
@@ -182,6 +184,13 @@ Set the Agent Platform stub SDK zip path:
|
||||
export ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP=/path/to/android-agent-platform-stub-sdk.zip
|
||||
```
|
||||
|
||||
Build both Android binaries first:
|
||||
|
||||
```bash
|
||||
just android-build
|
||||
just android-service-build
|
||||
```
|
||||
|
||||
Build both Android apps:
|
||||
|
||||
```bash
|
||||
@@ -189,13 +198,14 @@ cd android
|
||||
./gradlew :genie:assembleDebug :app:assembleDebug
|
||||
```
|
||||
|
||||
The Agent app still depends on `just android-service-build` for the packaged
|
||||
`codexd` JNI binaries.
|
||||
The Agent app depends on `just android-service-build` for the packaged
|
||||
`codexd` JNI binaries. The Genie app now also depends on `just android-build`
|
||||
for the packaged `codex` JNI binaries.
|
||||
|
||||
## Next Implementation Steps
|
||||
|
||||
1. Move the placeholder Genie session executor to a real Codex runtime role.
|
||||
2. Expand the Binder control plane into a fuller Agent<->Genie runtime API.
|
||||
3. Split the legacy `codexd` concerns out of the Agent UI once the Agent owns
|
||||
1. Expand the Binder control plane into a fuller Agent<->Genie runtime API.
|
||||
2. Split the legacy `codexd` concerns out of the Agent UI once the Agent owns
|
||||
auth and transport directly.
|
||||
4. Add Android-native tool surfaces to Genie for target inspection and control.
|
||||
3. Add more Android-native tool surfaces and richer observation types to the
|
||||
hosted Genie runtime.
|
||||
|
||||
@@ -32,13 +32,16 @@ stub SDK:
|
||||
|
||||
```bash
|
||||
export ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP=/path/to/android-agent-platform-stub-sdk.zip
|
||||
just android-build
|
||||
just android-service-build
|
||||
cd android
|
||||
./gradlew :genie:assembleDebug :app:assembleDebug
|
||||
```
|
||||
The Agent/Genie prototype modules require
|
||||
`ANDROID_AGENT_PLATFORM_STUB_SDK_ZIP` (or `-PagentPlatformStubSdkZip=...`) so
|
||||
Gradle can compile against the stub SDK jar.
|
||||
Gradle can compile against the stub SDK jar. The Genie APK now also packages
|
||||
the Android `codex` binary as `libcodex.so`, so `just android-build` must run
|
||||
before `:genie:assembleDebug`.
|
||||
If `cargo-ndk` cannot find your NDK, set:
|
||||
|
||||
```bash
|
||||
|
||||
Reference in New Issue
Block a user