mirror of
https://github.com/openai/codex.git
synced 2026-04-29 17:06:51 +00:00
Harden hosted Android Agent Genie bridge
Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
@@ -45,6 +45,7 @@ object AgentCodexAppServerClient {
|
||||
context: Context,
|
||||
instructions: String,
|
||||
prompt: String,
|
||||
outputSchema: JSONObject? = null,
|
||||
dynamicTools: JSONArray? = null,
|
||||
toolCallHandler: ((String, JSONObject) -> JSONObject)? = null,
|
||||
requestUserInputHandler: ((JSONArray) -> JSONObject)? = null,
|
||||
@@ -62,7 +63,11 @@ object AgentCodexAppServerClient {
|
||||
instructions = instructions,
|
||||
dynamicTools = dynamicTools,
|
||||
)
|
||||
startTurn(threadId, prompt)
|
||||
startTurn(
|
||||
threadId = threadId,
|
||||
prompt = prompt,
|
||||
outputSchema = outputSchema,
|
||||
)
|
||||
waitForTurnCompletion(toolCallHandler, requestUserInputHandler).also { response ->
|
||||
Log.i(TAG, "requestText completed response=${response.take(160)}")
|
||||
}
|
||||
@@ -180,19 +185,24 @@ object AgentCodexAppServerClient {
|
||||
private fun startTurn(
|
||||
threadId: String,
|
||||
prompt: String,
|
||||
outputSchema: JSONObject?,
|
||||
) {
|
||||
val turnParams = JSONObject()
|
||||
.put("threadId", threadId)
|
||||
.put(
|
||||
"input",
|
||||
JSONArray().put(
|
||||
JSONObject()
|
||||
.put("type", "text")
|
||||
.put("text", prompt),
|
||||
),
|
||||
)
|
||||
if (outputSchema != null) {
|
||||
turnParams.put("outputSchema", outputSchema)
|
||||
}
|
||||
request(
|
||||
method = "turn/start",
|
||||
params = JSONObject()
|
||||
.put("threadId", threadId)
|
||||
.put(
|
||||
"input",
|
||||
JSONArray().put(
|
||||
JSONObject()
|
||||
.put("type", "text")
|
||||
.put("text", prompt),
|
||||
),
|
||||
),
|
||||
params = turnParams,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -226,6 +236,16 @@ object AgentCodexAppServerClient {
|
||||
.append(params.optString("delta"))
|
||||
}
|
||||
}
|
||||
"item/commandExecution/outputDelta" -> {
|
||||
val itemId = params.optString("itemId")
|
||||
val delta = params.optString("delta")
|
||||
if (delta.isNotBlank()) {
|
||||
Log.i(
|
||||
TAG,
|
||||
"commandExecution/outputDelta itemId=$itemId delta=${delta.take(400)}",
|
||||
)
|
||||
}
|
||||
}
|
||||
"item/started" -> {
|
||||
val item = params.optJSONObject("item")
|
||||
Log.i(
|
||||
@@ -239,6 +259,9 @@ object AgentCodexAppServerClient {
|
||||
TAG,
|
||||
"item/completed type=${item.optString("type")} status=${item.optString("status")} tool=${item.optString("tool")}",
|
||||
)
|
||||
if (item.optString("type") == "commandExecution") {
|
||||
Log.i(TAG, "commandExecution/completed item=$item")
|
||||
}
|
||||
if (item.optString("type") == "agentMessage") {
|
||||
val itemId = item.optString("id")
|
||||
val text = item.optString("text").ifBlank {
|
||||
|
||||
@@ -12,6 +12,12 @@ class AgentFrameworkToolBridge(
|
||||
) {
|
||||
companion object {
|
||||
private const val TAG = "AgentFrameworkTool"
|
||||
private val DISALLOWED_TARGET_PACKAGES = setOf(
|
||||
"com.android.shell",
|
||||
"com.android.systemui",
|
||||
"com.openai.codexd",
|
||||
"com.openai.codex.genie",
|
||||
)
|
||||
const val START_DIRECT_SESSION_TOOL = "android_framework_sessions_start_direct"
|
||||
const val LIST_SESSIONS_TOOL = "android_framework_sessions_list"
|
||||
const val ANSWER_QUESTION_TOOL = "android_framework_sessions_answer_question"
|
||||
@@ -21,15 +27,20 @@ class AgentFrameworkToolBridge(
|
||||
internal fun parseStartDirectSessionArguments(
|
||||
arguments: JSONObject,
|
||||
userObjective: String,
|
||||
isLaunchablePackage: (String) -> Boolean,
|
||||
isEligibleTargetPackage: (String) -> Boolean,
|
||||
): StartDirectSessionRequest {
|
||||
val targetsJson = arguments.optJSONArray("targets")
|
||||
?: throw IOException("Framework session tool arguments missing targets")
|
||||
val rejectedPackages = mutableListOf<String>()
|
||||
val targets = buildList {
|
||||
for (index in 0 until targetsJson.length()) {
|
||||
val target = targetsJson.optJSONObject(index) ?: continue
|
||||
val packageName = target.optString("packageName").trim()
|
||||
if (packageName.isEmpty() || !isLaunchablePackage(packageName)) {
|
||||
if (packageName.isEmpty()) {
|
||||
continue
|
||||
}
|
||||
if (!isEligibleTargetPackage(packageName)) {
|
||||
rejectedPackages += packageName
|
||||
continue
|
||||
}
|
||||
val objective = target.optString("objective").trim().ifEmpty { userObjective }
|
||||
@@ -42,7 +53,12 @@ class AgentFrameworkToolBridge(
|
||||
}
|
||||
}.distinctBy(AgentDelegationTarget::packageName)
|
||||
if (targets.isEmpty()) {
|
||||
throw IOException("Framework session tool did not select a launchable package")
|
||||
if (rejectedPackages.isNotEmpty()) {
|
||||
throw IOException(
|
||||
"Framework session tool selected missing or disallowed package(s): ${rejectedPackages.joinToString(", ")}",
|
||||
)
|
||||
}
|
||||
throw IOException("Framework session tool did not select an eligible target package")
|
||||
}
|
||||
return StartDirectSessionRequest(
|
||||
plan = AgentDelegationPlan(
|
||||
@@ -90,7 +106,7 @@ class AgentFrameworkToolBridge(
|
||||
val request = parseStartDirectSessionArguments(
|
||||
arguments = arguments,
|
||||
userObjective = userObjective,
|
||||
isLaunchablePackage = ::isLaunchablePackage,
|
||||
isEligibleTargetPackage = ::isEligibleTargetPackage,
|
||||
)
|
||||
val startedSession = sessionController.startDirectSession(
|
||||
plan = request.plan,
|
||||
@@ -267,8 +283,13 @@ class AgentFrameworkToolBridge(
|
||||
.put("sessions", sessions)
|
||||
}
|
||||
|
||||
private fun isLaunchablePackage(packageName: String): Boolean {
|
||||
return context.packageManager.getLaunchIntentForPackage(packageName) != null
|
||||
private fun isEligibleTargetPackage(packageName: String): Boolean {
|
||||
if (packageName in DISALLOWED_TARGET_PACKAGES) {
|
||||
return false
|
||||
}
|
||||
return runCatching {
|
||||
context.packageManager.getApplicationInfo(packageName, 0)
|
||||
}.isSuccess
|
||||
}
|
||||
|
||||
private fun requireString(arguments: JSONObject, fieldName: String): String {
|
||||
|
||||
@@ -1,18 +1,21 @@
|
||||
package com.openai.codexd
|
||||
|
||||
import android.content.Context
|
||||
import android.os.ParcelFileDescriptor
|
||||
import android.util.Log
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
import java.io.OutputStream
|
||||
import java.net.HttpURLConnection
|
||||
import java.net.URL
|
||||
import java.nio.charset.StandardCharsets
|
||||
import kotlin.concurrent.thread
|
||||
import org.json.JSONObject
|
||||
|
||||
object AgentResponsesProxy {
|
||||
private const val TAG = "AgentResponsesProxy"
|
||||
private const val CONNECT_TIMEOUT_MS = 30_000
|
||||
private const val READ_TIMEOUT_MS = 30_000
|
||||
private const val READ_TIMEOUT_MS = 0
|
||||
private const val DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1"
|
||||
private const val DEFAULT_CHATGPT_BASE_URL = "https://chatgpt.com/backend-api/codex"
|
||||
private const val DEFAULT_ORIGINATOR = "codex_cli_rs"
|
||||
@@ -37,6 +40,33 @@ object AgentResponsesProxy {
|
||||
return executeRequest(upstreamUrl, requestBody, authSnapshot)
|
||||
}
|
||||
|
||||
fun openResponsesStream(
|
||||
context: Context,
|
||||
requestBody: String,
|
||||
): ParcelFileDescriptor {
|
||||
val pipe = ParcelFileDescriptor.createPipe()
|
||||
thread(name = "AgentResponsesProxyStream") {
|
||||
ParcelFileDescriptor.AutoCloseOutputStream(pipe[1]).use { output ->
|
||||
streamResponsesTo(context, requestBody, output)
|
||||
}
|
||||
}
|
||||
return pipe[0]
|
||||
}
|
||||
|
||||
fun streamResponsesTo(
|
||||
context: Context,
|
||||
requestBody: String,
|
||||
output: OutputStream,
|
||||
) {
|
||||
val authSnapshot = loadAuthSnapshot(File(context.filesDir, "codex-home/auth.json"))
|
||||
val upstreamUrl = buildResponsesUrl(
|
||||
upstreamBaseUrl = "provider-default",
|
||||
authSnapshot.authMode,
|
||||
)
|
||||
Log.i(TAG, "Streaming /v1/responses -> $upstreamUrl (auth_mode=${authSnapshot.authMode})")
|
||||
streamRequest(upstreamUrl, requestBody, authSnapshot, output)
|
||||
}
|
||||
|
||||
internal fun buildResponsesUrl(
|
||||
upstreamBaseUrl: String,
|
||||
authMode: String,
|
||||
@@ -92,23 +122,7 @@ object AgentResponsesProxy {
|
||||
requestBody: String,
|
||||
authSnapshot: AuthSnapshot,
|
||||
): CodexdLocalClient.HttpResponse {
|
||||
val connection = (URL(upstreamUrl).openConnection() as HttpURLConnection).apply {
|
||||
requestMethod = "POST"
|
||||
connectTimeout = CONNECT_TIMEOUT_MS
|
||||
readTimeout = READ_TIMEOUT_MS
|
||||
doInput = true
|
||||
doOutput = true
|
||||
instanceFollowRedirects = true
|
||||
setRequestProperty("Authorization", "Bearer ${authSnapshot.bearerToken}")
|
||||
setRequestProperty("Content-Type", "application/json")
|
||||
setRequestProperty("Accept", "text/event-stream")
|
||||
setRequestProperty("Accept-Encoding", "identity")
|
||||
setRequestProperty("originator", DEFAULT_ORIGINATOR)
|
||||
setRequestProperty("User-Agent", DEFAULT_USER_AGENT)
|
||||
if (authSnapshot.authMode == "chatgpt" && !authSnapshot.accountId.isNullOrBlank()) {
|
||||
setRequestProperty("ChatGPT-Account-ID", authSnapshot.accountId)
|
||||
}
|
||||
}
|
||||
val connection = openConnection(upstreamUrl, authSnapshot)
|
||||
return try {
|
||||
connection.outputStream.use { output ->
|
||||
output.write(requestBody.toByteArray(StandardCharsets.UTF_8))
|
||||
@@ -127,6 +141,99 @@ object AgentResponsesProxy {
|
||||
}
|
||||
}
|
||||
|
||||
private fun streamRequest(
|
||||
upstreamUrl: String,
|
||||
requestBody: String,
|
||||
authSnapshot: AuthSnapshot,
|
||||
output: OutputStream,
|
||||
) {
|
||||
val connection = openConnection(upstreamUrl, authSnapshot)
|
||||
var headersWritten = false
|
||||
try {
|
||||
connection.outputStream.use { requestStream ->
|
||||
requestStream.write(requestBody.toByteArray(StandardCharsets.UTF_8))
|
||||
requestStream.flush()
|
||||
}
|
||||
val statusCode = connection.responseCode
|
||||
val responseStream = if (statusCode >= 400) connection.errorStream else connection.inputStream
|
||||
writeRawHttpResponseHeaders(
|
||||
output = output,
|
||||
statusCode = statusCode,
|
||||
contentType = connection.contentType,
|
||||
)
|
||||
headersWritten = true
|
||||
responseStream?.use { it.copyTo(output) }
|
||||
} catch (err: Exception) {
|
||||
Log.w(TAG, "Streaming /v1/responses failed", err)
|
||||
if (!headersWritten) {
|
||||
writeRawHttpResponseHeaders(
|
||||
output = output,
|
||||
statusCode = 502,
|
||||
contentType = "text/plain; charset=utf-8",
|
||||
)
|
||||
output.write(
|
||||
(err.message ?: err::class.java.simpleName).toByteArray(StandardCharsets.UTF_8),
|
||||
)
|
||||
}
|
||||
} finally {
|
||||
runCatching { output.flush() }
|
||||
connection.disconnect()
|
||||
}
|
||||
}
|
||||
|
||||
private fun openConnection(
|
||||
upstreamUrl: String,
|
||||
authSnapshot: AuthSnapshot,
|
||||
): HttpURLConnection {
|
||||
return (URL(upstreamUrl).openConnection() as HttpURLConnection).apply {
|
||||
requestMethod = "POST"
|
||||
connectTimeout = CONNECT_TIMEOUT_MS
|
||||
readTimeout = READ_TIMEOUT_MS
|
||||
doInput = true
|
||||
doOutput = true
|
||||
instanceFollowRedirects = true
|
||||
setRequestProperty("Authorization", "Bearer ${authSnapshot.bearerToken}")
|
||||
setRequestProperty("Content-Type", "application/json")
|
||||
setRequestProperty("Accept", "text/event-stream")
|
||||
setRequestProperty("Accept-Encoding", "identity")
|
||||
setRequestProperty("originator", DEFAULT_ORIGINATOR)
|
||||
setRequestProperty("User-Agent", DEFAULT_USER_AGENT)
|
||||
if (authSnapshot.authMode == "chatgpt" && !authSnapshot.accountId.isNullOrBlank()) {
|
||||
setRequestProperty("ChatGPT-Account-ID", authSnapshot.accountId)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun writeRawHttpResponseHeaders(
|
||||
output: OutputStream,
|
||||
statusCode: Int,
|
||||
contentType: String?,
|
||||
) {
|
||||
val headers = buildString {
|
||||
append("HTTP/1.1 $statusCode ${reasonPhrase(statusCode)}\r\n")
|
||||
if (!contentType.isNullOrBlank()) {
|
||||
append("Content-Type: $contentType\r\n")
|
||||
}
|
||||
append("Connection: close\r\n")
|
||||
append("\r\n")
|
||||
}
|
||||
output.write(headers.toByteArray(StandardCharsets.US_ASCII))
|
||||
}
|
||||
|
||||
private fun reasonPhrase(statusCode: Int): String {
|
||||
return when (statusCode) {
|
||||
200 -> "OK"
|
||||
400 -> "Bad Request"
|
||||
401 -> "Unauthorized"
|
||||
403 -> "Forbidden"
|
||||
404 -> "Not Found"
|
||||
500 -> "Internal Server Error"
|
||||
502 -> "Bad Gateway"
|
||||
503 -> "Service Unavailable"
|
||||
else -> "Response"
|
||||
}
|
||||
}
|
||||
|
||||
private fun JSONObject.stringOrNull(key: String): String? {
|
||||
if (!has(key) || isNull(key)) {
|
||||
return null
|
||||
|
||||
@@ -0,0 +1,281 @@
|
||||
package com.openai.codexd
|
||||
|
||||
import android.content.Context
|
||||
import android.net.LocalServerSocket
|
||||
import android.net.LocalSocket
|
||||
import android.net.LocalSocketAddress
|
||||
import android.util.Log
|
||||
import com.openai.codex.bridge.AgentSocketBridgeContract
|
||||
import java.io.ByteArrayOutputStream
|
||||
import java.io.Closeable
|
||||
import java.io.EOFException
|
||||
import java.io.IOException
|
||||
import java.io.InputStream
|
||||
import java.io.OutputStream
|
||||
import java.nio.charset.StandardCharsets
|
||||
import java.util.Collections
|
||||
import java.util.concurrent.atomic.AtomicBoolean
|
||||
import org.json.JSONObject
|
||||
|
||||
object AgentSocketBridgeServer {
|
||||
@Volatile
|
||||
private var runningServer: RunningServer? = null
|
||||
|
||||
fun ensureStarted(context: Context) {
|
||||
synchronized(this) {
|
||||
if (runningServer != null) {
|
||||
return
|
||||
}
|
||||
runningServer = RunningServer(context.applicationContext).also(RunningServer::start)
|
||||
}
|
||||
}
|
||||
|
||||
private class RunningServer(
|
||||
private val context: Context,
|
||||
) : Closeable {
|
||||
companion object {
|
||||
private const val TAG = "AgentSocketBridge"
|
||||
}
|
||||
|
||||
private val boundSocket = LocalSocket().apply {
|
||||
bind(
|
||||
LocalSocketAddress(
|
||||
AgentSocketBridgeContract.SOCKET_NAME,
|
||||
LocalSocketAddress.Namespace.ABSTRACT,
|
||||
),
|
||||
)
|
||||
}
|
||||
private val serverSocket = LocalServerSocket(boundSocket.fileDescriptor)
|
||||
private val closed = AtomicBoolean(false)
|
||||
private val clientSockets = Collections.synchronizedSet(mutableSetOf<LocalSocket>())
|
||||
private val acceptThread = Thread(::acceptLoop, "AgentSocketBridge")
|
||||
|
||||
fun start() {
|
||||
acceptThread.start()
|
||||
Log.i(TAG, "Listening on ${AgentSocketBridgeContract.SOCKET_PATH}")
|
||||
}
|
||||
|
||||
override fun close() {
|
||||
if (!closed.compareAndSet(false, true)) {
|
||||
return
|
||||
}
|
||||
runCatching { serverSocket.close() }
|
||||
runCatching { boundSocket.close() }
|
||||
synchronized(clientSockets) {
|
||||
clientSockets.forEach { socket -> runCatching { socket.close() } }
|
||||
clientSockets.clear()
|
||||
}
|
||||
acceptThread.interrupt()
|
||||
}
|
||||
|
||||
private fun acceptLoop() {
|
||||
while (!closed.get()) {
|
||||
val socket = try {
|
||||
serverSocket.accept()
|
||||
} catch (err: IOException) {
|
||||
if (!closed.get()) {
|
||||
Log.w(TAG, "Failed to accept Agent socket bridge connection", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
clientSockets += socket
|
||||
Thread(
|
||||
{ handleClient(socket) },
|
||||
"AgentSocketBridgeClient",
|
||||
).start()
|
||||
}
|
||||
}
|
||||
|
||||
private fun handleClient(socket: LocalSocket) {
|
||||
socket.use { client ->
|
||||
try {
|
||||
val request = readRequest(client.inputStream)
|
||||
when {
|
||||
request.method == "GET" && request.path == "/internal/runtime/status" -> {
|
||||
writeJsonResponse(
|
||||
output = client.outputStream,
|
||||
statusCode = 200,
|
||||
body = buildRuntimeStatusJson().toString(),
|
||||
)
|
||||
}
|
||||
request.method == "POST" && request.path == "/v1/responses" -> {
|
||||
AgentResponsesProxy.streamResponsesTo(
|
||||
context = context,
|
||||
requestBody = request.body.orEmpty(),
|
||||
output = client.outputStream,
|
||||
)
|
||||
}
|
||||
request.method != "POST" && request.path == "/v1/responses" -> {
|
||||
writeTextResponse(
|
||||
output = client.outputStream,
|
||||
statusCode = 405,
|
||||
body = "Unsupported socket bridge method: ${request.method}",
|
||||
)
|
||||
}
|
||||
else -> {
|
||||
writeTextResponse(
|
||||
output = client.outputStream,
|
||||
statusCode = 404,
|
||||
body = "Unsupported socket bridge path: ${request.path}",
|
||||
)
|
||||
}
|
||||
}
|
||||
} catch (err: Exception) {
|
||||
if (!closed.get()) {
|
||||
Log.w(TAG, "Agent socket bridge request failed", err)
|
||||
runCatching {
|
||||
writeTextResponse(
|
||||
output = client.outputStream,
|
||||
statusCode = 502,
|
||||
body = err.message ?: err::class.java.simpleName,
|
||||
)
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
clientSockets -= client
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private fun buildRuntimeStatusJson(): JSONObject {
|
||||
val status = AgentCodexAppServerClient.readRuntimeStatus(context)
|
||||
return JSONObject()
|
||||
.put("authenticated", status.authenticated)
|
||||
.put("accountEmail", status.accountEmail)
|
||||
.put("clientCount", status.clientCount)
|
||||
.put("modelProviderId", status.modelProviderId)
|
||||
.put("configuredModel", status.configuredModel)
|
||||
.put("effectiveModel", status.effectiveModel)
|
||||
.put("upstreamBaseUrl", status.upstreamBaseUrl)
|
||||
}
|
||||
}
|
||||
|
||||
private data class ParsedRequest(
|
||||
val method: String,
|
||||
val path: String,
|
||||
val body: String?,
|
||||
)
|
||||
|
||||
private fun readRequest(input: InputStream): ParsedRequest {
|
||||
val headerBuffer = ByteArrayOutputStream()
|
||||
var matched = 0
|
||||
while (matched < 4) {
|
||||
val next = input.read()
|
||||
if (next == -1) {
|
||||
throw EOFException("unexpected EOF while reading Agent socket bridge request headers")
|
||||
}
|
||||
headerBuffer.write(next)
|
||||
matched = when {
|
||||
matched == 0 && next == '\r'.code -> 1
|
||||
matched == 1 && next == '\n'.code -> 2
|
||||
matched == 2 && next == '\r'.code -> 3
|
||||
matched == 3 && next == '\n'.code -> 4
|
||||
next == '\r'.code -> 1
|
||||
else -> 0
|
||||
}
|
||||
}
|
||||
|
||||
val headerBytes = headerBuffer.toByteArray()
|
||||
val headerText = headerBytes
|
||||
.copyOfRange(0, headerBytes.size - 4)
|
||||
.toString(StandardCharsets.US_ASCII)
|
||||
val lines = headerText.split("\r\n")
|
||||
val requestLine = lines.firstOrNull()
|
||||
?: throw IOException("socket bridge request line missing")
|
||||
val requestParts = requestLine.split(" ", limit = 3)
|
||||
if (requestParts.size < 2) {
|
||||
throw IOException("invalid socket bridge request line: $requestLine")
|
||||
}
|
||||
|
||||
val headers = mutableMapOf<String, String>()
|
||||
lines.drop(1).forEach { line ->
|
||||
val separatorIndex = line.indexOf(':')
|
||||
if (separatorIndex <= 0) {
|
||||
return@forEach
|
||||
}
|
||||
val name = line.substring(0, separatorIndex).trim().lowercase()
|
||||
val value = line.substring(separatorIndex + 1).trim()
|
||||
headers[name] = value
|
||||
}
|
||||
|
||||
if (headers["transfer-encoding"]?.contains("chunked", ignoreCase = true) == true) {
|
||||
throw IOException("chunked socket bridge requests are unsupported")
|
||||
}
|
||||
|
||||
val contentLength = headers["content-length"]?.toIntOrNull() ?: 0
|
||||
val bodyBytes = ByteArray(contentLength)
|
||||
var offset = 0
|
||||
while (offset < bodyBytes.size) {
|
||||
val read = input.read(bodyBytes, offset, bodyBytes.size - offset)
|
||||
if (read == -1) {
|
||||
throw EOFException("unexpected EOF while reading Agent socket bridge request body")
|
||||
}
|
||||
offset += read
|
||||
}
|
||||
|
||||
return ParsedRequest(
|
||||
method = requestParts[0],
|
||||
path = requestParts[1],
|
||||
body = if (bodyBytes.isEmpty()) null else bodyBytes.toString(StandardCharsets.UTF_8),
|
||||
)
|
||||
}
|
||||
|
||||
private fun writeJsonResponse(
|
||||
output: OutputStream,
|
||||
statusCode: Int,
|
||||
body: String,
|
||||
) {
|
||||
writeResponse(
|
||||
output = output,
|
||||
statusCode = statusCode,
|
||||
body = body,
|
||||
contentType = "application/json; charset=utf-8",
|
||||
)
|
||||
}
|
||||
|
||||
private fun writeTextResponse(
|
||||
output: OutputStream,
|
||||
statusCode: Int,
|
||||
body: String,
|
||||
) {
|
||||
writeResponse(
|
||||
output = output,
|
||||
statusCode = statusCode,
|
||||
body = body,
|
||||
contentType = "text/plain; charset=utf-8",
|
||||
)
|
||||
}
|
||||
|
||||
private fun writeResponse(
|
||||
output: OutputStream,
|
||||
statusCode: Int,
|
||||
body: String,
|
||||
contentType: String,
|
||||
) {
|
||||
val bodyBytes = body.toByteArray(StandardCharsets.UTF_8)
|
||||
val headers = buildString {
|
||||
append("HTTP/1.1 $statusCode ${reasonPhrase(statusCode)}\r\n")
|
||||
append("Content-Type: $contentType\r\n")
|
||||
append("Content-Length: ${bodyBytes.size}\r\n")
|
||||
append("Connection: close\r\n")
|
||||
append("\r\n")
|
||||
}
|
||||
output.write(headers.toByteArray(StandardCharsets.US_ASCII))
|
||||
output.write(bodyBytes)
|
||||
output.flush()
|
||||
}
|
||||
|
||||
private fun reasonPhrase(statusCode: Int): String {
|
||||
return when (statusCode) {
|
||||
200 -> "OK"
|
||||
400 -> "Bad Request"
|
||||
401 -> "Unauthorized"
|
||||
403 -> "Forbidden"
|
||||
404 -> "Not Found"
|
||||
500 -> "Internal Server Error"
|
||||
502 -> "Bad Gateway"
|
||||
503 -> "Service Unavailable"
|
||||
else -> "Response"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,6 +5,7 @@ import android.util.Log
|
||||
import java.io.IOException
|
||||
import org.json.JSONArray
|
||||
import org.json.JSONObject
|
||||
import org.json.JSONTokener
|
||||
|
||||
data class AgentDelegationTarget(
|
||||
val packageName: String,
|
||||
@@ -23,19 +24,69 @@ data class AgentDelegationPlan(
|
||||
|
||||
object AgentTaskPlanner {
|
||||
private const val TAG = "AgentTaskPlanner"
|
||||
private const val PLANNER_ATTEMPTS = 2
|
||||
|
||||
private val PLANNER_INSTRUCTIONS =
|
||||
"""
|
||||
You are Codex acting as the Android Agent orchestrator.
|
||||
The user interacts only with the Agent. Decide which installed Android packages should receive delegated Genie sessions.
|
||||
Use the standard Android shell tools already available in this runtime, such as `cmd package`, `pm`, and `am`, to inspect installed packages and resolve the correct targets.
|
||||
After deciding on the target packages, call the framework session tool `${AgentFrameworkToolBridge.START_DIRECT_SESSION_TOOL}` exactly once.
|
||||
Return exactly one JSON object and nothing else. Do not wrap it in markdown fences.
|
||||
JSON schema:
|
||||
{
|
||||
"targets": [
|
||||
{
|
||||
"packageName": "installed.package",
|
||||
"objective": "free-form delegated objective for the child Genie"
|
||||
}
|
||||
],
|
||||
"reason": "short rationale",
|
||||
"allowDetachedMode": true
|
||||
}
|
||||
Rules:
|
||||
- Choose the fewest packages needed to complete the request.
|
||||
- The framework session tool `targets` must be non-empty.
|
||||
- `targets` must be non-empty.
|
||||
- Each delegated `objective` should be written for the child Genie, not the user.
|
||||
- After the framework session tool succeeds, reply with a short summary for the Agent UI.
|
||||
- Stop after at most 6 shell commands.
|
||||
- Prefer direct package-manager commands over grepping large package lists.
|
||||
- Verify each chosen package by inspecting its package dump or query-activities output before returning it.
|
||||
- Only choose packages that directly own the requested app behavior. Never choose helper packages such as `com.android.shell`, `com.android.systemui`, or the Codex Agent/Genie packages unless the user explicitly asked for them.
|
||||
- For intent resolution commands, include `--user 0`.
|
||||
- If the user objective already names a specific installed package, use it directly after verification.
|
||||
- `cmd package list packages PACKAGE_NAME` alone is not sufficient verification.
|
||||
- Prefer focused verification commands such as `cmd package dump PACKAGE | sed -n '1,120p'`, `cmd package query-activities --brief --user 0 -p PACKAGE -a android.intent.action.MAIN`, and `cmd package query-activities --brief --user 0 -p PACKAGE -a RELEVANT_ACTION`.
|
||||
- Do not enumerate every launcher activity on the device. Query specific candidate packages instead.
|
||||
""".trimIndent()
|
||||
private val PLANNER_OUTPUT_SCHEMA =
|
||||
JSONObject()
|
||||
.put("type", "object")
|
||||
.put(
|
||||
"properties",
|
||||
JSONObject()
|
||||
.put(
|
||||
"targets",
|
||||
JSONObject()
|
||||
.put("type", "array")
|
||||
.put("minItems", 1)
|
||||
.put(
|
||||
"items",
|
||||
JSONObject()
|
||||
.put("type", "object")
|
||||
.put(
|
||||
"properties",
|
||||
JSONObject()
|
||||
.put("packageName", JSONObject().put("type", "string"))
|
||||
.put("objective", JSONObject().put("type", "string")),
|
||||
)
|
||||
.put("required", JSONArray().put("packageName").put("objective"))
|
||||
.put("additionalProperties", false),
|
||||
),
|
||||
)
|
||||
.put("reason", JSONObject().put("type", "string"))
|
||||
.put("allowDetachedMode", JSONObject().put("type", "boolean")),
|
||||
)
|
||||
.put("required", JSONArray().put("targets").put("reason").put("allowDetachedMode"))
|
||||
.put("additionalProperties", false)
|
||||
|
||||
fun startSession(
|
||||
context: Context,
|
||||
@@ -62,42 +113,117 @@ object AgentTaskPlanner {
|
||||
allowDetachedMode = allowDetachedMode,
|
||||
)
|
||||
}
|
||||
var sessionStartResult: SessionStartResult? = null
|
||||
val frameworkToolBridge = AgentFrameworkToolBridge(context, sessionController)
|
||||
Log.i(TAG, "Planning Agent session for objective=${userObjective.take(160)}")
|
||||
AgentCodexAppServerClient.requestText(
|
||||
context = context,
|
||||
instructions = PLANNER_INSTRUCTIONS,
|
||||
prompt = buildPlannerPrompt(userObjective),
|
||||
dynamicTools = frameworkToolBridge.buildPlanningToolSpecs(),
|
||||
toolCallHandler = { toolName, arguments ->
|
||||
frameworkToolBridge.handleToolCall(
|
||||
toolName = toolName,
|
||||
arguments = arguments,
|
||||
userObjective = userObjective,
|
||||
onSessionStarted = { startedSession ->
|
||||
if (sessionStartResult != null) {
|
||||
throw IOException("Agent runtime attempted to start multiple Genie batches")
|
||||
}
|
||||
Log.i(
|
||||
TAG,
|
||||
"Framework tool started parent=${startedSession.parentSessionId} children=${startedSession.childSessionIds}",
|
||||
)
|
||||
sessionStartResult = startedSession
|
||||
},
|
||||
val isEligibleTargetPackage = { packageName: String ->
|
||||
runCatching { context.packageManager.getApplicationInfo(packageName, 0) }.isSuccess &&
|
||||
packageName !in setOf(
|
||||
"com.android.shell",
|
||||
"com.android.systemui",
|
||||
"com.openai.codexd",
|
||||
"com.openai.codex.genie",
|
||||
)
|
||||
},
|
||||
requestUserInputHandler = requestUserInputHandler,
|
||||
}
|
||||
var previousPlannerResponse: String? = null
|
||||
var plannerRequest: AgentFrameworkToolBridge.StartDirectSessionRequest? = null
|
||||
var lastPlannerError: IOException? = null
|
||||
for (attemptIndex in 0 until PLANNER_ATTEMPTS) {
|
||||
val plannerResponse = AgentCodexAppServerClient.requestText(
|
||||
context = context,
|
||||
instructions = PLANNER_INSTRUCTIONS,
|
||||
prompt = buildPlannerPrompt(
|
||||
userObjective = userObjective,
|
||||
previousPlannerResponse = previousPlannerResponse,
|
||||
previousPlannerError = lastPlannerError?.message,
|
||||
),
|
||||
outputSchema = PLANNER_OUTPUT_SCHEMA,
|
||||
requestUserInputHandler = requestUserInputHandler,
|
||||
)
|
||||
Log.i(TAG, "Planner response=${plannerResponse.take(400)}")
|
||||
previousPlannerResponse = plannerResponse
|
||||
val parsedRequest = runCatching {
|
||||
parsePlannerResponse(
|
||||
responseText = plannerResponse,
|
||||
userObjective = userObjective,
|
||||
isEligibleTargetPackage = isEligibleTargetPackage,
|
||||
)
|
||||
}.getOrElse { err ->
|
||||
if (err is IOException && attemptIndex < PLANNER_ATTEMPTS - 1) {
|
||||
Log.w(TAG, "Planner response rejected: ${err.message}")
|
||||
lastPlannerError = err
|
||||
continue
|
||||
}
|
||||
throw err
|
||||
}
|
||||
plannerRequest = parsedRequest
|
||||
break
|
||||
}
|
||||
val request = plannerRequest ?: throw (lastPlannerError
|
||||
?: IOException("Planner did not return a valid session plan"))
|
||||
val sessionStartResult = sessionController.startDirectSession(
|
||||
plan = request.plan,
|
||||
allowDetachedMode = allowDetachedMode && request.allowDetachedMode,
|
||||
)
|
||||
Log.i(TAG, "Planner sessionStartResult=$sessionStartResult")
|
||||
return sessionStartResult
|
||||
?: throw IOException("Agent runtime did not launch any Genie sessions")
|
||||
}
|
||||
|
||||
private fun buildPlannerPrompt(userObjective: String): String {
|
||||
return """
|
||||
User objective:
|
||||
$userObjective
|
||||
""".trimIndent()
|
||||
private fun buildPlannerPrompt(
|
||||
userObjective: String,
|
||||
previousPlannerResponse: String?,
|
||||
previousPlannerError: String?,
|
||||
): String {
|
||||
return buildString {
|
||||
appendLine("User objective:")
|
||||
appendLine(userObjective)
|
||||
if (!previousPlannerError.isNullOrBlank()) {
|
||||
appendLine()
|
||||
appendLine("Previous candidate plan was rejected by host validation:")
|
||||
appendLine(previousPlannerError)
|
||||
appendLine("Choose a different installed target package and verify it with focused package commands.")
|
||||
}
|
||||
if (!previousPlannerResponse.isNullOrBlank()) {
|
||||
appendLine()
|
||||
appendLine("Previous invalid planner response:")
|
||||
appendLine(previousPlannerResponse)
|
||||
}
|
||||
}.trim()
|
||||
}
|
||||
|
||||
internal fun parsePlannerResponse(
|
||||
responseText: String,
|
||||
userObjective: String,
|
||||
isEligibleTargetPackage: (String) -> Boolean,
|
||||
): AgentFrameworkToolBridge.StartDirectSessionRequest {
|
||||
val plannerJson = extractPlannerJson(responseText)
|
||||
return AgentFrameworkToolBridge.parseStartDirectSessionArguments(
|
||||
arguments = plannerJson,
|
||||
userObjective = userObjective,
|
||||
isEligibleTargetPackage = isEligibleTargetPackage,
|
||||
)
|
||||
}
|
||||
|
||||
private fun extractPlannerJson(responseText: String): JSONObject {
|
||||
val trimmed = responseText.trim()
|
||||
parseJsonObject(trimmed)?.let { return it }
|
||||
val unfenced = trimmed
|
||||
.removePrefix("```json")
|
||||
.removePrefix("```")
|
||||
.removeSuffix("```")
|
||||
.trim()
|
||||
parseJsonObject(unfenced)?.let { return it }
|
||||
val firstBrace = trimmed.indexOf('{')
|
||||
val lastBrace = trimmed.lastIndexOf('}')
|
||||
if (firstBrace >= 0 && lastBrace > firstBrace) {
|
||||
parseJsonObject(trimmed.substring(firstBrace, lastBrace + 1))?.let { return it }
|
||||
}
|
||||
throw IOException("Planner did not return a valid JSON object")
|
||||
}
|
||||
|
||||
private fun parseJsonObject(text: String): JSONObject? {
|
||||
return runCatching {
|
||||
val tokener = JSONTokener(text)
|
||||
val value = tokener.nextValue()
|
||||
value as? JSONObject
|
||||
}.getOrNull()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ package com.openai.codexd
|
||||
import android.app.Service
|
||||
import android.content.Intent
|
||||
import android.os.IBinder
|
||||
import android.os.ParcelFileDescriptor
|
||||
import android.util.Log
|
||||
import com.openai.codex.bridge.BridgeHttpResponse
|
||||
import com.openai.codex.bridge.BridgeRuntimeStatus
|
||||
@@ -45,6 +46,19 @@ class CodexAgentBridgeService : Service() {
|
||||
Log.i(TAG, "Proxied /v1/responses")
|
||||
return BridgeHttpResponse(response.statusCode, response.body)
|
||||
}
|
||||
|
||||
override fun openResponsesStream(requestBody: String?): ParcelFileDescriptor {
|
||||
val stream = runCatching {
|
||||
AgentResponsesProxy.openResponsesStream(
|
||||
this@CodexAgentBridgeService,
|
||||
requestBody.orEmpty(),
|
||||
)
|
||||
}.getOrElse { err ->
|
||||
throw err.asBinderError("openResponsesStream")
|
||||
}
|
||||
Log.i(TAG, "Opened /v1/responses stream")
|
||||
return stream
|
||||
}
|
||||
}
|
||||
|
||||
override fun onBind(intent: Intent?): IBinder {
|
||||
|
||||
@@ -20,6 +20,9 @@ class CodexAgentService : AgentService() {
|
||||
private const val AUTO_ANSWER_INSTRUCTIONS =
|
||||
"You are Codex acting as the Android Agent supervising a Genie execution. If you can answer the current Genie question from the available session context, call the framework session tool `android.framework.sessions.answer_question` exactly once with a short free-form answer. You may inspect current framework state with `android.framework.sessions.list`. If user input is required, do not call any framework tool. Instead reply with `ESCALATE: ` followed by the exact question the Agent should ask the user."
|
||||
private const val MAX_AUTO_ANSWER_CONTEXT_CHARS = 800
|
||||
private val handledGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
|
||||
private val pendingGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
|
||||
private val handledBridgeRequests = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
|
||||
}
|
||||
|
||||
private sealed class AutoAnswerResult {
|
||||
@@ -30,11 +33,14 @@ class CodexAgentService : AgentService() {
|
||||
) : AutoAnswerResult()
|
||||
}
|
||||
|
||||
private val handledGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
|
||||
private val pendingGenieQuestions = java.util.concurrent.ConcurrentHashMap.newKeySet<String>()
|
||||
private val agentManager by lazy { getSystemService(AgentManager::class.java) }
|
||||
private val sessionController by lazy { AgentSessionController(this) }
|
||||
|
||||
override fun onCreate() {
|
||||
super.onCreate()
|
||||
AgentSocketBridgeServer.ensureStarted(this)
|
||||
}
|
||||
|
||||
override fun onSessionChanged(session: AgentSessionInfo) {
|
||||
Log.i(TAG, "onSessionChanged $session")
|
||||
maybeAutoAnswerGenieQuestion(session)
|
||||
@@ -45,6 +51,7 @@ class CodexAgentService : AgentService() {
|
||||
Log.i(TAG, "onSessionRemoved sessionId=$sessionId")
|
||||
AgentQuestionNotifier.cancel(this, sessionId)
|
||||
handledGenieQuestions.removeIf { it.startsWith("$sessionId:") }
|
||||
handledBridgeRequests.removeIf { it.startsWith("$sessionId:") }
|
||||
pendingGenieQuestions.removeIf { it.startsWith("$sessionId:") }
|
||||
}
|
||||
|
||||
@@ -227,6 +234,20 @@ class CodexAgentService : AgentService() {
|
||||
) {
|
||||
val request = JSONObject(question.removePrefix(BRIDGE_REQUEST_PREFIX))
|
||||
val requestId = request.optString("requestId")
|
||||
if (requestId.isNotBlank()) {
|
||||
val bridgeRequestKey = "${session.sessionId}:$requestId"
|
||||
if (!handledBridgeRequests.add(bridgeRequestKey)) {
|
||||
Log.i(
|
||||
TAG,
|
||||
"Skipping duplicate bridge question method=${request.optString("method")} requestId=$requestId session=${session.sessionId}",
|
||||
)
|
||||
return
|
||||
}
|
||||
}
|
||||
Log.i(
|
||||
TAG,
|
||||
"Answering bridge question method=${request.optString("method")} requestId=$requestId session=${session.sessionId}",
|
||||
)
|
||||
val response: JSONObject = runCatching {
|
||||
when (request.optString("method")) {
|
||||
BRIDGE_METHOD_GET_RUNTIME_STATUS -> {
|
||||
@@ -292,6 +313,14 @@ class CodexAgentService : AgentService() {
|
||||
}
|
||||
|
||||
private fun genieQuestionKey(sessionId: String, question: String): String {
|
||||
if (isBridgeQuestion(question)) {
|
||||
val requestId = runCatching {
|
||||
JSONObject(question.removePrefix(BRIDGE_REQUEST_PREFIX)).optString("requestId").trim()
|
||||
}.getOrNull()
|
||||
if (!requestId.isNullOrEmpty()) {
|
||||
return "$sessionId:bridge:$requestId"
|
||||
}
|
||||
}
|
||||
return "$sessionId:$question"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ class MainActivity : Activity() {
|
||||
private const val STATUS_REFRESH_INTERVAL_MS = 2000L
|
||||
private const val ACTION_DEBUG_START_AGENT_SESSION =
|
||||
"com.openai.codexd.action.DEBUG_START_AGENT_SESSION"
|
||||
private const val ACTION_DEBUG_CANCEL_ALL_AGENT_SESSIONS =
|
||||
"com.openai.codexd.action.DEBUG_CANCEL_ALL_AGENT_SESSIONS"
|
||||
private const val EXTRA_DEBUG_PROMPT = "prompt"
|
||||
private const val EXTRA_DEBUG_TARGET_PACKAGE = "targetPackage"
|
||||
}
|
||||
@@ -86,6 +88,7 @@ class MainActivity : Activity() {
|
||||
|
||||
override fun onCreate(savedInstanceState: Bundle?) {
|
||||
super.onCreate(savedInstanceState)
|
||||
AgentSocketBridgeServer.ensureStarted(this)
|
||||
setContentView(R.layout.activity_main)
|
||||
updatePaths()
|
||||
handleSessionIntent(intent)
|
||||
@@ -131,6 +134,25 @@ class MainActivity : Activity() {
|
||||
}
|
||||
|
||||
private fun maybeStartSessionFromIntent(intent: Intent?) {
|
||||
if (intent?.action == ACTION_DEBUG_CANCEL_ALL_AGENT_SESSIONS) {
|
||||
Log.i(TAG, "Handling debug cancel-all Agent sessions intent")
|
||||
thread {
|
||||
val result = runCatching { agentSessionController.cancelActiveSessions() }
|
||||
result.onFailure { err ->
|
||||
Log.w(TAG, "Failed to cancel Agent sessions from debug intent", err)
|
||||
showToast("Failed to cancel active sessions: ${err.message}")
|
||||
}
|
||||
result.onSuccess { cancelResult ->
|
||||
focusedFrameworkSessionId = null
|
||||
val cancelledCount = cancelResult.cancelledSessionIds.size
|
||||
val failedCount = cancelResult.failedSessionIds.size
|
||||
showToast("Cancelled $cancelledCount sessions, $failedCount failed")
|
||||
refreshAgentSessions(force = true)
|
||||
}
|
||||
}
|
||||
intent.action = null
|
||||
return
|
||||
}
|
||||
if (intent?.action != ACTION_DEBUG_START_AGENT_SESSION) {
|
||||
return
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user