Bridge Android framework session APIs

Co-authored-by: Codex <noreply@openai.com>
This commit is contained in:
Iliyan Malchev
2026-03-19 14:15:37 -07:00
parent 186368fe88
commit be817e3421
6 changed files with 400 additions and 363 deletions

View File

@@ -0,0 +1,287 @@
package com.openai.codexd
import android.content.Context
import java.io.IOException
import org.json.JSONArray
import org.json.JSONObject
/**
 * Bridges dynamic tool calls onto an [AgentSessionController].
 *
 * The bridge does two things:
 *  - builds the JSON tool specifications (name, description, input schema) advertised to the
 *    runtime, and
 *  - dispatches an incoming tool call by name to the matching controller operation, returning a
 *    JSON result of the form `{"success": true, "contentItems": [{"type": "inputText", ...}]}`.
 *
 * Failures caused by unusable arguments or an unknown tool name are reported as [IOException].
 *
 * @param context used to ask the package manager whether a requested package has a launch intent.
 * @param sessionController receives the start/list/answer/attach/cancel operations.
 */
class AgentFrameworkToolBridge(
    private val context: Context,
    private val sessionController: AgentSessionController,
) {
    companion object {
        /** Tool name: start direct parent/child sessions for a set of target packages. */
        const val START_DIRECT_SESSION_TOOL = "android.framework.sessions.start_direct"

        /** Tool name: list the sessions in the controller's current snapshot. */
        const val LIST_SESSIONS_TOOL = "android.framework.sessions.list"

        /** Tool name: answer a question raised by a session. */
        const val ANSWER_QUESTION_TOOL = "android.framework.sessions.answer_question"

        /** Tool name: request that a session's target be attached. */
        const val ATTACH_TARGET_TOOL = "android.framework.sessions.attach_target"

        /** Tool name: cancel a session. */
        const val CANCEL_SESSION_TOOL = "android.framework.sessions.cancel"

        /**
         * Converts the arguments of [START_DIRECT_SESSION_TOOL] into a [StartDirectSessionRequest].
         *
         * Entries of `targets` that are not JSON objects, have no package name, or name a package
         * rejected by [isLaunchablePackage] are skipped. When the same package appears more than
         * once, only the first entry is kept. A target without an objective inherits
         * [userObjective]. `allowDetachedMode` defaults to `true` when absent.
         *
         * @param arguments raw tool-call arguments.
         * @param userObjective the user's original request; used as the plan's objective and as the
         *   fallback objective for each target.
         * @param isLaunchablePackage predicate deciding whether a package may be targeted.
         * @throws IOException if `targets` is missing or no usable target remains after filtering.
         */
        internal fun parseStartDirectSessionArguments(
            arguments: JSONObject,
            userObjective: String,
            isLaunchablePackage: (String) -> Boolean,
        ): StartDirectSessionRequest {
            val targetsJson = arguments.optJSONArray("targets")
                ?: throw IOException("Framework session tool arguments missing targets")
            val targets = buildList {
                for (index in 0 until targetsJson.length()) {
                    val target = targetsJson.optJSONObject(index) ?: continue
                    val packageName = optTrimmedString(target, "packageName")
                    if (packageName.isEmpty() || !isLaunchablePackage(packageName)) {
                        continue
                    }
                    val objective = optTrimmedString(target, "objective").ifEmpty { userObjective }
                    add(
                        AgentDelegationTarget(
                            packageName = packageName,
                            objective = objective,
                        ),
                    )
                }
            }.distinctBy(AgentDelegationTarget::packageName)
            if (targets.isEmpty()) {
                throw IOException("Framework session tool did not select a launchable package")
            }
            return StartDirectSessionRequest(
                plan = AgentDelegationPlan(
                    originalObjective = userObjective,
                    targets = targets,
                    rationale = optTrimmedString(arguments, "reason").ifEmpty { null },
                    usedOverride = false,
                ),
                allowDetachedMode = arguments.optBoolean("allowDetachedMode", true),
            )
        }

        /**
         * Reads [name] from [json] as a trimmed string, returning `""` when the key is absent or
         * explicitly JSON `null`.
         *
         * The `isNull` guard matters: Android's `JSONObject.optString` stringifies
         * `JSONObject.NULL`, so `{"reason": null}` would otherwise be read as the text "null".
         */
        private fun optTrimmedString(json: JSONObject, name: String): String =
            if (json.isNull(name)) "" else json.optString(name).trim()
    }

    /**
     * Parsed form of a [START_DIRECT_SESSION_TOOL] call.
     *
     * @property plan delegation plan handed to the session controller.
     * @property allowDetachedMode value of the `allowDetachedMode` argument (`true` if omitted).
     */
    data class StartDirectSessionRequest(
        val plan: AgentDelegationPlan,
        val allowDetachedMode: Boolean,
    )

    /** Returns the tool specs offered while planning: only [START_DIRECT_SESSION_TOOL]. */
    fun buildPlanningToolSpecs(): JSONArray {
        return JSONArray().put(buildStartDirectSessionToolSpec())
    }

    /** Returns the tool specs for managing sessions: list, answer, attach and cancel. */
    fun buildSessionManagementToolSpecs(): JSONArray {
        return JSONArray()
            .put(buildListSessionsToolSpec())
            .put(buildAnswerQuestionToolSpec())
            .put(buildAttachTargetToolSpec())
            .put(buildCancelSessionToolSpec())
    }

    /**
     * Executes the tool named [toolName] and returns its JSON result.
     *
     * @param toolName one of the `*_TOOL` constants of this class.
     * @param arguments raw arguments of the tool call.
     * @param userObjective the user's original request; only read by [START_DIRECT_SESSION_TOOL].
     * @param onSessionStarted optional callback invoked with the controller's result after a
     *   direct session has been started and before the tool result is built.
     * @param focusedSessionId passed to the controller when loading the snapshot for
     *   [LIST_SESSIONS_TOOL].
     * @throws IOException if [toolName] is not supported or a required argument is missing,
     *   JSON `null`, or blank.
     */
    fun handleToolCall(
        toolName: String,
        arguments: JSONObject,
        userObjective: String,
        onSessionStarted: ((SessionStartResult) -> Unit)? = null,
        focusedSessionId: String? = null,
    ): JSONObject {
        return when (toolName) {
            START_DIRECT_SESSION_TOOL -> {
                val request = parseStartDirectSessionArguments(
                    arguments = arguments,
                    userObjective = userObjective,
                    isLaunchablePackage = ::isLaunchablePackage,
                )
                val startedSession = sessionController.startDirectSession(
                    plan = request.plan,
                    allowDetachedMode = request.allowDetachedMode,
                )
                onSessionStarted?.invoke(startedSession)
                successText(
                    JSONObject()
                        .put("parentSessionId", startedSession.parentSessionId)
                        .put("childSessionIds", JSONArray(startedSession.childSessionIds))
                        .put("plannedTargets", JSONArray(startedSession.plannedTargets))
                        .put("geniePackage", startedSession.geniePackage)
                        .toString(),
                )
            }
            LIST_SESSIONS_TOOL -> {
                val snapshot = sessionController.loadSnapshot(focusedSessionId)
                successText(renderSessionSnapshot(snapshot).toString())
            }
            ANSWER_QUESTION_TOOL -> {
                val sessionId = requireString(arguments, "sessionId")
                val answer = requireString(arguments, "answer")
                // Optional argument: absent, JSON null and blank all mean "no parent session".
                val parentSessionId = optTrimmedString(arguments, "parentSessionId").ifEmpty { null }
                sessionController.answerQuestion(sessionId, answer, parentSessionId)
                successText("Answered framework session $sessionId.")
            }
            ATTACH_TARGET_TOOL -> {
                val sessionId = requireString(arguments, "sessionId")
                sessionController.attachTarget(sessionId)
                successText("Requested target attach for framework session $sessionId.")
            }
            CANCEL_SESSION_TOOL -> {
                val sessionId = requireString(arguments, "sessionId")
                sessionController.cancelSession(sessionId)
                successText("Cancelled framework session $sessionId.")
            }
            else -> throw IOException("Unsupported framework session tool: $toolName")
        }
    }

    /** Spec for [START_DIRECT_SESSION_TOOL]: a required `targets` array plus optional hints. */
    private fun buildStartDirectSessionToolSpec(): JSONObject {
        val targetItemSchema = objectSchema(
            JSONObject()
                .put("packageName", stringSchema("Installed target Android package name."))
                .put("objective", stringSchema("Delegated free-form objective for the child Genie.")),
            "packageName",
        )
        val properties = JSONObject()
            .put(
                "targets",
                JSONObject()
                    .put("type", "array")
                    .put("items", targetItemSchema),
            )
            .put("reason", stringSchema("Short explanation for why these target packages were selected."))
            .put(
                "allowDetachedMode",
                JSONObject()
                    .put("type", "boolean")
                    .put("description", "Whether Genie child sessions may use detached target mode."),
            )
        return toolSpec(
            name = START_DIRECT_SESSION_TOOL,
            description = "Start direct parent and child framework sessions for one or more target Android packages.",
            inputSchema = objectSchema(properties, "targets"),
        )
    }

    /** Spec for [LIST_SESSIONS_TOOL]; it takes no arguments. */
    private fun buildListSessionsToolSpec(): JSONObject {
        return toolSpec(
            name = LIST_SESSIONS_TOOL,
            description = "List the current Android framework sessions visible to the Agent.",
            inputSchema = objectSchema(JSONObject()),
        )
    }

    /** Spec for [ANSWER_QUESTION_TOOL]: `sessionId` and `answer` required, `parentSessionId` optional. */
    private fun buildAnswerQuestionToolSpec(): JSONObject {
        val properties = JSONObject()
            .put("sessionId", stringSchema("Framework session id to answer."))
            .put("answer", stringSchema("Free-form answer text."))
            .put("parentSessionId", stringSchema("Optional parent framework session id for trace publication."))
        return toolSpec(
            name = ANSWER_QUESTION_TOOL,
            description = "Answer a waiting Android framework session question.",
            inputSchema = objectSchema(properties, "sessionId", "answer"),
        )
    }

    /** Spec for [ATTACH_TARGET_TOOL]: a single required `sessionId`. */
    private fun buildAttachTargetToolSpec(): JSONObject {
        val properties = JSONObject()
            .put("sessionId", stringSchema("Framework session id whose target should be attached."))
        return toolSpec(
            name = ATTACH_TARGET_TOOL,
            description = "Request the framework to attach the detached target back to the current display.",
            inputSchema = objectSchema(properties, "sessionId"),
        )
    }

    /** Spec for [CANCEL_SESSION_TOOL]: a single required `sessionId`. */
    private fun buildCancelSessionToolSpec(): JSONObject {
        val properties = JSONObject()
            .put("sessionId", stringSchema("Framework session id to cancel."))
        return toolSpec(
            name = CANCEL_SESSION_TOOL,
            description = "Cancel an Android framework session.",
            inputSchema = objectSchema(properties, "sessionId"),
        )
    }

    /** Serialises [snapshot] (top-level selection fields plus one entry per session) to JSON. */
    private fun renderSessionSnapshot(snapshot: AgentSnapshot): JSONObject {
        val sessions = JSONArray()
        snapshot.sessions.forEach { session ->
            sessions.put(
                JSONObject()
                    .put("sessionId", session.sessionId)
                    .put("parentSessionId", session.parentSessionId)
                    .put("targetPackage", session.targetPackage)
                    .put("state", session.stateLabel)
                    .put("targetDetached", session.targetDetached),
            )
        }
        return JSONObject()
            .put("available", snapshot.available)
            .put("selectedGeniePackage", snapshot.selectedGeniePackage)
            .put("selectedSessionId", snapshot.selectedSession?.sessionId)
            .put("parentSessionId", snapshot.parentSession?.sessionId)
            .put("sessions", sessions)
    }

    /** A package counts as launchable when the package manager can resolve a launch intent for it. */
    private fun isLaunchablePackage(packageName: String): Boolean {
        return context.packageManager.getLaunchIntentForPackage(packageName) != null
    }

    /**
     * Returns the trimmed string argument [fieldName].
     *
     * @throws IOException if the argument is absent, JSON `null`, or blank.
     */
    private fun requireString(arguments: JSONObject, fieldName: String): String {
        return optTrimmedString(arguments, fieldName).ifEmpty {
            throw IOException("Framework session tool requires non-empty $fieldName")
        }
    }

    /** Wraps [text] in the success envelope: one `inputText` content item. */
    private fun successText(text: String): JSONObject {
        return JSONObject()
            .put("success", true)
            .put(
                "contentItems",
                JSONArray().put(
                    JSONObject()
                        .put("type", "inputText")
                        .put("text", text),
                ),
            )
    }

    /** Assembles a tool spec from its three parts. */
    private fun toolSpec(name: String, description: String, inputSchema: JSONObject): JSONObject {
        return JSONObject()
            .put("name", name)
            .put("description", description)
            .put("inputSchema", inputSchema)
    }

    /**
     * Builds a closed (`additionalProperties: false`) object schema.
     *
     * The `required` key is emitted only when at least one name is given, so argument-less tools
     * get a schema without it.
     */
    private fun objectSchema(properties: JSONObject, vararg required: String): JSONObject {
        val schema = JSONObject()
            .put("type", "object")
            .put("properties", properties)
        if (required.isNotEmpty()) {
            val requiredNames = JSONArray()
            for (name in required) {
                requiredNames.put(name)
            }
            schema.put("required", requiredNames)
        }
        return schema.put("additionalProperties", false)
    }

    /** Builds a `string` property schema carrying [description]. */
    private fun stringSchema(description: String): JSONObject {
        return JSONObject()
            .put("type", "string")
            .put("description", description)
    }
}

View File

@@ -1,28 +0,0 @@
package com.openai.codexd
import android.content.Context
import android.content.Intent
/**
 * One launchable application as reported by [AgentInstalledAppCatalog.listLaunchableApps].
 *
 * @property packageName package name taken from the resolved launcher activity.
 * @property label display label of the launcher activity; the catalog substitutes the package
 *   name when the loaded label is blank.
 */
data class InstalledLaunchableApp(
val packageName: String,
val label: String,
)
/** Looks up the applications that expose a launcher entry point. */
object AgentInstalledAppCatalog {
    /**
     * Queries the package manager for `ACTION_MAIN` / `CATEGORY_LAUNCHER` activities and returns
     * one [InstalledLaunchableApp] per package.
     *
     * When a package exposes several launcher activities, the first one returned by the query
     * wins. The result is ordered by label and then by package name.
     */
    fun listLaunchableApps(context: Context): List<InstalledLaunchableApp> {
        val pm = context.packageManager
        val launcherQuery = Intent(Intent.ACTION_MAIN).addCategory(Intent.CATEGORY_LAUNCHER)

        val resolvedApps = pm.queryIntentActivities(launcherQuery, 0).map { info ->
            val pkg = info.activityInfo.packageName
            val loadedLabel = info.loadLabel(pm)?.toString().orEmpty()
            // A blank label is useless for display, so show the package name instead.
            InstalledLaunchableApp(
                packageName = pkg,
                label = if (loadedLabel.isBlank()) pkg else loadedLabel,
            )
        }

        val onePerPackage = resolvedApps.distinctBy { it.packageName }
        val byLabelThenPackage = compareBy(InstalledLaunchableApp::label, InstalledLaunchableApp::packageName)
        return onePerPackage.sortedWith(byLabelThenPackage)
    }
}

View File

@@ -2,8 +2,6 @@ package com.openai.codexd
import android.content.Context
import java.io.IOException
import org.json.JSONArray
import org.json.JSONObject
data class AgentDelegationTarget(
val packageName: String,
@@ -21,20 +19,17 @@ data class AgentDelegationPlan(
}
object AgentTaskPlanner {
private const val MAX_LAUNCHABLE_APPS = 80
private const val LIST_LAUNCHABLE_APPS_TOOL = "android.apps.list_launchable"
private const val START_GENIE_SESSIONS_TOOL = "android.agent.start_genie_sessions"
private val PLANNER_INSTRUCTIONS =
"""
You are Codex acting as the Android Agent orchestrator.
The user interacts only with the Agent. Decide which installed Android packages should receive delegated Genie sessions.
Use the available Android app-list tool before selecting targets.
Choose the fewest packages needed to complete the request and then call the Genie-session launch tool exactly once.
Use the standard Android shell tools already available in this runtime, such as `cmd package`, `pm`, and `am`, to inspect installed packages and resolve the correct targets.
After deciding on the target packages, call the framework session tool `${AgentFrameworkToolBridge.START_DIRECT_SESSION_TOOL}` exactly once.
Rules:
- Use only package names returned by the Android app-list tool.
- The launch tool `targets` must be non-empty.
- Choose the fewest packages needed to complete the request.
- The framework session tool `targets` must be non-empty.
- Each delegated `objective` should be written for the child Genie, not the user.
- After the launch tool succeeds, reply with a short summary for the Agent UI.
- After the framework session tool succeeds, reply with a short summary for the Agent UI.
""".trimIndent()
fun startSession(
@@ -60,25 +55,18 @@ object AgentTaskPlanner {
allowDetachedMode = allowDetachedMode,
)
}
val launchableApps = AgentInstalledAppCatalog.listLaunchableApps(context)
.take(MAX_LAUNCHABLE_APPS)
if (launchableApps.isEmpty()) {
throw IOException("No launchable apps available for planning")
}
var sessionStartResult: SessionStartResult? = null
val frameworkToolBridge = AgentFrameworkToolBridge(context, sessionController)
AgentCodexAppServerClient.requestText(
context = context,
instructions = PLANNER_INSTRUCTIONS,
prompt = buildPlannerPrompt(userObjective),
dynamicTools = buildDynamicToolSpecs(),
dynamicTools = frameworkToolBridge.buildPlanningToolSpecs(),
toolCallHandler = { toolName, arguments ->
handleToolCall(
frameworkToolBridge.handleToolCall(
toolName = toolName,
arguments = arguments,
launchableApps = launchableApps,
userObjective = userObjective,
allowDetachedMode = allowDetachedMode,
sessionController = sessionController,
onSessionStarted = { startedSession ->
if (sessionStartResult != null) {
throw IOException("Agent runtime attempted to start multiple Genie batches")
@@ -92,208 +80,10 @@ object AgentTaskPlanner {
?: throw IOException("Agent runtime did not launch any Genie sessions")
}
/**
 * Parses a free-text planner reply into an [AgentDelegationPlan].
 *
 * The JSON object is located with `extractJsonObject`, its `targets` array is validated with
 * `parseTargets`, and the optional `reason` becomes the plan's rationale.
 *
 * @param responseText raw planner output expected to contain a JSON object.
 * @param userObjective the user's original request, stored as the plan's objective.
 * @param allowedPackageNames packages the planner is permitted to select.
 * @throws IOException if `targets` is missing; the helpers also throw [IOException] when no JSON
 *   object is found or no allowed package was selected.
 */
internal fun parsePlanResponse(
    responseText: String,
    userObjective: String,
    allowedPackageNames: Set<String>,
): AgentDelegationPlan {
    val responseJson = extractJsonObject(responseText)
    val targetsJson = responseJson.optJSONArray("targets")
        ?: throw IOException("Planner response missing targets")
    val targets = parseTargets(
        targetsJson = targetsJson,
        userObjective = userObjective,
        allowedPackageNames = allowedPackageNames,
    )
    // Android's optString turns an explicit JSON null into the text "null"; treat it as absent.
    val rationale = if (responseJson.isNull("reason")) {
        null
    } else {
        responseJson.optString("reason").ifBlank { null }
    }
    return AgentDelegationPlan(
        originalObjective = userObjective,
        targets = targets,
        rationale = rationale,
        usedOverride = false,
    )
}
/**
 * Builds the planner prompt: a `User objective:` header line followed by [userObjective].
 *
 * `trimIndent()` is applied to the assembled text, which drops the blank first and last lines of
 * the template and normalises line breaks to `\n`.
 */
private fun buildPlannerPrompt(userObjective: String): String {
    val template = "\nUser objective:\n$userObjective\n"
    return template.trimIndent()
}
/**
 * Builds the two planner tool specs, in order: the argument-less app-list tool and the
 * Genie-session launch tool (required `targets` array, optional `reason`).
 */
private fun buildDynamicToolSpecs(): JSONArray {
    // Tool 1: list launchable apps. Closed object schema with no properties.
    val listAppsInput = JSONObject()
        .put("type", "object")
        .put("properties", JSONObject())
        .put("additionalProperties", false)
    val listAppsSpec = JSONObject()
        .put("name", LIST_LAUNCHABLE_APPS_TOOL)
        .put("description", "List the launchable Android packages currently installed on this device.")
        .put("inputSchema", listAppsInput)

    // Tool 2: start Genie sessions. Each target needs a packageName; objective is optional.
    val targetFields = JSONObject()
        .put("packageName", stringSchema("Installed Android package name."))
        .put("objective", stringSchema("Delegated free-form objective for the child Genie."))
    val targetItem = JSONObject()
        .put("type", "object")
        .put("properties", targetFields)
        .put("required", JSONArray().put("packageName"))
        .put("additionalProperties", false)
    val targetsArray = JSONObject()
        .put("type", "array")
        .put("items", targetItem)
    val launchFields = JSONObject()
        .put("targets", targetsArray)
        .put("reason", stringSchema("Short explanation for why these targets were selected."))
    val launchInput = JSONObject()
        .put("type", "object")
        .put("properties", launchFields)
        .put("required", JSONArray().put("targets"))
        .put("additionalProperties", false)
    val launchSpec = JSONObject()
        .put("name", START_GENIE_SESSIONS_TOOL)
        .put("description", "Start the child Genie sessions needed for the user objective.")
        .put("inputSchema", launchInput)

    val specs = JSONArray()
    specs.put(listAppsSpec)
    specs.put(launchSpec)
    return specs
}
/**
 * Converts the arguments of the Genie-session launch tool into an [AgentDelegationPlan].
 *
 * @param arguments raw tool-call arguments; must contain a `targets` array.
 * @param userObjective the user's original request, stored as the plan's objective.
 * @param allowedPackageNames packages the tool is permitted to target.
 * @throws IOException if `targets` is missing; `parseTargets` also throws [IOException] when no
 *   allowed package was selected.
 */
internal fun parseLaunchToolArguments(
    arguments: JSONObject,
    userObjective: String,
    allowedPackageNames: Set<String>,
): AgentDelegationPlan {
    val targetsJson = arguments.optJSONArray("targets")
        ?: throw IOException("Launch tool arguments missing targets")
    val targets = parseTargets(
        targetsJson = targetsJson,
        userObjective = userObjective,
        allowedPackageNames = allowedPackageNames,
    )
    // Android's optString turns an explicit JSON null into the text "null"; treat it as absent.
    val rationale = if (arguments.isNull("reason")) {
        null
    } else {
        arguments.optString("reason").ifBlank { null }
    }
    return AgentDelegationPlan(
        originalObjective = userObjective,
        targets = targets,
        rationale = rationale,
        usedOverride = false,
    )
}
/**
 * Dispatches one planner tool call.
 *
 * - The app-list tool answers with a bulleted list built from [launchableApps].
 * - The launch tool parses the plan (restricted to the packages in [launchableApps]), starts the
 *   direct session through [sessionController], notifies [onSessionStarted], and answers with a
 *   one-line summary.
 *
 * @throws IOException if [toolName] is neither of the two planner tools.
 */
private fun handleToolCall(
    toolName: String,
    arguments: JSONObject,
    launchableApps: List<InstalledLaunchableApp>,
    userObjective: String,
    allowDetachedMode: Boolean,
    sessionController: AgentSessionController,
    onSessionStarted: (SessionStartResult) -> Unit,
): JSONObject {
    // Both tools reply with the same envelope: success flag plus one inputText item.
    fun textResult(text: String): JSONObject {
        val item = JSONObject()
            .put("type", "inputText")
            .put("text", text)
        return JSONObject()
            .put("success", true)
            .put("contentItems", JSONArray().put(item))
    }

    if (toolName == LIST_LAUNCHABLE_APPS_TOOL) {
        val appList = launchableApps.joinToString(separator = "\n") { "- ${it.label} (${it.packageName})" }
        return textResult("Launchable Android apps:\n$appList")
    }

    if (toolName == START_GENIE_SESSIONS_TOOL) {
        val allowedPackageNames: Set<String> = launchableApps.map { it.packageName }.toSet()
        val plan = parseLaunchToolArguments(
            arguments = arguments,
            userObjective = userObjective,
            allowedPackageNames = allowedPackageNames,
        )
        val startedSession = sessionController.startDirectSession(
            plan = plan,
            allowDetachedMode = allowDetachedMode,
        )
        // Notify the caller before building the reply, as a throwing callback must abort the call.
        onSessionStarted(startedSession)
        return textResult(
            "Started parent session ${startedSession.parentSessionId} for ${startedSession.plannedTargets.joinToString(", ")} using ${startedSession.geniePackage}.",
        )
    }

    throw IOException("Unsupported Agent planning tool: $toolName")
}
/**
 * Extracts the delegation targets from [targetsJson].
 *
 * Entries that are not JSON objects, have no package name, or name a package outside
 * [allowedPackageNames] are skipped. When a package appears more than once, only the first entry
 * is kept. A target whose objective is absent, JSON `null`, or blank inherits [userObjective].
 *
 * @throws IOException if no usable target remains.
 */
private fun parseTargets(
    targetsJson: JSONArray,
    userObjective: String,
    allowedPackageNames: Set<String>,
): List<AgentDelegationTarget> {
    val targets = buildList {
        for (index in 0 until targetsJson.length()) {
            val target = targetsJson.optJSONObject(index) ?: continue
            val packageName = target.optString("packageName").trim()
            if (packageName.isEmpty() || !allowedPackageNames.contains(packageName)) {
                continue
            }
            // Android's optString turns an explicit JSON null into the text "null"; without the
            // isNull guard the child would receive "null" as its objective.
            val requestedObjective = if (target.isNull("objective")) {
                ""
            } else {
                target.optString("objective").trim()
            }
            add(
                AgentDelegationTarget(
                    packageName = packageName,
                    objective = requestedObjective.ifEmpty { userObjective },
                ),
            )
        }
    }.distinctBy(AgentDelegationTarget::packageName)
    if (targets.isEmpty()) {
        throw IOException("Planner response did not select an installed package")
    }
    return targets
}
/** Builds a JSON-schema fragment for a `string` property described by [description]. */
private fun stringSchema(description: String): JSONObject {
    val schema = JSONObject()
    schema.put("type", "string")
    schema.put("description", description)
    return schema
}
/**
 * Pulls a JSON object out of [responseText] by parsing everything from the first `{` to the
 * last `}` (inclusive), which tolerates text before and after the object.
 *
 * @throws IOException if no such span exists or the span is not valid JSON.
 */
private fun extractJsonObject(responseText: String): JSONObject {
    val openIndex = responseText.indexOf('{')
    val closeIndex = responseText.lastIndexOf('}')
    val hasBracedSpan = openIndex != -1 && closeIndex != -1 && closeIndex > openIndex
    if (!hasBracedSpan) {
        throw IOException("Planner response did not contain JSON")
    }
    val candidate = responseText.substring(openIndex, closeIndex + 1)
    try {
        return JSONObject(candidate)
    } catch (err: Exception) {
        throw IOException("Planner response was not valid JSON: ${err.message}", err)
    }
}
}