feat(core): update Copilot for token-based billing (#30181)

2026-06-01 22:10:06 +02:00 · 2026-06-01 14:55:23 -05:00
parent fa23fb5d38
commit ae92f3158f
10 changed files with 340 additions and 77 deletions
@@ -10,6 +10,8 @@ import { MessageV2 } from "@/session/message-v2"
 const log = Log.create({ service: "plugin.copilot" })

 const CLIENT_ID = "Ov23li8tweQw6odWQebz"
+// Value of the `X-GitHub-Api-Version` header, added both to the headers used when fetching
+// Copilot models and to outgoing chat headers.
+const API_VERSION = "2026-06-01"
+// Candidate small-model ids in preference order: the small_model hook picks the first id
+// that is present in the fetched model map.
+const UTILITY_MODELS = ["gpt-5.4-nano", "gpt-4.1", "gpt-4o", "gpt-4o-mini"]
 // Add a small safety buffer when polling to avoid hitting the server
 // slightly too early due to clock skew / timer drift.
 const OAUTH_POLLING_SAFETY_MARGIN_MS = 3000 // 3 seconds
@@ -56,11 +58,13 @@ function fix(model: Model, url: string): Model {

 export async function CopilotAuthPlugin(input: PluginInput): Promise<Hooks> {
  const sdk = input.client
+  let models: Record<string, Model> = {}
  return {
    provider: {
      id: "github-copilot",
      async models(provider, ctx) {
        if (ctx.auth?.type !== "oauth") {
+          models = {}
          return Object.fromEntries(Object.entries(provider.models).map(([id, model]) => [id, fix(model, base())]))
        }

@@ -71,14 +75,23 @@ export async function CopilotAuthPlugin(input: PluginInput): Promise<Hooks> {
          {
            Authorization: `Bearer ${auth.refresh}`,
            "User-Agent": `opencode/${InstallationVersion}`,
+            "X-GitHub-Api-Version": API_VERSION,
          },
          provider.models,
-        ).catch((error) => {
-          log.error("failed to fetch copilot models", { error })
-          return Object.fromEntries(
-            Object.entries(provider.models).map(([id, model]) => [id, fix(model, base(auth.enterpriseUrl))]),
-          )
-        })
+        )
+          .then((result) => {
+            models = result.models
+            return Object.fromEntries(
+              Object.entries(result.models).filter(([, model]) => result.pickerEnabled.has(model.api.id)),
+            )
+          })
+          .catch((error) => {
+            models = {}
+            log.error("failed to fetch copilot models", { error })
+            return Object.fromEntries(
+              Object.entries(provider.models).map(([id, model]) => [id, fix(model, base(auth.enterpriseUrl))]),
+            )
+          })
      },
    },
    auth: {
@@ -342,9 +355,19 @@ export async function CopilotAuthPlugin(input: PluginInput): Promise<Hooks> {
        output.options.toolStreaming = false
      }
    },
+    // Small-model override for the "github-copilot" provider; any other provider is left untouched.
+    // Sets `output.model` to the first UTILITY_MODELS entry found in the cached `models` map,
+    // or to undefined when none is present (for example after `models` was reset to {}).
+    "experimental.provider.small_model": async (incoming, output) => {
+      if (incoming.provider.id !== "github-copilot") return
+      // GitHub exposes utility models for title generation without including them in the picker.
+      output.model = UTILITY_MODELS.map((id) => models[id]).find((model) => model !== undefined)
+    },
    "chat.headers": async (incoming, output) => {
      if (!incoming.model.providerID.includes("github-copilot")) return

+      output.headers["X-GitHub-Api-Version"] = API_VERSION
+      if (incoming.agent === "title") {
+        output.headers["X-Interaction-Type"] = "agent-session-name-generation"
+      }
+
      if (incoming.model.api.npm === "@ai-sdk/anthropic") {
        output.headers["anthropic-beta"] = "interleaved-thinking-2025-05-14"
      }
@@ -1,53 +1,81 @@
 import type { Model } from "@opencode-ai/sdk/v2"
-import { Schema } from "effect"
+import { Option, Schema } from "effect"

-export const schema = Schema.Struct({
-  data: Schema.Array(
+// Schema for a single entry of the `/models` response. `limits`, its token counts, and the
+// `streaming` / `tool_calls` flags are optional so that sparse entries still decode;
+// `usable` later rejects entries that lack the fields `build` needs.
+// `billing.token_prices` is optional; `build` falls back to zero cost when it is absent.
+const item = Schema.Struct({
+  model_picker_enabled: Schema.Boolean,
+  id: Schema.String,
+  name: Schema.String,
+  // every version looks like: `{model.id}-YYYY-MM-DD`
+  version: Schema.String,
+  supported_endpoints: Schema.optional(Schema.Array(Schema.String)),
+  policy: Schema.optional(
     Schema.Struct({
-      model_picker_enabled: Schema.Boolean,
-      id: Schema.String,
-      name: Schema.String,
-      // every version looks like: `{model.id}-YYYY-MM-DD`
-      version: Schema.String,
-      supported_endpoints: Schema.optional(Schema.Array(Schema.String)),
-      policy: Schema.optional(
-        Schema.Struct({
-          state: Schema.optional(Schema.String),
-        }),
-      ),
-      capabilities: Schema.Struct({
-        family: Schema.String,
-        limits: Schema.Struct({
-          max_context_window_tokens: Schema.Number,
-          max_output_tokens: Schema.Number,
-          max_prompt_tokens: Schema.Number,
-          vision: Schema.optional(
-            Schema.Struct({
-              max_prompt_image_size: Schema.Number,
-              max_prompt_images: Schema.Number,
-              supported_media_types: Schema.Array(Schema.String),
-            }),
-          ),
-        }),
-        supports: Schema.Struct({
-          adaptive_thinking: Schema.optional(Schema.Boolean),
-          max_thinking_budget: Schema.optional(Schema.Number),
-          min_thinking_budget: Schema.optional(Schema.Number),
-          reasoning_effort: Schema.optional(Schema.Array(Schema.String)),
-          streaming: Schema.Boolean,
-          structured_outputs: Schema.optional(Schema.Boolean),
-          tool_calls: Schema.Boolean,
-          vision: Schema.optional(Schema.Boolean),
-        }),
-      }),
+      state: Schema.optional(Schema.String),
     }),
   ),
+  billing: Schema.optional(
+    Schema.Struct({
+      token_prices: Schema.optional(
+        Schema.Struct({
+          batch_size: Schema.Number,
+          default: Schema.Struct({
+            cache_price: Schema.Number,
+            input_price: Schema.Number,
+            output_price: Schema.Number,
+          }),
+        }),
+      ),
+    }),
+  ),
+  capabilities: Schema.Struct({
+    family: Schema.String,
+    limits: Schema.optional(
+      Schema.Struct({
+        max_context_window_tokens: Schema.optional(Schema.Number),
+        max_output_tokens: Schema.optional(Schema.Number),
+        max_prompt_tokens: Schema.optional(Schema.Number),
+        vision: Schema.optional(
+          Schema.Struct({
+            max_prompt_image_size: Schema.Number,
+            max_prompt_images: Schema.Number,
+            supported_media_types: Schema.Array(Schema.String),
+          }),
+        ),
+      }),
+    ),
+    supports: Schema.Struct({
+      adaptive_thinking: Schema.optional(Schema.Boolean),
+      max_thinking_budget: Schema.optional(Schema.Number),
+      min_thinking_budget: Schema.optional(Schema.Number),
+      reasoning_effort: Schema.optional(Schema.Array(Schema.String)),
+      streaming: Schema.optional(Schema.Boolean),
+      structured_outputs: Schema.optional(Schema.Boolean),
+      tool_calls: Schema.optional(Schema.Boolean),
+      vision: Schema.optional(Schema.Boolean),
+    }),
+  }),
 })

-type Item = Schema.Schema.Type<typeof schema>["data"][number]
-const decodeModels = Schema.decodeUnknownSync(schema)
+// Envelope of the `/models` response: only requires `data` to be an array. Entries stay
+// `unknown` here and are decoded one at a time against `item`.
+export const schema = Schema.Struct({
+  data: Schema.Array(Schema.Unknown),
+})

-function build(key: string, remote: Item, url: string, prev?: Model): Model {
+type Item = Schema.Schema.Type<typeof item>
+// An `Item` whose `limits` object is present with numeric `max_output_tokens` and
+// `max_prompt_tokens`, and whose `supports.tool_calls` is a boolean. This is the shape that
+// `usable` narrows to and that `build` accepts.
+type SelectableItem = Item & {
+  capabilities: Item["capabilities"] & {
+    limits: NonNullable<Item["capabilities"]["limits"]> & {
+      max_output_tokens: number
+      max_prompt_tokens: number
+    }
+    supports: Item["capabilities"]["supports"] & {
+      tool_calls: boolean
+    }
+  }
+}
+// Decoder for the `{ data: [...] }` envelope schema.
+const decodeModels = Schema.decodeUnknownSync(schema)
+// Decoder for one entry; yields an Option, which `get` unwraps with `Option.getOrUndefined`
+// so that an entry failing to decode is dropped rather than failing the whole listing.
+const decodeItem = Schema.decodeUnknownOption(item)
+
+function build(key: string, remote: SelectableItem, url: string, prev?: Model): Model {
  const reasoning =
    !!remote.capabilities.supports.adaptive_thinking ||
    !!remote.capabilities.supports.reasoning_effort?.length ||
@@ -58,6 +86,9 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model {
    (remote.capabilities.limits.vision?.supported_media_types ?? []).some((item) => item.startsWith("image/"))

  const isMsgApi = remote.supported_endpoints?.includes("/v1/messages")
+  const prices = remote.billing?.token_prices
+  // Copilot prices are AIC per billing batch; OpenCode stores USD per million tokens.
+  const usdPerMillion = prices ? 10_000 / prices.batch_size : 0

  const model: Model = {
    id: key,
@@ -70,7 +101,7 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model {
    // API response wins
    status: "active",
    limit: {
-      context: remote.capabilities.limits.max_context_window_tokens,
+      context: remote.capabilities.limits.max_context_window_tokens ?? remote.capabilities.limits.max_prompt_tokens,
      input: remote.capabilities.limits.max_prompt_tokens,
      output: remote.capabilities.limits.max_output_tokens,
    },
@@ -99,9 +130,13 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model {
    family: prev?.family ?? remote.capabilities.family,
    name: prev?.name ?? remote.name,
    cost: {
-      input: 0,
-      output: 0,
-      cache: { read: 0, write: 0 },
+      input: (prices?.default.input_price ?? 0) * usdPerMillion,
+      output: (prices?.default.output_price ?? 0) * usdPerMillion,
+      cache: {
+        read: (prices?.default.cache_price ?? 0) * usdPerMillion,
+        // `/models` exposes cached-input reads only; per-request billing accounts for cache writes.
+        write: 0,
+      },
    },
    options: prev?.options ?? {},
    headers: prev?.headers ?? {},
@@ -154,11 +189,20 @@ function build(key: string, remote: Item, url: string, prev?: Model): Model {
  return model
 }

+// Type guard for entries that can be passed to `build`: the policy state is not "disabled",
+// and `max_output_tokens`, `max_prompt_tokens`, and `tool_calls` are all present.
+// Only presence is checked, so an entry with `tool_calls: false` still passes.
+function usable(item: Item): item is SelectableItem {
+  return (
+    item.policy?.state !== "disabled" &&
+    item.capabilities.limits?.max_output_tokens !== undefined &&
+    item.capabilities.limits.max_prompt_tokens !== undefined &&
+    item.capabilities.supports.tool_calls !== undefined
+  )
+}
+
 export async function get(
  baseURL: string,
  headers: HeadersInit = {},
  existing: Record<string, Model> = {},
-): Promise<Record<string, Model>> {
+): Promise<{ models: Record<string, Model>; pickerEnabled: Set<string> }> {
  const data = await fetch(`${baseURL}/models`, {
    headers,
    signal: AbortSignal.timeout(5_000),
@@ -171,7 +215,10 @@ export async function get(

  const result = { ...existing }
  const remote = new Map(
-    data.data.filter((m) => m.model_picker_enabled && m.policy?.state !== "disabled").map((m) => [m.id, m] as const),
+    data.data.flatMap((raw) => {
+      const item = Option.getOrUndefined(decodeItem(raw))
+      return item && usable(item) ? ([[item.id, item]] as const) : []
+    }),
  )

  // prune existing models whose api.id isn't in the endpoint response
@@ -190,7 +237,10 @@ export async function get(
    result[id] = build(id, m, baseURL)
  }

-  return result
+  return {
+    models: result,
+    pickerEnabled: new Set([...remote].filter(([, item]) => item.model_picker_enabled).map(([id]) => id)),
+  }
 }

 export * as CopilotModels from "./models"
@@ -1765,6 +1765,19 @@ export const layer = Layer.effect(
      const provider = s.providers[providerID]
      if (!provider) return undefined

+      const experimental = yield* plugin.trigger<"experimental.provider.small_model">(
+        "experimental.provider.small_model",
+        { provider: toPublicInfo(provider) },
+        { model: undefined },
+      )
+      if (experimental.model) {
+        return {
+          ...experimental.model,
+          id: ProviderV2.ModelID.make(experimental.model.id),
+          providerID: ProviderV2.ID.make(experimental.model.providerID),
+        }
+      }
+
      const defaultPriority = [
        "claude-haiku-4-5",
        "claude-haiku-4.5",
@@ -280,6 +280,8 @@ const live: Layer.Layer<
      return {
        type: "ai-sdk" as const,
        result: streamText({
+          // Copilot returns the authoritative billed amount only in provider-specific response fields.
+          includeRawChunks: input.model.providerID.includes("github-copilot"),
          onError(error) {
            l.error("stream error", {
              error,
@@ -14,6 +14,7 @@ export function adapterState() {
    currentTextID: undefined as string | undefined,
    currentReasoningID: undefined as string | undefined,
    toolNames: {} as Record<string, string>,
+    copilotTotalNanoAiu: undefined as number | undefined,
  }
 }

@@ -26,6 +27,20 @@ function providerMetadata(value: unknown): ProviderMetadata | undefined {
  return Schema.is(ProviderMetadata)(value) ? value : undefined
 }

+// Temporary AI SDK bridge: Copilot billing survives only in raw provider chunks here.
+// Move this extraction into @opencode-ai/llm when Copilot is handled by the native runtime.
+//
+// Reads `copilot_usage.total_nano_aiu` from a raw chunk, looking first on the chunk itself
+// and then on its `response` object. Returns the value only when it is a finite,
+// non-negative number; every other input yields undefined.
+function copilotTotalNanoAiu(value: unknown) {
+  if (!value || typeof value !== "object") return
+  const raw = value as Record<string, unknown>
+  const response =
+    raw.response && typeof raw.response === "object" ? (raw.response as Record<string, unknown>) : undefined
+  const usage = raw.copilot_usage ?? response?.copilot_usage
+  if (!usage || typeof usage !== "object") return
+  const total = (usage as Record<string, unknown>).total_nano_aiu
+  if (typeof total !== "number" || !Number.isFinite(total) || total < 0) return
+  return total
+}
+
 function usage(value: unknown) {
  if (!value || typeof value !== "object") return undefined
  const item = value as {
@@ -70,14 +85,28 @@ export function toLLMEvents(
      return Effect.succeed([LLMEvent.stepStart({ index: state.step })])

    case "finish-step":
-      return Effect.sync(() => [
-        LLMEvent.stepFinish({
-          index: state.step++,
-          reason: finishReason(event.finishReason),
-          usage: usage(event.usage),
-          providerMetadata: providerMetadata(event.providerMetadata),
-        }),
-      ])
+      return Effect.sync(() => {
+        const original = providerMetadata(event.providerMetadata)
+        const metadata =
+          state.copilotTotalNanoAiu === undefined
+            ? original
+            : {
+                ...original,
+                copilot: {
+                  ...original?.copilot,
+                  totalNanoAiu: state.copilotTotalNanoAiu,
+                },
+              }
+        state.copilotTotalNanoAiu = undefined
+        return [
+          LLMEvent.stepFinish({
+            index: state.step++,
+            reason: finishReason(event.finishReason),
+            usage: usage(event.usage),
+            providerMetadata: metadata,
+          }),
+        ]
+      })

    case "finish":
      return Effect.sync(() => {
@@ -238,11 +267,16 @@ export function toLLMEvents(
    case "abort":
    case "source":
    case "file":
-    case "raw":
    case "tool-output-denied":
    case "tool-approval-request":
      return Effect.succeed([])

+    case "raw":
+      return Effect.sync(() => {
+        state.copilotTotalNanoAiu = copilotTotalNanoAiu(event.rawValue) ?? state.copilotTotalNanoAiu
+        return []
+      })
+
    default: {
      const _exhaustive: never = event
      void _exhaustive
@@ -436,18 +436,22 @@ export const getUsage = (input: { model: Provider.Model; usage: Usage; metadata?
    (input.model.cost?.experimentalOver200K && contextTokens > 200_000
      ? input.model.cost.experimentalOver200K
      : input.model.cost)
+  const totalNanoAiu = input.metadata?.["copilot"]?.["totalNanoAiu"]
  return {
-    cost: safe(
-      new Decimal(0)
-        .add(new Decimal(tokens.input).mul(costInfo?.input ?? 0).div(1_000_000))
-        .add(new Decimal(tokens.output).mul(costInfo?.output ?? 0).div(1_000_000))
-        .add(new Decimal(tokens.cache.read).mul(costInfo?.cache?.read ?? 0).div(1_000_000))
-        .add(new Decimal(tokens.cache.write).mul(costInfo?.cache?.write ?? 0).div(1_000_000))
-        // TODO: update models.dev to have better pricing model, for now:
-        // charge reasoning tokens at the same rate as output tokens
-        .add(new Decimal(tokens.reasoning).mul(costInfo?.output ?? 0).div(1_000_000))
-        .toNumber(),
-    ),
+    cost:
+      typeof totalNanoAiu === "number" && Number.isFinite(totalNanoAiu) && totalNanoAiu >= 0
+        ? new Decimal(totalNanoAiu).div(100_000_000_000).toNumber()
+        : safe(
+            new Decimal(0)
+              .add(new Decimal(tokens.input).mul(costInfo?.input ?? 0).div(1_000_000))
+              .add(new Decimal(tokens.output).mul(costInfo?.output ?? 0).div(1_000_000))
+              .add(new Decimal(tokens.cache.read).mul(costInfo?.cache?.read ?? 0).div(1_000_000))
+              .add(new Decimal(tokens.cache.write).mul(costInfo?.cache?.write ?? 0).div(1_000_000))
+              // TODO: update models.dev to have better pricing model, for now:
+              // charge reasoning tokens at the same rate as output tokens
+              .add(new Decimal(tokens.reasoning).mul(costInfo?.output ?? 0).div(1_000_000))
+              .toNumber(),
+          ),
    tokens,
  }
 }
@@ -57,7 +57,7 @@ test("preserves temperature support from existing provider models", async () =>
    ),
  ) as unknown as typeof fetch

-  const models = await CopilotModels.get(
+  const result = await CopilotModels.get(
    "https://api.githubcopilot.com",
    {},
    {
@@ -112,11 +112,81 @@ test("preserves temperature support from existing provider models", async () =>
      },
    },
  )
+  const models = result.models

  expect(models["gpt-4o"].capabilities.temperature).toBe(true)
  expect(models["brand-new"].capabilities.temperature).toBe(true)
 })

+// Pins the price conversion in `build`: each price is multiplied by 10_000 / batch_size.
+// With batch_size 500000 the factor is 0.02, so 500 -> 10, 3000 -> 60 and 50 -> 1.
+// The other two entries must not appear in the result: the second has no `limits` or
+// `tool_calls`, and the third lacks fields the item schema requires (name, version, capabilities).
+test("converts Copilot AIC token prices to USD per million tokens", async () => {
+  globalThis.fetch = mock(() =>
+    Promise.resolve(
+      new Response(
+        JSON.stringify({
+          data: [
+            {
+              model_picker_enabled: true,
+              id: "gpt-5",
+              name: "GPT-5",
+              version: "gpt-5-2026-06-01",
+              billing: {
+                token_prices: {
+                  batch_size: 500000,
+                  default: {
+                    input_price: 500,
+                    output_price: 3000,
+                    cache_price: 50,
+                  },
+                },
+              },
+              capabilities: {
+                family: "gpt",
+                limits: {
+                  max_context_window_tokens: 200000,
+                  max_output_tokens: 16384,
+                  max_prompt_tokens: 200000,
+                },
+                supports: {
+                  streaming: true,
+                  tool_calls: true,
+                },
+              },
+            },
+            {
+              model_picker_enabled: true,
+              id: "incomplete-internal-model",
+              name: "Incomplete Internal Model",
+              version: "incomplete-internal-model-2026-06-01",
+              capabilities: {
+                family: "internal",
+                supports: {},
+              },
+            },
+            {
+              model_picker_enabled: false,
+              id: "ignored-non-chat-record",
+            },
+          ],
+        }),
+        { status: 200 },
+      ),
+    ),
+  ) as unknown as typeof fetch
+
+  const models = (await CopilotModels.get("https://api.githubcopilot.com")).models
+
+  expect(models["gpt-5"].cost).toEqual({
+    input: 10,
+    output: 60,
+    cache: {
+      read: 1,
+      write: 0,
+    },
+  })
+  expect(models["incomplete-internal-model"]).toBeUndefined()
+  expect(models["ignored-non-chat-record"]).toBeUndefined()
+})
+
 test("clears existing variants so refreshed models calculate provider-specific variants", async () => {
  globalThis.fetch = mock(() =>
    Promise.resolve(
@@ -150,7 +220,7 @@ test("clears existing variants so refreshed models calculate provider-specific v
    ),
  ) as unknown as typeof fetch

-  const models = await CopilotModels.get(
+  const result = await CopilotModels.get(
    "https://api.githubcopilot.com",
    {},
    {
@@ -210,6 +280,7 @@ test("clears existing variants so refreshed models calculate provider-specific v
      },
    },
  )
+  const models = result.models

  expect(models["claude-opus-4.7"].api.npm).toBe("@ai-sdk/anthropic")
  expect(models["claude-opus-4.7"].variants).toBeUndefined()
@@ -1674,6 +1674,20 @@ describe("SessionNs.getUsage", () => {
    expect(result.cost).toBe(3 + 1.5)
  })

+  // When metadata carries `copilot.totalNanoAiu`, getUsage divides it by 100_000_000_000 and
+  // uses that as the cost instead of the token-based estimate:
+  // 4_473_525_000 / 100_000_000_000 = 0.04473525, regardless of the model's configured rates.
+  test("uses authoritative Copilot billed cost when provided", () => {
+    const result = SessionNs.getUsage({
+      model: createModel({
+        context: 100_000,
+        output: 32_000,
+        cost: { input: 3, output: 15, cache: { read: 0.3, write: 0.3 } },
+      }),
+      usage: usage({ inputTokens: 11_774, outputTokens: 39, totalTokens: 11_813 }),
+      metadata: { copilot: { totalNanoAiu: 4_473_525_000 } },
+    })
+
+    expect(result.cost).toBe(0.04473525)
+  })
+
  test("uses matching context cost tier before over-200k fallback", () => {
    const model = createModel({
      context: 1_000_000,
@@ -500,6 +500,57 @@ describe("session.llm.ai-sdk adapter", () => {
    expect(result.tokens.cache.write).toBe(300)
    expect(result.tokens.cache.read).toBe(200)
  })
+
+  // A raw chunk carrying `copilot_usage.total_nano_aiu` must be merged into the providerMetadata
+  // of the next finish-step (alongside the existing `anthropic` entry) and then cleared, so the
+  // second finish-step, which has no raw chunk before it, carries no `copilot` metadata.
+  test("captures Copilot billed usage from raw Anthropic message deltas per step", async () => {
+    const events = await adapt([
+      uncheckedAdapterEvent({
+        type: "raw",
+        rawValue: {
+          type: "message_delta",
+          copilot_usage: { total_nano_aiu: 4_473_525_000 },
+        },
+      }),
+      {
+        type: "finish-step",
+        response: { id: "msg_test", timestamp: new Date(0), modelId: "claude-sonnet-4.6" },
+        finishReason: "stop",
+        rawFinishReason: "end_turn",
+        usage: {
+          inputTokens: 11_774,
+          outputTokens: 39,
+          totalTokens: 11_813,
+          inputTokenDetails: { noCacheTokens: 3, cacheReadTokens: 0, cacheWriteTokens: 11_771 },
+          outputTokenDetails: { textTokens: 39, reasoningTokens: undefined },
+        },
+        providerMetadata: { anthropic: { cacheCreationInputTokens: 11_771 } },
+      },
+      {
+        type: "finish-step",
+        response: { id: "msg_follow_up", timestamp: new Date(0), modelId: "claude-sonnet-4.6" },
+        finishReason: "stop",
+        rawFinishReason: "end_turn",
+        usage: {
+          inputTokens: 1,
+          outputTokens: 1,
+          totalTokens: 2,
+          inputTokenDetails: { noCacheTokens: 1, cacheReadTokens: 0, cacheWriteTokens: 0 },
+          outputTokenDetails: { textTokens: 1, reasoningTokens: undefined },
+        },
+        providerMetadata: { anthropic: {} },
+      },
+    ])
+
+    expect(events[0]).toMatchObject({
+      type: "step-finish",
+      providerMetadata: {
+        anthropic: { cacheCreationInputTokens: 11_771 },
+        copilot: { totalNanoAiu: 4_473_525_000 },
+      },
+    })
+    expect(events[1]).toMatchObject({ type: "step-finish", providerMetadata: { anthropic: {} } })
+    if (events[1].type !== "step-finish") throw new Error("expected step-finish")
+    expect(events[1].providerMetadata?.copilot).toBeUndefined()
+  })
 })

 type Capture = {
@@ -294,6 +294,7 @@ export interface Hooks {
      system: string[]
    },
  ) => Promise<void>
+  "experimental.provider.small_model"?: (input: { provider: ProviderV2 }, output: { model?: ModelV2 }) => Promise<void>
  /**
   * Called before session compaction starts. Allows plugins to customize
   * the compaction prompt.