Skip to content

Commit 16ecc86

Browse files
committed
debug workflow dispatch for dax
1 parent 58e8458 commit 16ecc86

6 files changed

Lines changed: 167 additions & 48 deletions

File tree

.github/workflows/benchmark-reusable.yml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,34 +52,40 @@ jobs:
5252
BENCHMARK_EVAL: ${{ matrix.eval }}
5353
run: |
5454
set -euo pipefail
55-
job_name="Benchmark ${BENCHMARK_AGENT} / ${BENCHMARK_MODEL} / ${BENCHMARK_EVAL}"
55+
# When using reusable workflows, job names get prefixed, so we search for jobs containing our pattern
56+
job_pattern="Benchmark ${BENCHMARK_AGENT} / ${BENCHMARK_MODEL} / ${BENCHMARK_EVAL}"
5657
jobs_endpoint="https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/jobs?per_page=100"
5758
job_json="$(curl -fsSL \
5859
-H "Authorization: token ${GITHUB_TOKEN}" \
5960
-H "Accept: application/vnd.github+json" \
6061
"${jobs_endpoint}")"
62+
63+
# Find the first in-progress job whose name contains the pattern
6164
job_info="$(printf '%s\n' "${job_json}" \
62-
| jq -r --arg name "$job_name" 'select(type=="object" and has("jobs")) | .jobs[] | select(.name == $name) | [.id, .html_url] | @tsv' \
65+
| jq -r --arg pattern "$job_pattern" 'select(type=="object" and has("jobs")) | .jobs[] | select(.name | contains($pattern)) | select(.status == "in_progress") | [.id, .html_url] | @tsv' \
6366
| head -n 1)"
6467
6568
if [ -z "${job_info}" ] || [ "${job_info}" = "null" ]; then
66-
echo "Failed to determine job info for ${job_name}." >&2
69+
echo "Failed to determine job info for pattern: ${job_pattern}" >&2
70+
echo "Available jobs:" >&2
71+
printf '%s\n' "${job_json}" | jq -r '.jobs[]?.name' >&2 || true
6772
exit 1
6873
fi
6974
7075
IFS=$'\t' read -r job_id job_url <<<"${job_info}"
7176
7277
if [ -z "${job_id}" ] || [ "${job_id}" = "null" ]; then
73-
echo "Failed to determine job ID for ${job_name}." >&2
78+
echo "Failed to determine job ID for pattern: ${job_pattern}" >&2
7479
exit 1
7580
fi
7681
7782
if [ -z "${job_url}" ] || [ "${job_url}" = "null" ]; then
78-
echo "Failed to determine job URL for ${job_name}." >&2
83+
echo "Failed to determine job URL for pattern: ${job_pattern}" >&2
7984
exit 1
8085
fi
8186
8287
echo "Job ID: ${job_id}"
88+
echo "Job URL: ${job_url}"
8389
echo "GITHUB_BENCHMARK_JOB_URL=${job_url}" >> "$GITHUB_ENV"
8490
echo "url=${job_url}" >> "$GITHUB_OUTPUT"
8591

.github/workflows/compare-models.yml

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,29 @@
11
name: Compare Models
2-
32
on:
43
workflow_dispatch:
54
inputs:
6-
# OpenCode agent models
7-
opencode_opencode_gpt_5_codex:
8-
description: 'opencode:opencode/gpt-5-codex'
9-
type: boolean
10-
default: false
11-
opencode_opencode_claude_sonnet_4_5:
12-
description: 'opencode:opencode/claude-sonnet-4-5'
13-
type: boolean
14-
default: false
155
# Codex agent models
166
codex_gpt_5_codex:
17-
description: 'codex:gpt-5-codex'
7+
description: codex:gpt-5-codex
8+
type: boolean
9+
default: false
10+
# Opencode agent models
11+
opencode_opencode_gpt_5_codex:
12+
description: opencode:opencode/gpt-5-codex
1813
type: boolean
1914
default: false
20-
codex_gpt_5:
21-
description: 'codex:gpt-5'
15+
opencode_opencode_claude_sonnet_4_5:
16+
description: opencode:opencode/claude-sonnet-4-5
2217
type: boolean
2318
default: false
24-
# Claude Code agent models
19+
# Claude-code agent models
2520
claude_code_claude_sonnet_4_5:
26-
description: 'claude-code:claude-sonnet-4-5'
21+
description: claude-code:claude-sonnet-4-5
2722
type: boolean
2823
default: false
29-
3024
permissions:
3125
contents: read
3226
actions: read
33-
3427
jobs:
3528
publish:
3629
runs-on: ubuntu-latest
@@ -39,26 +32,20 @@ jobs:
3932
urls: ${{ steps.publish.outputs.urls }}
4033
packages: ${{ steps.publish.outputs.packages }}
4134
matrix: ${{ steps.build-matrix.outputs.matrix }}
42-
4335
steps:
4436
- name: Checkout repository
4537
uses: actions/checkout@v4
46-
4738
- name: Setup Bun
4839
uses: oven-sh/setup-bun@v1
4940
with:
5041
bun-version: 1.2.21
51-
5242
- name: Install dependencies
5343
run: bun install
54-
5544
- name: Build
5645
run: bun run build
57-
5846
- id: publish
5947
name: Publish preview with pkg.pr.new
6048
run: bunx pkg-pr-new publish --bun
61-
6249
- id: build-matrix
6350
name: Build matrix from selected models
6451
env:
@@ -69,7 +56,6 @@ jobs:
6956
# Pass all inputs to the script
7057
MATRIX_JSON=$(echo "$WORKFLOW_INPUTS" | bun run scripts/build-workflow-matrix.ts)
7158
echo "matrix=${MATRIX_JSON}" >> "$GITHUB_OUTPUT"
72-
7359
run-benchmarks:
7460
needs: publish
7561
if: needs.publish.result == 'success' && needs.publish.outputs.urls != ''

agents/codex.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const threadCache = new Map<string, Thread>();
2222

2323
export const models: string[] = [
2424
"gpt-5-codex",
25-
"gpt-5",
25+
// "gpt-5",
2626
// "o3",
2727
// "o4-mini"
2828
];

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
},
1212
"scripts": {
1313
"build": "bun build cli.ts --outfile dist/cli.js --target node --format esm --external node:* --external @openai/codex-sdk --external @openai/codex-sdk/* --external @opencode-ai/sdk --external @opencode-ai/sdk/* --external @anthropic-ai/claude-agent-sdk --external @anthropic-ai/claude-agent-sdk/*",
14+
"postbuild": "bun run scripts/sync-workflow-inputs.ts",
1415
"check": "tsc --noEmit",
1516
"dev": "bun run cli.ts",
1617
"test": "bun test",

scripts/build-workflow-matrix.ts

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import { readFileSync } from "node:fs";
1919
import YAML from "yaml";
20+
import { listAgents } from "~/agents/index.js";
2021

2122
interface WorkflowInputs {
2223
[key: string]: string | boolean;
@@ -32,17 +33,27 @@ interface DatasetEntry {
3233
repo: string;
3334
}
3435

35-
// Mapping from input ID pattern to agent:model
36-
const INPUT_MAPPINGS = [
37-
// OpenCode agent
38-
{ pattern: /^opencode_opencode_gpt_5_codex$/, agent: "opencode", model: "opencode/gpt-5-codex" },
39-
{ pattern: /^opencode_opencode_claude_sonnet_4_5$/, agent: "opencode", model: "opencode/claude-sonnet-4-5" },
40-
// Codex agent
41-
{ pattern: /^codex_gpt_5_codex$/, agent: "codex", model: "gpt-5-codex" },
42-
{ pattern: /^codex_gpt_5$/, agent: "codex", model: "gpt-5" },
43-
// Claude Code agent
44-
{ pattern: /^claude_code_claude_sonnet_4_5$/, agent: "claude-code", model: "claude-sonnet-4-5" },
45-
];
36+
/**
 * Derives the workflow input ID for an agent:model pair.
 *
 * The agent and model are joined with "_", then every "/" and "-" in the
 * result is replaced with "_".
 *
 * @param agent - Agent name, e.g. "claude-code".
 * @param model - Model identifier, e.g. "opencode/gpt-5-codex".
 * @returns The underscore-separated input ID.
 */
function toInputId(agent: string, model: string): string {
  const combined = [agent, model].join("_");
  // One pass over both separators; equivalent to replacing "/" and then "-".
  return combined.replace(/[\/-]/g, "_");
}
42+
43+
/**
 * Builds the lookup table from workflow input IDs to agent:model pairs.
 *
 * Every model of every agent returned by listAgents() is registered under the
 * ID produced by toInputId(). If two pairs produce the same ID, the later one
 * replaces the earlier one.
 *
 * @returns A map keyed by input ID whose values hold the agent name and model.
 */
async function buildInputMapping(): Promise<Map<string, { agent: string; model: string }>> {
  const byInputId = new Map<string, { agent: string; model: string }>();

  for (const { name, models } of await listAgents()) {
    for (const model of models) {
      byInputId.set(toInputId(name, model), { agent: name, model });
    }
  }

  return byInputId;
}
4657

4758
function loadDataset(): DatasetEntry[] {
4859
const raw = readFileSync(new URL("../dataset.yaml", import.meta.url), "utf8");
@@ -67,6 +78,9 @@ async function main(): Promise<void> {
6778
// Load all evals from dataset
6879
const dataset = loadDataset();
6980

81+
// Build input ID to agent:model mapping dynamically
82+
const inputMapping = await buildInputMapping();
83+
7084
// Collect selected agent:model combinations
7185
const selectedCombinations: Array<{ agent: string; model: string }> = [];
7286

@@ -76,13 +90,10 @@ async function main(): Promise<void> {
7690
continue;
7791
}
7892

79-
// Find matching agent:model combination
80-
const mapping = INPUT_MAPPINGS.find((m) => m.pattern.test(key));
81-
if (mapping) {
82-
selectedCombinations.push({
83-
agent: mapping.agent,
84-
model: mapping.model,
85-
});
93+
// Look up agent:model combination from mapping
94+
const combination = inputMapping.get(key);
95+
if (combination) {
96+
selectedCombinations.push(combination);
8697
}
8798
}
8899

scripts/sync-workflow-inputs.ts

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env bun
2+
/**
3+
* Syncs workflow_dispatch inputs in compare-models.yml with available agent:model combinations.
4+
* Run this after modifying agent model lists to keep the workflow in sync.
5+
*
6+
* Usage:
7+
* bun run scripts/sync-workflow-inputs.ts
8+
*/
9+
10+
import { readFileSync, writeFileSync } from "node:fs";
11+
import { listAgents } from "~/agents/index.js";
12+
import YAML from "yaml";
13+
14+
/** Shape of a single workflow_dispatch input entry written to the workflow file. */
interface WorkflowInput {
  /** Text describing the input; this script writes "agent:model". */
  description: string;
  /** Input type; this script writes "boolean". */
  type: string;
  /** Initial value of the input; this script writes false. */
  default: boolean;
}

/** Workflow inputs keyed by input ID. */
type WorkflowInputs = Record<string, WorkflowInput>;
23+
24+
/**
 * Derives the workflow input ID for an agent:model pair.
 *
 * The agent and model are joined with "_", then every "/" and "-" in the
 * result is replaced with "_".
 *
 * @param agent - Agent name, e.g. "claude-code".
 * @param model - Model identifier, e.g. "opencode/gpt-5-codex".
 * @returns The underscore-separated input ID.
 */
function toInputId(agent: string, model: string): string {
  const joined = [agent, model].join("_");
  // One pass over both separators; equivalent to replacing "/" and then "-".
  return joined.replace(/[\/-]/g, "_");
}
30+
31+
/**
 * Formats an agent:model pair for display.
 *
 * @param agent - Agent name.
 * @param model - Model identifier.
 * @returns The two values separated by a colon, e.g. "codex:gpt-5-codex".
 */
function toDescription(agent: string, model: string): string {
  return [agent, model].join(":");
}
35+
36+
async function main(): Promise<void> {
37+
const workflowPath = ".github/workflows/compare-models.yml";
38+
39+
// Load the workflow file
40+
const workflowContent = readFileSync(workflowPath, "utf8");
41+
const workflow = YAML.parse(workflowContent);
42+
43+
// Get all available agent:model combinations
44+
const agents = await listAgents();
45+
const combinations: Array<{ agent: string; model: string }> = [];
46+
47+
for (const agent of agents) {
48+
for (const model of agent.models) {
49+
combinations.push({ agent: agent.name, model });
50+
}
51+
}
52+
53+
if (combinations.length === 0) {
54+
console.error("No agent:model combinations found");
55+
process.exit(1);
56+
}
57+
58+
// Build new inputs
59+
const newInputs: WorkflowInputs = {};
60+
61+
// Group by agent for organization
62+
const byAgent = new Map<string, string[]>();
63+
for (const { agent, model } of combinations) {
64+
if (!byAgent.has(agent)) {
65+
byAgent.set(agent, []);
66+
}
67+
byAgent.get(agent)!.push(model);
68+
}
69+
70+
// Build inputs with comments
71+
const inputsWithComments: any = {};
72+
73+
for (const [agent, models] of byAgent.entries()) {
74+
// Add comment for agent group
75+
const agentKey = `__comment_${agent}`;
76+
inputsWithComments[agentKey] = `${agent.charAt(0).toUpperCase() + agent.slice(1)} agent models`;
77+
78+
for (const model of models) {
79+
const inputId = toInputId(agent, model);
80+
inputsWithComments[inputId] = {
81+
description: toDescription(agent, model),
82+
type: "boolean",
83+
default: false,
84+
};
85+
}
86+
}
87+
88+
// Update the workflow
89+
workflow.on.workflow_dispatch.inputs = inputsWithComments;
90+
91+
// Convert back to YAML with proper formatting
92+
let yamlOutput = YAML.stringify(workflow, {
93+
indent: 2,
94+
lineWidth: 0,
95+
});
96+
97+
// Replace comment placeholders with actual YAML comments
98+
yamlOutput = yamlOutput.replace(
99+
/__comment_(\w+):\s*["']?([^"'\n]+)["']?\n/g,
100+
"# $2\n",
101+
);
102+
103+
// Write back to file
104+
writeFileSync(workflowPath, yamlOutput, "utf8");
105+
106+
console.log(`✓ Updated ${workflowPath} with ${combinations.length} agent:model combinations:`);
107+
for (const { agent, model } of combinations) {
108+
console.log(` - ${agent}:${model} (ID: ${toInputId(agent, model)})`);
109+
}
110+
111+
console.log("\n✓ The build-workflow-matrix.ts script will automatically recognize these combinations.");
112+
console.log(" No manual updates needed - everything is dynamically generated!");
113+
}
114+
115+
// Script entry point: run the sync and wait for it to finish.
await main();

0 commit comments

Comments
 (0)