Skip to content

Commit f23dbb5

Browse files
author
Frank
committed
sync
1 parent 6ceaf78 commit f23dbb5

File tree

5 files changed

+24
-48
lines changed

5 files changed

+24
-48
lines changed

.github/workflows/run-benchmark.yml

Lines changed: 19 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,20 @@ jobs:
2121
runs-on: ubuntu-latest
2222
outputs:
2323
tasks: ${{ steps.split.outputs.tasks }}
24+
model_safe: ${{ steps.sanitize.outputs.model_safe }}
2425
steps:
2526
- name: Split tasks into matrix
2627
id: split
2728
run: |
2829
TASKS_JSON=$(echo "${{ inputs.tasks }}" | tr ',' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | jq -R -s -c 'split("\n") | map(select(length > 0))')
2930
echo "tasks=$TASKS_JSON" >> $GITHUB_OUTPUT
3031
32+
- name: Sanitize model name for artifacts
33+
id: sanitize
34+
run: |
35+
MODEL_SAFE=$(echo "${{ inputs.model }}" | sed 's/\//-/g')
36+
echo "model_safe=${MODEL_SAFE}" >> $GITHUB_OUTPUT
37+
3138
benchmark:
3239
needs: prepare
3340
runs-on: ubuntu-latest
@@ -51,31 +58,21 @@ jobs:
5158
- name: Install OpenCode CLI
5259
run: bun add -g opencode-ai
5360

54-
- name: Print benchmark config
55-
env:
56-
MODEL: ${{ inputs.model }}
57-
TASK: ${{ matrix.task }}
58-
RUN: ${{ matrix.run }}
59-
run: |
60-
echo "Model: ${MODEL}"
61-
echo "Task: ${TASK}"
62-
echo "Run: ${RUN}"
63-
6461
- name: Run benchmark
6562
env:
6663
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
6764
DEBUG: true
6865
TASK: ${{ matrix.task }}
6966
MODEL: ${{ inputs.model }}
7067
AGENT: ${{ inputs.agent }}
71-
RESULT_PATH: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
68+
RESULT_PATH: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
7269
run: bun github/run.ts
7370

7471
- name: Upload benchmark results
7572
uses: actions/upload-artifact@v4
7673
with:
77-
name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}
78-
path: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
74+
name: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}
75+
path: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
7976

8077
summarize-runs:
8178
needs: [prepare, benchmark]
@@ -95,35 +92,24 @@ jobs:
9592
- name: Install dependencies
9693
run: bun install
9794

98-
- name: Download run 1 results
95+
- name: Download all run results
9996
uses: actions/download-artifact@v4
10097
with:
101-
name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run1
102-
path: results
103-
104-
- name: Download run 2 results
105-
uses: actions/download-artifact@v4
106-
with:
107-
name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run2
108-
path: results
109-
110-
- name: Download run 3 results
111-
uses: actions/download-artifact@v4
112-
with:
113-
name: result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run3
98+
pattern: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run*
11499
path: results
115100

116101
- name: Summarize runs
117-
env:
118-
RESULT_PATHS: results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run1.json,results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run2.json,results/result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run3.json
119-
RUNS_SUMMARY_PATH: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}.json
120-
run: bun github/summarize-runs.ts
102+
run: |
103+
RESULT_PATHS=$(find results -name 'result-*.json' | sort | tr '\n' ',' | sed 's/,$//')
104+
export RESULT_PATHS
105+
export RUNS_SUMMARY_PATH=runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
106+
bun github/summarize-runs.ts
121107
122108
- name: Upload runs summary
123109
uses: actions/upload-artifact@v4
124110
with:
125-
name: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}
126-
path: runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}.json
111+
name: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}
112+
path: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
127113

128114
summarize-tasks:
129115
needs: summarize-runs

github/run.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,11 @@
22
import { writeFile } from "node:fs/promises";
33
import { Logger } from "../src/util/logger.js";
44
import { Eval } from "../src/eval.js";
5-
import { sanitizeFilename } from "../src/util/fs.js";
65

76
const task = process.env.TASK!;
87
const model = process.env.MODEL!;
98
const agent = process.env.AGENT!;
10-
const resultPath = sanitizeFilename(process.env.RESULT_PATH!);
9+
const resultPath = process.env.RESULT_PATH!;
1110

1211
// Run eval
1312
const result = await Eval.run(agent, model, task, {

github/summarize-runs.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
11
#!/usr/bin/env bun
22
import { readFile, writeFile } from "node:fs/promises";
33
import { Summarizer } from "../src/summarizer.js";
4-
import { sanitizeFilename } from "../src/util/fs.js";
54

65
const resultPaths = process.env.RESULT_PATHS!;
7-
const runsSummaryPath = sanitizeFilename(process.env.RUNS_SUMMARY_PATH!);
6+
const runsSummaryPath = process.env.RUNS_SUMMARY_PATH!;
87

98
const results = await Promise.all(
109
resultPaths.split(",").map(async (resultPath) => {
11-
const result = await readFile(sanitizeFilename(resultPath), "utf8");
10+
const result = await readFile(resultPath, "utf8");
1211
return JSON.parse(result);
1312
}),
1413
);

github/summarize-tasks.ts

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,13 @@
11
#!/usr/bin/env bun
22
import { readFile, writeFile } from "node:fs/promises";
33
import { Summarizer } from "../src/summarizer.js";
4-
import { sanitizeFilename } from "../src/util/fs.js";
54

65
const runsSummaryPaths = process.env.RUNS_SUMMARY_PATHS!;
7-
const tasksSummaryPath = sanitizeFilename(process.env.TASKS_SUMMARY_PATH!);
6+
const tasksSummaryPath = process.env.TASKS_SUMMARY_PATH!;
87

98
const runsSummaries = await Promise.all(
109
runsSummaryPaths.split(",").map(async (runsSummaryPath) => {
11-
const runsSummary = await readFile(
12-
sanitizeFilename(runsSummaryPath),
13-
"utf8",
14-
);
10+
const runsSummary = await readFile(runsSummaryPath, "utf8");
1511
return JSON.parse(runsSummary);
1612
}),
1713
);

src/util/fs.ts

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,3 @@ export async function fileExists(path: string) {
99
return false;
1010
}
1111
}
12-
13-
export function sanitizeFilename(filename: string) {
14-
return filename.replace(/[/]/g, "-");
15-
}

0 commit comments

Comments
 (0)