-
Notifications
You must be signed in to change notification settings - Fork 8
154 lines (132 loc) · 4.69 KB
/
run-benchmark.yml
File metadata and controls
154 lines (132 loc) · 4.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
name: Run Benchmark

# Manual trigger only: every run is started by hand with the three inputs below.
on:
  workflow_dispatch:
    inputs:
      # Forwarded to the benchmark and submit steps as AGENT.
      agent:
        type: string
        required: true
        description: 'Agent to use'
      # Forwarded to the benchmark and submit steps as MODEL.
      model:
        type: string
        required: true
        description: 'Model to use'
      # Split on commas by the prepare job; each entry becomes one matrix cell.
      tasks:
        type: string
        required: true
        description: 'Comma-separated list of tasks'
jobs:
  # Converts the raw dispatch inputs into values the downstream jobs consume:
  # a JSON array of task names and an artifact-safe spelling of the model name.
  prepare:
    runs-on: ubuntu-latest
    outputs:
      tasks: ${{ steps.split.outputs.tasks }}
      model_safe: ${{ steps.sanitize.outputs.model_safe }}
    steps:
      - name: Split tasks into matrix
        id: split
        env:
          # Inputs reach the script through the environment instead of being
          # expanded into the script text, so quotes or shell metacharacters
          # in the value cannot alter the command that runs.
          TASKS: ${{ inputs.tasks }}
        run: |
          # Fail the step if any stage of the pipeline below fails.
          set -euo pipefail
          # Split on commas, trim surrounding whitespace, drop empty entries,
          # and emit a compact single-line JSON array.
          TASKS_JSON=$(printf '%s' "$TASKS" |
            tr ',' '\n' |
            sed 's/^[[:space:]]*//;s/[[:space:]]*$//' |
            jq -R -s -c 'split("\n") | map(select(length > 0))')
          # An empty array cannot drive the matrices of the later jobs.
          if [ "$TASKS_JSON" = "[]" ]; then
            echo "::error::The tasks input does not contain any task names"
            exit 1
          fi
          echo "tasks=$TASKS_JSON" >> "$GITHUB_OUTPUT"
      - name: Sanitize model name for artifacts
        id: sanitize
        env:
          MODEL: ${{ inputs.model }}
        run: |
          set -euo pipefail
          # Replace every character that is not allowed in an artifact name
          # (/ \ : " < > | * ?) with a dash.
          MODEL_SAFE=$(printf '%s' "$MODEL" | sed 's#[/\\:"<>|*?]#-#g')
          echo "model_safe=${MODEL_SAFE}" >> "$GITHUB_OUTPUT"

  # Runs every task three times; each matrix cell uploads one result JSON.
  benchmark:
    needs: prepare
    runs-on: ubuntu-latest
    strategy:
      # Let the remaining cells finish when one cell fails instead of
      # cancelling them.
      fail-fast: false
      matrix:
        task: ${{ fromJson(needs.prepare.outputs.tasks) }}
        run: [1, 2, 3]
    environment: production
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Bun
        uses: oven-sh/setup-bun@v1
        with:
          bun-version: "1.2.21"
      - name: Install dependencies
        run: bun install
      - name: Install OpenCode CLI
        run: bun add -g opencode-ai
      - name: Run benchmark
        env:
          OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
          # Environment variables are strings; quoted to make that explicit.
          DEBUG: "true"
          TASK: ${{ matrix.task }}
          MODEL: ${{ inputs.model }}
          AGENT: ${{ inputs.agent }}
          RESULT_PATH: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
          OPENCODE_ENABLE_EXPERIMENTAL_MODELS: "true"
        run: bun github/run.ts
      - name: Upload benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}
          path: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
          # A missing result file should fail this cell rather than only warn.
          if-no-files-found: error

  # Collapses the per-run results of each task into one runs-summary JSON.
  summarize-runs:
    needs: [prepare, benchmark]
    runs-on: ubuntu-latest
    strategy:
      matrix:
        task: ${{ fromJson(needs.prepare.outputs.tasks) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Bun
        uses: oven-sh/setup-bun@v1
        with:
          bun-version: "1.2.21"
      - name: Install dependencies
        run: bun install
      - name: Download all run results
        uses: actions/download-artifact@v4
        with:
          pattern: result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run*
          path: results
      - name: Summarize runs
        env:
          # Built here rather than inside the script so that the agent, model
          # and task values are never expanded into shell source.
          RUNS_SUMMARY_PATH: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
        run: |
          set -euo pipefail
          # find searches results/ recursively; the sorted paths are joined
          # with commas and the trailing comma is removed.
          RESULT_PATHS=$(find results -name 'result-*.json' | sort | tr '\n' ',' | sed 's/,$//')
          export RESULT_PATHS
          bun github/summarize-runs.ts
      - name: Upload runs summary
        uses: actions/upload-artifact@v4
        with:
          name: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}
          path: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
          if-no-files-found: error

  # Merges all per-task summaries into tasks-summary.json and submits it.
  summarize-tasks:
    needs: summarize-runs
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Bun
        uses: oven-sh/setup-bun@v1
        with:
          bun-version: "1.2.21"
      - name: Install dependencies
        run: bun install
      - name: Download all runs summaries
        uses: actions/download-artifact@v4
        with:
          pattern: runs-summary-*
          path: runs-summaries
      - name: Summarize tasks
        env:
          TASKS_SUMMARY_PATH: tasks-summary.json
        run: |
          set -euo pipefail
          # Sorted, like the per-run paths in summarize-runs, so the order
          # handed to the script does not depend on directory traversal order.
          RUNS_SUMMARY_PATHS=$(find runs-summaries -name 'runs-summary-*.json' | sort | tr '\n' ',' | sed 's/,$//')
          export RUNS_SUMMARY_PATHS
          bun github/summarize-tasks.ts
      - name: Upload tasks summary
        uses: actions/upload-artifact@v4
        with:
          name: tasks-summary
          path: tasks-summary.json
          if-no-files-found: error
      - name: Submit results
        env:
          AGENT: ${{ inputs.agent }}
          MODEL: ${{ inputs.model }}
          TASKS_SUMMARY_PATH: tasks-summary.json
        run: bun github/submit-results.ts