@@ -21,13 +21,20 @@ jobs:
2121 runs-on : ubuntu-latest
2222 outputs :
2323 tasks : ${{ steps.split.outputs.tasks }}
24+ model_safe : ${{ steps.sanitize.outputs.model_safe }}
2425 steps :
# Turns the comma-separated `tasks` input into a compact JSON array of
# trimmed, non-empty task names and publishes it as the `tasks` step output.
- name: Split tasks into matrix
  id: split
  env:
    # Hand the input over through the environment rather than expanding it
    # inside the script, so its content is never parsed as shell code.
    TASKS: ${{ inputs.tasks }}
  run: |
    # printf instead of echo: echo may interpret option-like or escaped input.
    TASKS_JSON=$(printf '%s\n' "$TASKS" | tr ',' '\n' | sed 's/^[[:space:]]*//;s/[[:space:]]*$//' | jq -R -s -c 'split("\n") | map(select(length > 0))')
    echo "tasks=$TASKS_JSON" >> "$GITHUB_OUTPUT"
3031
# Derives an artifact-safe variant of the `model` input and publishes it as
# the `model_safe` step output.
- name: Sanitize model name for artifacts
  id: sanitize
  env:
    # Passed via the environment so the model string is never expanded into
    # the script text (avoids shell injection through the workflow input).
    MODEL: ${{ inputs.model }}
  run: |
    # Artifact names may not contain " : < > | * ? \ / or CR/LF; map each of
    # them to '-'. No trailing newline is fed to tr, so nothing is appended.
    MODEL_SAFE=$(printf '%s' "$MODEL" | tr '":<>|*?\\/\r\n' '-')
    echo "model_safe=${MODEL_SAFE}" >> "$GITHUB_OUTPUT"
37+
3138 benchmark :
3239 needs : prepare
3340 runs-on : ubuntu-latest
@@ -51,31 +58,21 @@ jobs:
# Installs the `opencode-ai` package globally (`-g`) using bun.
5158 - name : Install OpenCode CLI
5259 run : bun add -g opencode-ai
5360
54- - name : Print benchmark config
55- env :
56- MODEL : ${{ inputs.model }}
57- TASK : ${{ matrix.task }}
58- RUN : ${{ matrix.run }}
59- run : |
60- echo "Model: ${MODEL}"
61- echo "Task: ${TASK}"
62- echo "Run: ${RUN}"
63-
# Runs github/run.ts with bun for one matrix entry. The API key, task, model,
# agent and result file name are all supplied through `env`.
# The RESULT_PATH change below reorders the name to agent-model-task-run and
# takes the model part from the prepare job's `model_safe` output instead of
# the raw `model` input.
# NOTE(review): presumably run.ts writes its result to RESULT_PATH, since the
# upload step that follows uploads a file of the same name — confirm in run.ts.
6461 - name : Run benchmark
6562 env :
6663 OPENCODE_API_KEY : ${{ secrets.OPENCODE_API_KEY }}
6764 DEBUG : true
6865 TASK : ${{ matrix.task }}
6966 MODEL : ${{ inputs.model }}
7067 AGENT : ${{ inputs.agent }}
71- RESULT_PATH : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
68+ RESULT_PATH : result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
7269 run : bun github/run.ts
7370
# Uploads the per-run result JSON as an artifact. The artifact name is the
# file name without the ".json" suffix.
# The new name/path use the prepare job's `model_safe` output in place of the
# raw `model` input and put the agent first.
7471 - name : Upload benchmark results
7572 uses : actions/upload-artifact@v4
7673 with :
77- name : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}
78- path : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run${{ matrix.run }}.json
74+ name : result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}
75+ path : result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run${{ matrix.run }}.json
7976
8077 summarize-runs :
8178 needs : [prepare, benchmark]
@@ -95,35 +92,24 @@ jobs:
# Installs dependencies by running `bun install`.
9592 - name : Install dependencies
9693 run : bun install
9794
# One pattern-based download replaces the three removed per-run steps
# (run1, run2, run3): every artifact whose name matches
# result-<agent>-<model_safe>-<task>-run* is downloaded into `results`.
# NOTE(review): no `merge-multiple` is set, so each matched artifact is
# expected to land in its own subdirectory of `results` — confirm against the
# download-artifact v4 docs; the summarize step searches `results` recursively.
98- - name : Download run 1 results
95+ - name : Download all run results
9996 uses : actions/download-artifact@v4
10097 with :
101- name : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run1
102- path : results
103-
104- - name : Download run 2 results
105- uses : actions/download-artifact@v4
106- with :
107- name : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run2
108- path : results
109-
110- - name : Download run 3 results
111- uses : actions/download-artifact@v4
112- with :
113- name : result-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}-run3
98+ pattern : result-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}-run*
11499 path : results
115100
# Collects every downloaded run result under `results` and passes the list to
# github/summarize-runs.ts through RESULT_PATHS, together with the output
# file name in RUNS_SUMMARY_PATH.
- name: Summarize runs
  env:
    # Set through `env` so the expression values are never expanded into the
    # script text and parsed as shell code.
    RUNS_SUMMARY_PATH: runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
  run: |
    # Sorted, comma-separated list of result files found anywhere below results/.
    RESULT_PATHS=$(find results -type f -name 'result-*.json' | sort | tr '\n' ',' | sed 's/,$//')
    # A failing `find` inside the pipeline does not stop the script on its
    # own, so fail explicitly instead of summarizing an empty list.
    if [ -z "$RESULT_PATHS" ]; then
      echo "::error::No result-*.json files found under results/"
      exit 1
    fi
    export RESULT_PATHS
    bun github/summarize-runs.ts
121107
# Uploads the runs-summary JSON as an artifact. The artifact name is the file
# name without the ".json" suffix.
# The new name/path use the prepare job's `model_safe` output in place of the
# raw `model` input and put the agent first.
122108 - name : Upload runs summary
123109 uses : actions/upload-artifact@v4
124110 with :
125- name : runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}
126- path : runs-summary-${{ matrix.task }}-${{ inputs.model }}-${{ inputs.agent }}.json
111+ name : runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}
112+ path : runs-summary-${{ inputs.agent }}-${{ needs.prepare.outputs.model_safe }}-${{ matrix.task }}.json
127113
128114 summarize-tasks :
129115 needs : summarize-runs
0 commit comments