Skip to content

Commit 6d002ca

Browse files
committed
update: change location of judge summary
1 parent ca447bb commit 6d002ca

File tree

1 file changed

+16
-21
lines changed

1 file changed

+16
-21
lines changed

.github/workflows/benchmark-reusable.yml

Lines changed: 16 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,22 @@ jobs:
140140
exit 1
141141
fi
142142
143+
- name: Generate Judges Summary for this Evaluation
144+
env:
145+
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
146+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
147+
BENCHMARK_EVAL: ${{ matrix.eval }}
148+
run: |
149+
set -euo pipefail
150+
echo ""
151+
echo "═══════════════════════════════════════════════════════"
152+
echo "JUDGE CONSISTENCY ANALYSIS"
153+
echo "Evaluation: ${BENCHMARK_EVAL}"
154+
echo "═══════════════════════════════════════════════════════"
155+
echo ""
156+
bun run scripts/judges-summary.ts benchmark.json --ai-summary || true
157+
echo ""
158+
143159
- name: Prepare artifact name
144160
id: artifact
145161
env:
@@ -181,27 +197,6 @@ jobs:
181197
with:
182198
path: benchmarks
183199

184-
- name: Generate Judges Summaries per Evaluation
185-
env:
186-
OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
187-
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
188-
run: |
189-
set -euo pipefail
190-
echo "═══════════════════════════════════════════════════════"
191-
echo "JUDGE CONSISTENCY ANALYSIS PER EVALUATION"
192-
echo "═══════════════════════════════════════════════════════"
193-
194-
# Find all benchmark JSON files
195-
for benchmark_file in benchmarks/*/*.json; do
196-
if [ -f "$benchmark_file" ]; then
197-
echo ""
198-
echo "Analyzing: $benchmark_file"
199-
echo "───────────────────────────────────────────────────────"
200-
bun run scripts/judges-summary.ts "$benchmark_file" --ai-summary || true
201-
echo ""
202-
fi
203-
done
204-
205200
- name: Merge benchmark exports
206201
run: bun run scripts/merge-benchmark-exports.ts benchmarks merged-benchmark.json
207202

0 commit comments

Comments (0)