@@ -140,6 +140,22 @@ jobs:
140140 exit 1
141141 fi
142142
143+ - name : Generate Judges Summary for this Evaluation
144+ env :
145+ OPENCODE_API_KEY : ${{ secrets.OPENCODE_API_KEY }}
146+ ANTHROPIC_API_KEY : ${{ secrets.ANTHROPIC_API_KEY }}
147+ BENCHMARK_EVAL : ${{ matrix.eval }}
148+ run : |
149+ set -euo pipefail
150+ echo ""
151+ echo "═══════════════════════════════════════════════════════"
152+ echo "JUDGE CONSISTENCY ANALYSIS"
153+ echo "Evaluation: ${BENCHMARK_EVAL}"
154+ echo "═══════════════════════════════════════════════════════"
155+ echo ""
156+ bun run scripts/judges-summary.ts benchmark.json --ai-summary || true
157+ echo ""
158+
143159 - name : Prepare artifact name
144160 id : artifact
145161 env :
@@ -181,27 +197,6 @@ jobs:
181197 with :
182198 path : benchmarks
183199
184- - name : Generate Judges Summaries per Evaluation
185- env :
186- OPENCODE_API_KEY : ${{ secrets.OPENCODE_API_KEY }}
187- ANTHROPIC_API_KEY : ${{ secrets.ANTHROPIC_API_KEY }}
188- run : |
189- set -euo pipefail
190- echo "═══════════════════════════════════════════════════════"
191- echo "JUDGE CONSISTENCY ANALYSIS PER EVALUATION"
192- echo "═══════════════════════════════════════════════════════"
193-
194- # Find all benchmark JSON files
195- for benchmark_file in benchmarks/*/*.json; do
196- if [ -f "$benchmark_file" ]; then
197- echo ""
198- echo "Analyzing: $benchmark_file"
199- echo "───────────────────────────────────────────────────────"
200- bun run scripts/judges-summary.ts "$benchmark_file" --ai-summary || true
201- echo ""
202- fi
203- done
204-
205200 - name : Merge benchmark exports
206201 run : bun run scripts/merge-benchmark-exports.ts benchmarks merged-benchmark.json
207202
0 commit comments