File tree Expand file tree Collapse file tree 4 files changed +727
-0
lines changed
Expand file tree Collapse file tree 4 files changed +727
-0
lines changed Original file line number Diff line number Diff line change @@ -181,6 +181,27 @@ jobs:
181181 with :
182182 path : benchmarks
183183
- name: Generate Judges Summaries per Evaluation
  # Best-effort reporting step: runs scripts/judges-summary.ts with
  # --ai-summary on every JSON file found two levels under benchmarks/.
  # A failing summary must not fail the job, but it is surfaced as a
  # workflow warning instead of being silently discarded.
  env:
    # NOTE(review): presumably consumed by judges-summary.ts for the
    # --ai-summary output; confirm against the script.
    OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }}
    ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  run: |
    set -euo pipefail
    echo "═══════════════════════════════════════════════════════"
    echo "JUDGE CONSISTENCY ANALYSIS PER EVALUATION"
    echo "═══════════════════════════════════════════════════════"

    analyzed=0

    # Find all benchmark JSON files
    for benchmark_file in benchmarks/*/*.json; do
      # An unmatched glob stays literal; skip it (and any non-regular file).
      [ -f "$benchmark_file" ] || continue

      analyzed=$((analyzed + 1))
      echo ""
      echo "Analyzing: $benchmark_file"
      echo "───────────────────────────────────────────────────────"

      # Capture the exit status without tripping `set -e`, so one bad
      # file cannot abort the remaining analyses.
      status=0
      bun run scripts/judges-summary.ts "$benchmark_file" --ai-summary || status=$?
      if [ "$status" -ne 0 ]; then
        echo "::warning::judges-summary failed for $benchmark_file (exit $status); continuing"
      fi
      echo ""
    done

    if [ "$analyzed" -eq 0 ]; then
      echo "::notice::No benchmark JSON files found under benchmarks/*/"
    fi
204+
# Runs the merge script with `benchmarks` and `merged-benchmark.json` as its
# two arguments (presumably input directory and output file; confirm
# against scripts/merge-benchmark-exports.ts).
- name: Merge benchmark exports
  run: >-
    bun run scripts/merge-benchmark-exports.ts benchmarks merged-benchmark.json
186207
Original file line number Diff line number Diff line change 11node_modules
22dist
33benchmark.json
4+ results/
You can’t perform that action at this time.
0 commit comments