We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 8c89007 commit 79ba0caCopy full SHA for 79ba0ca
.github/workflows/benchmark-reusable.yml
@@ -172,20 +172,6 @@ JSON
172
echo "benchmark.json not found, skipping summary log"
173
fi
174
175
- - name: Run benchmark-level judge analysis
176
- env:
177
- BENCHMARK_EVAL: ${{ matrix.eval }}
178
- run: |
179
- set -euo pipefail
180
- echo ""
181
- echo "═══════════════════════════════════════════════════════"
182
- echo "JUDGE CONSISTENCY ANALYSIS"
183
- echo "Evaluation: ${BENCHMARK_EVAL}"
184
185
186
- bun run scripts/analysis.ts benchmark.json || true
187
188
-
189
- name: Prepare artifact name
190
id: artifact
191
env:
scripts/run-all-evals.ts
0 commit comments