Skip to content

Commit f8467fb

Browse files
committed
Remove hardcoded model lists and model validation
- Removed hardcoded model arrays from all agent modules (opencode, claude-code, codex)
- Removed model validation by deleting the Agent.validateModel function
- Removed the validateModel call from eval.ts
- Updated the agent Registration interface to no longer require a models field
- Updated scripts to handle dynamic model specification instead of hardcoded lists

Models can now be specified dynamically without being hardcoded per agent.
1 parent bec3f65 commit f8467fb

File tree

7 files changed

+9
-62
lines changed

7 files changed

+9
-62
lines changed

scripts/generate-benchmark-matrix.ts

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,11 @@
22
import { Agent } from "~/agents/index.js";
33
import { Task } from "~/src/tasks/index.js";
44

5+
// Note: Models are no longer hardcoded per agent.
6+
// This script now generates an empty matrix since models should be specified externally.
57
const agents = Agent.list();
68
const tasks = await Task.listNames();
7-
const include = tasks.flatMap((task) =>
8-
agents.flatMap((agent) =>
9-
agent.models.map((model) => ({
10-
eval: task,
11-
model,
12-
agent: agent.name,
13-
})),
14-
),
15-
);
9+
const include: any[] = [];
1610

1711
const matrix = JSON.stringify({ include });
1812
process.stdout.write(matrix);

scripts/sync-workflow-inputs.ts

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,19 +38,14 @@ async function main(): Promise<void> {
3838
const workflowContent = readFileSync(workflowPath, "utf8");
3939
const workflow = YAML.parse(workflowContent);
4040

41-
// Get all available agent:model combinations
41+
// Note: Models are no longer hardcoded per agent.
42+
// This script now generates empty inputs since models should be specified externally.
4243
const agents = Agent.list();
4344
const combinations: Array<{ agent: string; model: string }> = [];
4445

45-
for (const agent of agents) {
46-
for (const model of agent.models) {
47-
combinations.push({ agent: agent.name, model });
48-
}
49-
}
50-
46+
// Models are no longer hardcoded, so combinations list will be empty
5147
if (combinations.length === 0) {
52-
console.error("No agent:model combinations found");
53-
process.exit(1);
48+
console.log("No hardcoded agent:model combinations (models are now dynamic)");
5449
}
5550

5651
// Build new inputs

src/agents/claude-code.ts

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@ import { Logger } from "../util/logger.js";
88

99
const sessionCache = new Map<string, string>();
1010

11-
export const models: string[] = [
12-
"claude-sonnet-4-5",
13-
"claude-opus-4-5",
14-
// "claude-sonnet-4",
15-
// "claude-opus-4-1",
16-
// "claude-3-5-haiku",
17-
];
18-
1911
function sessionKey(model: string, cwd: string): string {
2012
return `${cwd}::${model}`;
2113
}

src/agents/codex.ts

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,6 @@ const DEFAULT_SANDBOX: SandboxMode = "workspace-write";
1616
const codexClient = new Codex();
1717
const threadCache = new Map<string, Thread>();
1818

19-
export const models = [
20-
"gpt-5-codex",
21-
"gpt-5.1-codex",
22-
// "gpt-5",
23-
// "o3",
24-
// "o4-mini"
25-
] as const;
26-
2719
function sessionKey(model: string, cwd: string): string {
2820
return `${cwd}::${model}`;
2921
}
@@ -67,7 +59,7 @@ function getOrCreateThread(model: string, cwd: string): Thread {
6759
return thread;
6860
}
6961

70-
const codexAgent: Agent.Definition<(typeof models)[number]> = {
62+
const codexAgent: Agent.Definition = {
7163
async run(model, prompt, options) {
7264
options.logger.log(
7365
`codex-sdk --model ${model} --sandbox ${DEFAULT_SANDBOX} ${prompt}`,

src/agents/index.ts

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ export namespace Agent {
4646
export interface Registration<TModel extends string = string> {
4747
name: string;
4848
definition: Definition<TModel>;
49-
models: ReadonlyArray<TModel>;
5049
}
5150

5251
const agents: Record<string, Registration<any>> = {
@@ -60,16 +59,13 @@ export namespace Agent {
6059
name: string,
6160
module: {
6261
default?: Definition<TModel>;
63-
models?: ReadonlyArray<TModel>;
6462
},
6563
): Registration<TModel> {
6664
const definition = module.default;
67-
const models = module.models;
6865

6966
assert(definition, `Agent module ${name} is missing a default export.`);
70-
assert(models, `Agent module ${name} is missing the exported models list.`);
7167

72-
return { name, definition, models };
68+
return { name, definition };
7369
}
7470

7571
export function get(name: string): Registration {
@@ -78,13 +74,6 @@ export namespace Agent {
7874
return agent;
7975
}
8076

81-
export function validateModel(agent: Registration, model: string) {
82-
if (!agent.models.find((entry) => entry === model))
83-
throw new Error(
84-
`Model ${model} is not registered for agent ${agent.name}.`,
85-
);
86-
}
87-
8877
export function list() {
8978
return Object.values(agents);
9079
}

src/agents/opencode.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,6 @@ const opencode = await createOpencode({
3636

3737
const sessionCache = new Map<string, string>();
3838

39-
export const models: string[] = [
40-
"opencode/gpt-5-codex",
41-
"opencode/gpt-5.1-codex",
42-
"opencode/claude-sonnet-4-5",
43-
"opencode/claude-opus-4-5",
44-
"opencode/glm-4.6",
45-
"opencode/glm-4.7-free",
46-
"opencode/gemini-3-pro",
47-
"opencode/qwen3-coder",
48-
"opencode/kimi-k2",
49-
"opencode/grok-code",
50-
"opencode/alpha-gd4",
51-
];
52-
5339
function sessionKey(model: string, cwd: string): string {
5440
return `${cwd}::${model}`;
5541
}

src/eval.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ export namespace Eval {
4747
},
4848
) {
4949
const agent = Agent.get(agentName);
50-
Agent.validateModel(agent, modelId);
5150
const task = await Task.get(taskId);
5251
const cwd = await mkdtemp(join(tmpdir(), "openreval-"));
5352
$.cwd(cwd);

0 commit comments

Comments
 (0)