You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
`${prefix} Restarting episode from a clean state (attempt ${
327
+
attempt+1
328
+
}/${retries})...`,
329
+
);
330
+
}
331
+
},
332
+
},
333
+
);
334
+
}
335
+
336
+
asyncfunctionrunEpisodeAttempt(
337
+
evalDef: DatasetEval,
338
+
agent: AgentRegistration,
339
+
model: string,
340
+
tasks: Task[],
341
+
prefix: string,
312
342
){
313
343
constbaselineCommit=evalDef.from;
314
344
letcwd: string|undefined;
@@ -344,52 +374,24 @@ async function runEpisode(
344
374
lettasksExecuted=0;
345
375
letusage: Usage={input: 0,output: 0,cost: 0};
346
376
constepisodeActions: string[]=[];
347
-
letepisodeDuration=0;
348
377
349
378
for(consttaskoftasks){
350
379
constlogPrefix=`${prefix}${task.commit}`;
351
380
352
381
try{
353
-
letsuccessfulRunDuration=0;
354
-
// TODO: retrying the agent runs here means if the agent did half of the work, the next agent would come up and continue those changes which is not correct.
355
-
// the agent should start from a clean state again and do the work. so the whole loop should be restarted.
0 commit comments