1- import { spawnSync } from "node:child_process" ;
1+ import { spawn } from "node:child_process" ;
22import { strict as assert } from "node:assert" ;
33
44import { generateObject } from "ai" ;
@@ -125,22 +125,21 @@ const COMMAND_TIMEOUT_MS = 5 * 60 * 1000;
125125type ChecksConfig = z . infer < typeof commandConfigSchema > ;
126126
127127export default createScore < PreparedCheck [ ] , ChecksConfig > ( {
128- prepare : ( { cwd, evaluation, config } ) => {
128+ prepare : async ( { cwd, evaluation, config } ) => {
129129 const parsedConfig = commandConfigSchema . parse ( config ?? { } ) ;
130130
131- parsedConfig . setup . forEach ( ( command ) => {
132- const result = runCommand ( command , cwd ) ;
131+ for ( const command of parsedConfig . setup ) {
132+ const result = await runCommand ( command , cwd ) ;
133133 logSetupExecution ( command , result ) ;
134- } ) ;
134+ }
135135
136- const results : PreparedCheck [ ] = parsedConfig . commands . map ( ( command ) => {
137- const baseline = runCommand ( command , cwd ) ;
136+ const results : PreparedCheck [ ] = [ ] ;
137+
138+ for ( const command of parsedConfig . commands ) {
139+ const baseline = await runCommand ( command , cwd ) ;
138140 logExecution ( "baseline" , command , baseline ) ;
139- return {
140- command,
141- baseline,
142- } ;
143- } ) ;
141+ results . push ( { command, baseline } ) ;
142+ }
144143
145144 assert (
146145 results . length > 0 ,
@@ -152,12 +151,12 @@ export default createScore<PreparedCheck[], ChecksConfig>({
152151 evaluate : async ( { evaluation, cwd, judge, reference, config : _config } ) => {
153152 finalizeAgentChanges ( evaluation , cwd , evaluation . from ) ;
154153
155- reference . forEach ( ( entry ) => {
154+ for ( const entry of reference ) {
156155 if ( ! entry . after ) {
157- entry . after = runCommand ( entry . command , cwd ) ;
156+ entry . after = await runCommand ( entry . command , cwd ) ;
158157 logExecution ( "after" , entry . command , entry . after ) ;
159158 }
160- } ) ;
159+ }
161160
162161 const prompt = buildJudgePrompt ( reference ) ;
163162
@@ -181,46 +180,67 @@ export default createScore<PreparedCheck[], ChecksConfig>({
181180 } ,
182181} ) ;
183182
/**
 * Runs `command` through the system shell inside `cwd` and captures the outcome.
 *
 * The returned promise always resolves (it never rejects for a failing or
 * timed-out command); failures are reported through the result fields.
 *
 * @param command - Shell command line to execute (`shell: true`).
 * @param cwd - Working directory the command is started in.
 * @returns The command, its exit code (`null` when no exit code is available,
 *   e.g. after a timeout, a spawn failure, or termination by signal), the
 *   captured stdout/stderr text, the wall-clock runtime in milliseconds, and an
 *   `errorMessage` when the process errored or exceeded `COMMAND_TIMEOUT_MS`.
 *   `success` is true only for exit code 0 with no recorded error.
 */
async function runCommand(command: string, cwd: string): Promise<CommandExecution> {
  const start = Date.now();

  return new Promise<CommandExecution>((resolve) => {
    const child = spawn(command, {
      cwd,
      shell: true,
      env: {
        ...process.env,
        // Default CI to "1" unless the caller's environment already sets it.
        CI: process.env.CI ?? "1",
      },
      stdio: ["ignore", "pipe", "pipe"],
    });

    let stdout = "";
    let stderr = "";
    let errorMessage: string | undefined;
    let timeout: ReturnType<typeof setTimeout> | undefined;
    let settled = false;

    // Resolves the promise exactly once, whichever event gets here first.
    const finalize = (exitCode: number | null): void => {
      if (settled) return;
      settled = true;
      if (timeout !== undefined) clearTimeout(timeout);

      const runtimeMs = Date.now() - start;
      const success = exitCode === 0 && errorMessage === undefined;

      resolve({
        command,
        success,
        exitCode,
        stdout,
        stderr,
        runtimeMs,
        errorMessage,
      });
    };

    timeout = setTimeout(() => {
      errorMessage = `Timed out after ${COMMAND_TIMEOUT_MS} ms`;
      child.kill("SIGKILL");
      // SIGKILL only reaches the shell. A surviving grandchild that inherited
      // the pipes can keep them open, in which case "close" never fires, so
      // release the pipes and settle here instead of waiting for it.
      child.stdout?.destroy();
      child.stderr?.destroy();
      finalize(null);
    }, COMMAND_TIMEOUT_MS);

    // Decode at the stream level so a multi-byte UTF-8 character split across
    // two chunks is reassembled instead of being corrupted.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");

    child.stdout?.on("data", (chunk: string) => {
      stdout += chunk;
    });

    child.stderr?.on("data", (chunk: string) => {
      stderr += chunk;
    });

    child.on("error", (error) => {
      // Keep an earlier message (e.g. the timeout) if one was already recorded.
      errorMessage = errorMessage ?? error.message;
      // "exit"/"close" are not guaranteed to follow an "error" event, so settle
      // now rather than risk a promise that never resolves.
      finalize(null);
    });

    child.on("close", (code) => {
      finalize(typeof code === "number" ? code : null);
    });
  });
}
225245
226246function buildJudgePrompt ( entries : PreparedCheck [ ] ) : string {
0 commit comments