Skip to content

Commit 94850d2

Browse files
committed
feat: add cost and overall summary to evals
1 parent ad84a3a commit 94850d2

1 file changed

Lines changed: 53 additions & 24 deletions

File tree

cli.ts

Lines changed: 53 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -113,13 +113,10 @@ async function fetchSessionTokensAndCost(sessionID: string): Promise<{
113113
function generateLogSummary(allLogs: string[]): string {
114114
const toolCalls: Record<string, number> = {};
115115
const modifiedFiles = new Set<string>();
116-
const readFiles = new Set<string>();
116+
const narrativeTexts: string[] = [];
117117
let errorCount = 0;
118-
let totalMessages = 0;
119118

120119
for (const log of allLogs) {
121-
totalMessages++;
122-
123120
// Check for errors
124121
if (log.includes("ERROR:")) {
125122
errorCount++;
@@ -129,6 +126,15 @@ function generateLogSummary(allLogs: string[]): string {
129126
try {
130127
const parsed = JSON.parse(log.replace("ERROR: ", ""));
131128

129+
// Extract narrative from TextPart objects
130+
if (parsed?.type === "text" && typeof parsed.text === "string") {
131+
// Filter out very short or empty text
132+
const text = parsed.text.trim();
133+
if (text.length > 10) {
134+
narrativeTexts.push(text);
135+
}
136+
}
137+
132138
// Extract tool usage from ToolPart objects
133139
if (parsed?.type === "tool" && typeof parsed.tool === "string") {
134140
const toolName = parsed.tool;
@@ -143,50 +149,73 @@ function generateLogSummary(allLogs: string[]): string {
143149
}
144150
});
145151
}
146-
147-
// Extract file reads from FilePart objects
148-
if (parsed?.type === "file" && typeof parsed.path === "string") {
149-
readFiles.add(parsed.path);
150-
}
151152
} catch {
152153
// Not JSON or doesn't match expected format, skip
153154
}
154155
}
155156

156-
// Build summary parts
157-
const parts: string[] = [];
157+
// Build narrative summary
158+
const summaryParts: string[] = [];
159+
160+
// Add narrative if we have text
161+
if (narrativeTexts.length > 0) {
162+
// Create a condensed narrative by taking key sentences
163+
const opening = narrativeTexts[0]; // First statement (approach)
164+
const keySteps: string[] = [];
165+
166+
// Extract key action statements (sentences with "implement", "add", "create", "edit", etc.)
167+
const actionWords = /\b(implement|add|create|edit|modif|updat|fix|test|verif|check|read|writ|run)\w*/i;
168+
for (const text of narrativeTexts.slice(1)) {
169+
if (actionWords.test(text) && keySteps.length < 3) {
170+
keySteps.push(text);
171+
}
172+
}
173+
174+
// Combine into narrative
175+
let narrative = opening;
176+
if (keySteps.length > 0) {
177+
narrative += " " + keySteps.join(" ");
178+
}
179+
180+
// Truncate if extremely long (keep generous limit for detailed summaries)
181+
if (narrative.length > 10000) {
182+
narrative = narrative.substring(0, 9997) + "...";
183+
}
184+
185+
summaryParts.push(narrative);
186+
}
187+
188+
// Add metadata summary
189+
const metadata: string[] = [];
158190

159-
// Add modified files summary
160191
if (modifiedFiles.size > 0) {
161192
const fileList = Array.from(modifiedFiles).sort();
162-
if (fileList.length <= 5) {
163-
parts.push(`Modified files: ${fileList.join(", ")}`);
193+
if (fileList.length <= 3) {
194+
metadata.push(`Modified: ${fileList.join(", ")}`);
164195
} else {
165-
parts.push(`Modified ${fileList.length} files (${fileList.slice(0, 3).join(", ")}, ...)`);
196+
metadata.push(`Modified ${fileList.length} files`);
166197
}
167198
}
168199

169-
// Add tool usage summary
170200
if (Object.keys(toolCalls).length > 0) {
171201
const toolSummary = Object.entries(toolCalls)
172202
.sort(([a], [b]) => a.localeCompare(b))
173203
.map(([tool, count]) => `${tool}(${count})`)
174204
.join(", ");
175-
parts.push(`Tools: ${toolSummary}`);
205+
metadata.push(`Tools: ${toolSummary}`);
176206
}
177207

178-
// Add file reads summary (optional, only if significant)
179-
if (readFiles.size > 10) {
180-
parts.push(`Read ${readFiles.size} files`);
208+
if (metadata.length > 0) {
209+
summaryParts.push(`[${metadata.join("; ")}]`);
181210
}
182211

183-
// Add basic stats
184-
parts.push(`${totalMessages} messages`);
185212
if (errorCount > 0) {
186-
parts.push(`${errorCount} errors`);
213+
summaryParts.push(`⚠️ ${errorCount} errors`);
187214
}
188215

189-
return parts.length > 0 ? parts.join(", ") : "No activity detected";
216+
return summaryParts.length > 0
217+
? summaryParts.join(" ")
218+
: "No activity detected";
190219
}
191220

192221
async function printHelp(): Promise<void> {

0 commit comments

Comments
 (0)