Skip to content

Commit 63f7b59

Browse files
add bucket attachment cleanup
1 parent ea55f09 commit 63f7b59

File tree

5 files changed

+240
-434
lines changed

5 files changed

+240
-434
lines changed

apps/sim/background/cleanup-soft-deletes.ts

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
import { db } from '@sim/db'
22
import {
33
a2aAgent,
4+
copilotChats,
45
knowledgeBase,
56
mcpServers,
67
memory,
@@ -21,6 +22,7 @@ import {
2122
getRetentionDefaultHours,
2223
resolveTierWorkspaceIds,
2324
} from '@/lib/billing/cleanup-dispatcher'
25+
import { prepareChatCleanup } from '@/lib/cleanup/chat-cleanup'
2426
import { isUsingCloudStorage, StorageService } from '@/lib/uploads'
2527

2628
const logger = createLogger('CleanupSoftDeletes')
@@ -181,6 +183,33 @@ export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise
181183
`[${tierLabel}] Processing ${workspaceIds.length} workspaces, cutoff: ${retentionDate.toISOString()}`
182184
)
183185

186+
// Find chats linked to workflows that are about to be cascade-deleted
187+
const doomedWorkflows = await db
188+
.select({ id: workflow.id })
189+
.from(workflow)
190+
.where(
191+
and(
192+
inArray(workflow.workspaceId, workspaceIds),
193+
isNotNull(workflow.archivedAt),
194+
lt(workflow.archivedAt, retentionDate)
195+
)
196+
)
197+
198+
const doomedWorkflowIds = doomedWorkflows.map((w) => w.id)
199+
let chatCleanup: { execute: () => Promise<void> } | null = null
200+
201+
if (doomedWorkflowIds.length > 0) {
202+
const doomedChats = await db
203+
.select({ id: copilotChats.id })
204+
.from(copilotChats)
205+
.where(inArray(copilotChats.workflowId, doomedWorkflowIds))
206+
207+
const doomedChatIds = doomedChats.map((c) => c.id)
208+
if (doomedChatIds.length > 0) {
209+
chatCleanup = await prepareChatCleanup(doomedChatIds, tierLabel)
210+
}
211+
}
212+
184213
const fileStats = await cleanupWorkspaceFileStorage(workspaceIds, retentionDate)
185214

186215
let totalDeleted = 0
@@ -200,6 +229,11 @@ export async function runCleanupSoftDeletes(payload: CleanupJobPayload): Promise
200229
`[${tierLabel}] Complete: ${totalDeleted} rows deleted, ${fileStats.filesDeleted} files cleaned`
201230
)
202231

232+
// Clean up copilot backend + chat storage files after DB rows are gone
233+
if (chatCleanup) {
234+
await chatCleanup.execute()
235+
}
236+
203237
const timeElapsed = (Date.now() - startTime) / 1000
204238
logger.info(`[${tierLabel}] Job completed in ${timeElapsed.toFixed(2)}s`)
205239
}

apps/sim/background/cleanup-tasks.ts

Lines changed: 12 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,7 @@ import {
1717
getRetentionDefaultHours,
1818
resolveTierWorkspaceIds,
1919
} from '@/lib/billing/cleanup-dispatcher'
20-
import { SIM_AGENT_API_URL } from '@/lib/copilot/constants'
21-
import { env } from '@/lib/core/config/env'
20+
import { prepareChatCleanup } from '@/lib/cleanup/chat-cleanup'
2221

2322
const logger = createLogger('CleanupTasks')
2423

@@ -87,7 +86,7 @@ async function cleanupRunChildren(
8786
const runIds = await db
8887
.select({ id: copilotRuns.id })
8988
.from(copilotRuns)
90-
.where(and(inArray(copilotRuns.workspaceId, workspaceIds), lt(copilotRuns.createdAt, retentionDate)))
89+
.where(and(inArray(copilotRuns.workspaceId, workspaceIds), lt(copilotRuns.updatedAt, retentionDate)))
9190
.limit(BATCH_SIZE * MAX_BATCHES_PER_TABLE)
9291

9392
if (runIds.length === 0) {
@@ -139,61 +138,6 @@ async function cleanupRunChildren(
139138
return results
140139
}
141140

142-
const COPILOT_CLEANUP_BATCH_SIZE = 1000
143-
144-
/**
145-
* Call the copilot backend to delete chat data (memory_files, checkpoints, task_chains, etc.)
146-
* before we delete the Sim DB rows. Chunked at 1000 per request.
147-
*/
148-
async function cleanupCopilotBackend(
149-
chatIds: string[],
150-
tierLabel: string
151-
): Promise<{ deleted: number; failed: number }> {
152-
const stats = { deleted: 0, failed: 0 }
153-
154-
if (chatIds.length === 0 || !env.COPILOT_API_KEY) {
155-
if (!env.COPILOT_API_KEY) {
156-
logger.warn(`[${tierLabel}] COPILOT_API_KEY not set, skipping copilot backend cleanup`)
157-
}
158-
return stats
159-
}
160-
161-
for (let i = 0; i < chatIds.length; i += COPILOT_CLEANUP_BATCH_SIZE) {
162-
const chunk = chatIds.slice(i, i + COPILOT_CLEANUP_BATCH_SIZE)
163-
try {
164-
const response = await fetch(`${SIM_AGENT_API_URL}/api/tasks/cleanup`, {
165-
method: 'POST',
166-
headers: {
167-
'Content-Type': 'application/json',
168-
'x-api-key': env.COPILOT_API_KEY,
169-
},
170-
body: JSON.stringify({ chatIds: chunk }),
171-
})
172-
173-
if (!response.ok) {
174-
const errorBody = await response.text().catch(() => '')
175-
logger.error(`[${tierLabel}] Copilot backend cleanup failed: ${response.status}`, {
176-
errorBody,
177-
chatCount: chunk.length,
178-
})
179-
stats.failed += chunk.length
180-
continue
181-
}
182-
183-
const result = await response.json()
184-
stats.deleted += result.deleted ?? 0
185-
logger.info(
186-
`[${tierLabel}] Copilot backend cleanup: ${result.deleted} chats deleted (batch ${Math.floor(i / COPILOT_CLEANUP_BATCH_SIZE) + 1})`
187-
)
188-
} catch (error) {
189-
stats.failed += chunk.length
190-
logger.error(`[${tierLabel}] Copilot backend cleanup request failed:`, { error })
191-
}
192-
}
193-
194-
return stats
195-
}
196-
197141
async function resolvePayload(payload: CleanupJobPayload): Promise<{
198142
workspaceIds: string[]
199143
retentionHours: number
@@ -252,13 +196,16 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
252196
.where(
253197
and(
254198
inArray(copilotChats.workspaceId, workspaceIds),
255-
lt(copilotChats.createdAt, retentionDate)
199+
lt(copilotChats.updatedAt, retentionDate)
256200
)
257201
)
258202
.limit(BATCH_SIZE * MAX_BATCHES_PER_TABLE)
259203

260204
const doomedChatIds = doomedChats.map((c) => c.id)
261205

206+
// Prepare chat cleanup (collect file keys + copilot backend call) BEFORE DB deletion
207+
const chatCleanup = await prepareChatCleanup(doomedChatIds, tierLabel)
208+
262209
// Delete run children first (checkpoints, tool calls) since they reference runs
263210
const runChildResults = await cleanupRunChildren(workspaceIds, retentionDate, tierLabel)
264211
for (const r of runChildResults) {
@@ -278,7 +225,7 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
278225
.where(
279226
and(
280227
inArray(copilotChats.workspaceId, workspaceIds),
281-
lt(copilotChats.createdAt, retentionDate)
228+
lt(copilotChats.updatedAt, retentionDate)
282229
)
283230
)
284231
.limit(BATCH_SIZE * MAX_BATCHES_PER_TABLE)
@@ -287,7 +234,7 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
287234
const deleted = await db
288235
.delete(copilotFeedback)
289236
.where(inArray(copilotFeedback.chatId, chatIds.map((c) => c.id)))
290-
.returning({ id: sql`id` })
237+
.returning({ id: copilotFeedback.feedbackId })
291238
feedbackResult.deleted = deleted.length
292239
logger.info(`[${feedbackResult.table}] Deleted ${deleted.length} rows`)
293240
} else {
@@ -302,7 +249,7 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
302249
const runsResult = await cleanupTable(
303250
copilotRuns,
304251
copilotRuns.workspaceId,
305-
copilotRuns.createdAt,
252+
copilotRuns.updatedAt,
306253
workspaceIds,
307254
retentionDate,
308255
`${tierLabel}/copilotRuns`
@@ -312,7 +259,7 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
312259
const chatsResult = await cleanupTable(
313260
copilotChats,
314261
copilotChats.workspaceId,
315-
copilotChats.createdAt,
262+
copilotChats.updatedAt,
316263
workspaceIds,
317264
retentionDate,
318265
`${tierLabel}/copilotChats`
@@ -336,13 +283,8 @@ export async function runCleanupTasks(payload: CleanupJobPayload): Promise<void>
336283

337284
logger.info(`[${tierLabel}] Complete: ${totalDeleted} total rows deleted`)
338285

339-
// Clean up copilot backend after Sim DB rows are gone (chat no longer accessible)
340-
if (doomedChatIds.length > 0) {
341-
const copilotResult = await cleanupCopilotBackend(doomedChatIds, tierLabel)
342-
logger.info(
343-
`[${tierLabel}] Copilot backend: ${copilotResult.deleted} deleted, ${copilotResult.failed} failed`
344-
)
345-
}
286+
// Clean up copilot backend + storage files after DB rows are gone
287+
await chatCleanup.execute()
346288

347289
const timeElapsed = (Date.now() - startTime) / 1000
348290
logger.info(`Task cleanup completed in ${timeElapsed.toFixed(2)}s`)

0 commit comments

Comments
 (0)