Skip to content

Commit 6c6c691

Browse files
Add configurable cleanup for soft delete and logs
1 parent 351873a commit 6c6c691

12 files changed

Lines changed: 1441 additions & 180 deletions

File tree

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
import { db } from '@sim/db'
2+
import {
3+
a2aAgent,
4+
knowledgeBase,
5+
mcpServers,
6+
memory,
7+
subscription,
8+
userTableDefinitions,
9+
workflow,
10+
workflowFolder,
11+
workflowMcpServer,
12+
workspace,
13+
workspaceFile,
14+
workspaceFiles,
15+
} from '@sim/db/schema'
16+
import { createLogger } from '@sim/logger'
17+
import { and, eq, inArray, isNotNull, isNull, lt, or, sql } from 'drizzle-orm'
18+
import type { PgColumn, PgTable } from 'drizzle-orm/pg-core'
19+
import { type NextRequest, NextResponse } from 'next/server'
20+
import { verifyCronAuth } from '@/lib/auth/internal'
21+
import { sqlIsPaid, sqlIsPro, sqlIsTeam } from '@/lib/billing/plan-helpers'
22+
import { ENTITLED_SUBSCRIPTION_STATUSES } from '@/lib/billing/subscriptions/utils'
23+
import { env } from '@/lib/core/config/env'
24+
import { isUsingCloudStorage, StorageService } from '@/lib/uploads'
25+
26+
export const dynamic = 'force-dynamic'
27+
28+
const logger = createLogger('SoftDeleteCleanupAPI')
29+
30+
const BATCH_SIZE = 2000
31+
const MAX_BATCHES_PER_TABLE = 10
32+
33+
interface TableCleanupResult {
34+
table: string
35+
deleted: number
36+
failed: number
37+
}
38+
39+
/**
40+
* Batch-delete rows from a table where the soft-delete column is older than the retention date,
41+
* scoped to the given workspace IDs.
42+
*/
43+
async function cleanupTable(
44+
tableDef: PgTable,
45+
softDeleteCol: PgColumn,
46+
workspaceIdCol: PgColumn,
47+
workspaceIds: string[],
48+
retentionDate: Date,
49+
tableName: string
50+
): Promise<TableCleanupResult> {
51+
const result: TableCleanupResult = { table: tableName, deleted: 0, failed: 0 }
52+
53+
if (workspaceIds.length === 0) {
54+
logger.info(`[${tableName}] Skipped — no workspaces in this tier`)
55+
return result
56+
}
57+
58+
let batchesProcessed = 0
59+
let hasMore = true
60+
61+
while (hasMore && batchesProcessed < MAX_BATCHES_PER_TABLE) {
62+
try {
63+
const deleted = await db
64+
.delete(tableDef)
65+
.where(
66+
and(
67+
inArray(workspaceIdCol, workspaceIds),
68+
isNotNull(softDeleteCol),
69+
lt(softDeleteCol, retentionDate)
70+
)
71+
)
72+
.returning({ id: sql`id` })
73+
74+
result.deleted += deleted.length
75+
hasMore = deleted.length === BATCH_SIZE
76+
batchesProcessed++
77+
78+
if (deleted.length > 0) {
79+
logger.info(`[${tableName}] Batch ${batchesProcessed}: deleted ${deleted.length} rows`)
80+
} else {
81+
logger.info(`[${tableName}] No expired soft-deleted rows found`)
82+
}
83+
} catch (error) {
84+
result.failed++
85+
logger.error(`[${tableName}] Batch delete failed:`, { error })
86+
hasMore = false
87+
}
88+
}
89+
90+
return result
91+
}
92+
93+
/**
94+
* Clean up soft-deleted workspace files from cloud storage before hard-deleting.
95+
*/
96+
async function cleanupWorkspaceFileStorage(
97+
workspaceIds: string[],
98+
retentionDate: Date
99+
): Promise<{ filesDeleted: number; filesFailed: number }> {
100+
const stats = { filesDeleted: 0, filesFailed: 0 }
101+
102+
if (!isUsingCloudStorage() || workspaceIds.length === 0) return stats
103+
104+
// Fetch keys of files about to be deleted
105+
const filesToDelete = await db
106+
.select({ key: workspaceFiles.key })
107+
.from(workspaceFiles)
108+
.where(
109+
and(
110+
inArray(workspaceFiles.workspaceId, workspaceIds),
111+
isNotNull(workspaceFiles.deletedAt),
112+
lt(workspaceFiles.deletedAt, retentionDate)
113+
)
114+
)
115+
.limit(BATCH_SIZE * MAX_BATCHES_PER_TABLE)
116+
117+
for (const file of filesToDelete) {
118+
try {
119+
await StorageService.deleteFile({ key: file.key, context: 'workspace' })
120+
stats.filesDeleted++
121+
} catch (error) {
122+
stats.filesFailed++
123+
logger.error(`Failed to delete storage file ${file.key}:`, { error })
124+
}
125+
}
126+
127+
return stats
128+
}
129+
130+
/** All tables to clean up with their soft-delete column and workspace column. */
131+
const CLEANUP_TARGETS = [
132+
{ table: workflow, softDeleteCol: workflow.archivedAt, wsCol: workflow.workspaceId, name: 'workflow' },
133+
{ table: workflowFolder, softDeleteCol: workflowFolder.archivedAt, wsCol: workflowFolder.workspaceId, name: 'workflowFolder' },
134+
{ table: knowledgeBase, softDeleteCol: knowledgeBase.deletedAt, wsCol: knowledgeBase.workspaceId, name: 'knowledgeBase' },
135+
{ table: userTableDefinitions, softDeleteCol: userTableDefinitions.archivedAt, wsCol: userTableDefinitions.workspaceId, name: 'userTableDefinitions' },
136+
{ table: workspaceFile, softDeleteCol: workspaceFile.deletedAt, wsCol: workspaceFile.workspaceId, name: 'workspaceFile' },
137+
{ table: workspaceFiles, softDeleteCol: workspaceFiles.deletedAt, wsCol: workspaceFiles.workspaceId, name: 'workspaceFiles' },
138+
{ table: memory, softDeleteCol: memory.deletedAt, wsCol: memory.workspaceId, name: 'memory' },
139+
{ table: mcpServers, softDeleteCol: mcpServers.deletedAt, wsCol: mcpServers.workspaceId, name: 'mcpServers' },
140+
{ table: workflowMcpServer, softDeleteCol: workflowMcpServer.deletedAt, wsCol: workflowMcpServer.workspaceId, name: 'workflowMcpServer' },
141+
{ table: a2aAgent, softDeleteCol: a2aAgent.archivedAt, wsCol: a2aAgent.workspaceId, name: 'a2aAgent' },
142+
] as const
143+
144+
async function cleanupTier(
145+
workspaceIds: string[],
146+
retentionDate: Date,
147+
tierLabel: string
148+
): Promise<{ tables: TableCleanupResult[]; filesDeleted: number; filesFailed: number }> {
149+
const tables: TableCleanupResult[] = []
150+
151+
if (workspaceIds.length === 0) {
152+
return { tables, filesDeleted: 0, filesFailed: 0 }
153+
}
154+
155+
// Clean cloud storage files before hard-deleting file metadata rows
156+
const fileStats = await cleanupWorkspaceFileStorage(workspaceIds, retentionDate)
157+
158+
for (const target of CLEANUP_TARGETS) {
159+
const result = await cleanupTable(
160+
target.table,
161+
target.softDeleteCol,
162+
target.wsCol,
163+
workspaceIds,
164+
retentionDate,
165+
`${tierLabel}/${target.name}`
166+
)
167+
tables.push(result)
168+
}
169+
170+
return { tables, ...fileStats }
171+
}
172+
173+
export async function GET(request: NextRequest) {
174+
try {
175+
const authError = verifyCronAuth(request, 'soft-delete cleanup')
176+
if (authError) return authError
177+
178+
const startTime = Date.now()
179+
180+
const freeRetentionDays = Number(env.FREE_PLAN_LOG_RETENTION_DAYS || '7')
181+
const paidRetentionDays = Number(env.PAID_PLAN_LOG_RETENTION_DAYS || '30')
182+
183+
const freeRetentionDate = new Date(Date.now() - freeRetentionDays * 24 * 60 * 60 * 1000)
184+
const paidRetentionDate = new Date(Date.now() - paidRetentionDays * 24 * 60 * 60 * 1000)
185+
186+
logger.info('Starting soft-delete cleanup', {
187+
freeRetentionDays,
188+
paidRetentionDays,
189+
freeRetentionDate: freeRetentionDate.toISOString(),
190+
paidRetentionDate: paidRetentionDate.toISOString(),
191+
})
192+
193+
// --- Group 1: Free workspaces ---
194+
195+
const freeWorkspaceRows = await db
196+
.select({ id: workspace.id })
197+
.from(workspace)
198+
.leftJoin(
199+
subscription,
200+
and(
201+
eq(subscription.referenceId, workspace.billedAccountUserId),
202+
inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES),
203+
sqlIsPaid(subscription.plan)
204+
)
205+
)
206+
.where(and(isNull(subscription.id), isNull(workspace.archivedAt)))
207+
208+
const freeIds = freeWorkspaceRows.map((w) => w.id)
209+
logger.info(`[free] Found ${freeIds.length} workspaces, retention cutoff: ${freeRetentionDate.toISOString()}`)
210+
const freeResults = await cleanupTier(freeIds, freeRetentionDate, 'free')
211+
logger.info(`[free] Result: ${freeResults.tables.reduce((s, t) => s + t.deleted, 0)} total rows deleted across ${CLEANUP_TARGETS.length} tables`)
212+
213+
// --- Group 2: Pro/Team workspaces ---
214+
215+
const paidWorkspaceRows = await db
216+
.select({ id: workspace.id })
217+
.from(workspace)
218+
.innerJoin(
219+
subscription,
220+
and(
221+
eq(subscription.referenceId, workspace.billedAccountUserId),
222+
inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES),
223+
or(sqlIsPro(subscription.plan)!, sqlIsTeam(subscription.plan)!)
224+
)
225+
)
226+
.where(isNull(workspace.archivedAt))
227+
228+
const paidIds = paidWorkspaceRows.map((w) => w.id)
229+
logger.info(`[paid] Found ${paidIds.length} workspaces, retention cutoff: ${paidRetentionDate.toISOString()}`)
230+
const paidResults = await cleanupTier(paidIds, paidRetentionDate, 'paid')
231+
logger.info(`[paid] Result: ${paidResults.tables.reduce((s, t) => s + t.deleted, 0)} total rows deleted across ${CLEANUP_TARGETS.length} tables`)
232+
233+
// --- Group 3: Enterprise with custom softDeleteRetentionHours ---
234+
235+
const enterpriseWorkspaceRows = await db
236+
.select({
237+
id: workspace.id,
238+
softDeleteRetentionHours: workspace.softDeleteRetentionHours,
239+
})
240+
.from(workspace)
241+
.innerJoin(
242+
subscription,
243+
and(
244+
eq(subscription.referenceId, workspace.billedAccountUserId),
245+
inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES),
246+
eq(subscription.plan, 'enterprise')
247+
)
248+
)
249+
.where(
250+
and(isNull(workspace.archivedAt), isNotNull(workspace.softDeleteRetentionHours))
251+
)
252+
253+
const enterpriseGroups = new Map<number, string[]>()
254+
for (const ws of enterpriseWorkspaceRows) {
255+
const hours = ws.softDeleteRetentionHours!
256+
const group = enterpriseGroups.get(hours) ?? []
257+
group.push(ws.id)
258+
enterpriseGroups.set(hours, group)
259+
}
260+
261+
logger.info(`[enterprise] Found ${enterpriseWorkspaceRows.length} workspaces with custom retention (${enterpriseGroups.size} distinct retention periods). Workspaces with NULL retention are skipped.`)
262+
263+
const enterpriseTables: TableCleanupResult[] = []
264+
let enterpriseFilesDeleted = 0
265+
let enterpriseFilesFailed = 0
266+
267+
for (const [hours, ids] of enterpriseGroups) {
268+
const retentionDate = new Date(Date.now() - hours * 60 * 60 * 1000)
269+
logger.info(`[enterprise-${hours}h] Processing ${ids.length} workspaces, retention cutoff: ${retentionDate.toISOString()}`)
270+
const groupResults = await cleanupTier(ids, retentionDate, `enterprise-${hours}h`)
271+
enterpriseTables.push(...groupResults.tables)
272+
enterpriseFilesDeleted += groupResults.filesDeleted
273+
enterpriseFilesFailed += groupResults.filesFailed
274+
}
275+
276+
const timeElapsed = (Date.now() - startTime) / 1000
277+
278+
const totalDeleted = (results: { tables: TableCleanupResult[] }) =>
279+
results.tables.reduce((sum, t) => sum + t.deleted, 0)
280+
281+
return NextResponse.json({
282+
message: `Soft-delete cleanup completed in ${timeElapsed.toFixed(2)}s`,
283+
tiers: {
284+
free: {
285+
workspaces: freeIds.length,
286+
retentionDays: freeRetentionDays,
287+
totalDeleted: totalDeleted(freeResults),
288+
filesDeleted: freeResults.filesDeleted,
289+
filesFailed: freeResults.filesFailed,
290+
tables: freeResults.tables,
291+
},
292+
paid: {
293+
workspaces: paidIds.length,
294+
retentionDays: paidRetentionDays,
295+
totalDeleted: totalDeleted(paidResults),
296+
filesDeleted: paidResults.filesDeleted,
297+
filesFailed: paidResults.filesFailed,
298+
tables: paidResults.tables,
299+
},
300+
enterprise: {
301+
workspaces: enterpriseWorkspaceRows.length,
302+
groups: enterpriseGroups.size,
303+
totalDeleted: enterpriseTables.reduce((sum, t) => sum + t.deleted, 0),
304+
filesDeleted: enterpriseFilesDeleted,
305+
filesFailed: enterpriseFilesFailed,
306+
tables: enterpriseTables,
307+
},
308+
},
309+
})
310+
} catch (error) {
311+
logger.error('Error in soft-delete cleanup:', { error })
312+
return NextResponse.json({ error: 'Failed to process soft-delete cleanup' }, { status: 500 })
313+
}
314+
}

0 commit comments

Comments
 (0)