|
| 1 | +import { db } from '@sim/db' |
| 2 | +import { |
| 3 | + a2aAgent, |
| 4 | + knowledgeBase, |
| 5 | + mcpServers, |
| 6 | + memory, |
| 7 | + subscription, |
| 8 | + userTableDefinitions, |
| 9 | + workflow, |
| 10 | + workflowFolder, |
| 11 | + workflowMcpServer, |
| 12 | + workspace, |
| 13 | + workspaceFile, |
| 14 | + workspaceFiles, |
| 15 | +} from '@sim/db/schema' |
| 16 | +import { createLogger } from '@sim/logger' |
| 17 | +import { and, eq, inArray, isNotNull, isNull, lt, or, sql } from 'drizzle-orm' |
| 18 | +import type { PgColumn, PgTable } from 'drizzle-orm/pg-core' |
| 19 | +import { type NextRequest, NextResponse } from 'next/server' |
| 20 | +import { verifyCronAuth } from '@/lib/auth/internal' |
| 21 | +import { sqlIsPaid, sqlIsPro, sqlIsTeam } from '@/lib/billing/plan-helpers' |
| 22 | +import { ENTITLED_SUBSCRIPTION_STATUSES } from '@/lib/billing/subscriptions/utils' |
| 23 | +import { env } from '@/lib/core/config/env' |
| 24 | +import { isUsingCloudStorage, StorageService } from '@/lib/uploads' |
| 25 | + |
| 26 | +export const dynamic = 'force-dynamic' |
| 27 | + |
| 28 | +const logger = createLogger('SoftDeleteCleanupAPI') |
| 29 | + |
| 30 | +const BATCH_SIZE = 2000 |
| 31 | +const MAX_BATCHES_PER_TABLE = 10 |
| 32 | + |
/** Per-table outcome of one cleanup pass. */
interface TableCleanupResult {
  // Label for the table (prefixed with the tier, e.g. "free/workflow").
  table: string
  // Total rows hard-deleted across all batches.
  deleted: number
  // Number of batches that threw; the loop stops on the first failure.
  failed: number
}
| 38 | + |
| 39 | +/** |
| 40 | + * Batch-delete rows from a table where the soft-delete column is older than the retention date, |
| 41 | + * scoped to the given workspace IDs. |
| 42 | + */ |
| 43 | +async function cleanupTable( |
| 44 | + tableDef: PgTable, |
| 45 | + softDeleteCol: PgColumn, |
| 46 | + workspaceIdCol: PgColumn, |
| 47 | + workspaceIds: string[], |
| 48 | + retentionDate: Date, |
| 49 | + tableName: string |
| 50 | +): Promise<TableCleanupResult> { |
| 51 | + const result: TableCleanupResult = { table: tableName, deleted: 0, failed: 0 } |
| 52 | + |
| 53 | + if (workspaceIds.length === 0) { |
| 54 | + logger.info(`[${tableName}] Skipped — no workspaces in this tier`) |
| 55 | + return result |
| 56 | + } |
| 57 | + |
| 58 | + let batchesProcessed = 0 |
| 59 | + let hasMore = true |
| 60 | + |
| 61 | + while (hasMore && batchesProcessed < MAX_BATCHES_PER_TABLE) { |
| 62 | + try { |
| 63 | + const deleted = await db |
| 64 | + .delete(tableDef) |
| 65 | + .where( |
| 66 | + and( |
| 67 | + inArray(workspaceIdCol, workspaceIds), |
| 68 | + isNotNull(softDeleteCol), |
| 69 | + lt(softDeleteCol, retentionDate) |
| 70 | + ) |
| 71 | + ) |
| 72 | + .returning({ id: sql`id` }) |
| 73 | + |
| 74 | + result.deleted += deleted.length |
| 75 | + hasMore = deleted.length === BATCH_SIZE |
| 76 | + batchesProcessed++ |
| 77 | + |
| 78 | + if (deleted.length > 0) { |
| 79 | + logger.info(`[${tableName}] Batch ${batchesProcessed}: deleted ${deleted.length} rows`) |
| 80 | + } else { |
| 81 | + logger.info(`[${tableName}] No expired soft-deleted rows found`) |
| 82 | + } |
| 83 | + } catch (error) { |
| 84 | + result.failed++ |
| 85 | + logger.error(`[${tableName}] Batch delete failed:`, { error }) |
| 86 | + hasMore = false |
| 87 | + } |
| 88 | + } |
| 89 | + |
| 90 | + return result |
| 91 | +} |
| 92 | + |
| 93 | +/** |
| 94 | + * Clean up soft-deleted workspace files from cloud storage before hard-deleting. |
| 95 | + */ |
| 96 | +async function cleanupWorkspaceFileStorage( |
| 97 | + workspaceIds: string[], |
| 98 | + retentionDate: Date |
| 99 | +): Promise<{ filesDeleted: number; filesFailed: number }> { |
| 100 | + const stats = { filesDeleted: 0, filesFailed: 0 } |
| 101 | + |
| 102 | + if (!isUsingCloudStorage() || workspaceIds.length === 0) return stats |
| 103 | + |
| 104 | + // Fetch keys of files about to be deleted |
| 105 | + const filesToDelete = await db |
| 106 | + .select({ key: workspaceFiles.key }) |
| 107 | + .from(workspaceFiles) |
| 108 | + .where( |
| 109 | + and( |
| 110 | + inArray(workspaceFiles.workspaceId, workspaceIds), |
| 111 | + isNotNull(workspaceFiles.deletedAt), |
| 112 | + lt(workspaceFiles.deletedAt, retentionDate) |
| 113 | + ) |
| 114 | + ) |
| 115 | + .limit(BATCH_SIZE * MAX_BATCHES_PER_TABLE) |
| 116 | + |
| 117 | + for (const file of filesToDelete) { |
| 118 | + try { |
| 119 | + await StorageService.deleteFile({ key: file.key, context: 'workspace' }) |
| 120 | + stats.filesDeleted++ |
| 121 | + } catch (error) { |
| 122 | + stats.filesFailed++ |
| 123 | + logger.error(`Failed to delete storage file ${file.key}:`, { error }) |
| 124 | + } |
| 125 | + } |
| 126 | + |
| 127 | + return stats |
| 128 | +} |
| 129 | + |
| 130 | +/** All tables to clean up with their soft-delete column and workspace column. */ |
| 131 | +const CLEANUP_TARGETS = [ |
| 132 | + { table: workflow, softDeleteCol: workflow.archivedAt, wsCol: workflow.workspaceId, name: 'workflow' }, |
| 133 | + { table: workflowFolder, softDeleteCol: workflowFolder.archivedAt, wsCol: workflowFolder.workspaceId, name: 'workflowFolder' }, |
| 134 | + { table: knowledgeBase, softDeleteCol: knowledgeBase.deletedAt, wsCol: knowledgeBase.workspaceId, name: 'knowledgeBase' }, |
| 135 | + { table: userTableDefinitions, softDeleteCol: userTableDefinitions.archivedAt, wsCol: userTableDefinitions.workspaceId, name: 'userTableDefinitions' }, |
| 136 | + { table: workspaceFile, softDeleteCol: workspaceFile.deletedAt, wsCol: workspaceFile.workspaceId, name: 'workspaceFile' }, |
| 137 | + { table: workspaceFiles, softDeleteCol: workspaceFiles.deletedAt, wsCol: workspaceFiles.workspaceId, name: 'workspaceFiles' }, |
| 138 | + { table: memory, softDeleteCol: memory.deletedAt, wsCol: memory.workspaceId, name: 'memory' }, |
| 139 | + { table: mcpServers, softDeleteCol: mcpServers.deletedAt, wsCol: mcpServers.workspaceId, name: 'mcpServers' }, |
| 140 | + { table: workflowMcpServer, softDeleteCol: workflowMcpServer.deletedAt, wsCol: workflowMcpServer.workspaceId, name: 'workflowMcpServer' }, |
| 141 | + { table: a2aAgent, softDeleteCol: a2aAgent.archivedAt, wsCol: a2aAgent.workspaceId, name: 'a2aAgent' }, |
| 142 | +] as const |
| 143 | + |
| 144 | +async function cleanupTier( |
| 145 | + workspaceIds: string[], |
| 146 | + retentionDate: Date, |
| 147 | + tierLabel: string |
| 148 | +): Promise<{ tables: TableCleanupResult[]; filesDeleted: number; filesFailed: number }> { |
| 149 | + const tables: TableCleanupResult[] = [] |
| 150 | + |
| 151 | + if (workspaceIds.length === 0) { |
| 152 | + return { tables, filesDeleted: 0, filesFailed: 0 } |
| 153 | + } |
| 154 | + |
| 155 | + // Clean cloud storage files before hard-deleting file metadata rows |
| 156 | + const fileStats = await cleanupWorkspaceFileStorage(workspaceIds, retentionDate) |
| 157 | + |
| 158 | + for (const target of CLEANUP_TARGETS) { |
| 159 | + const result = await cleanupTable( |
| 160 | + target.table, |
| 161 | + target.softDeleteCol, |
| 162 | + target.wsCol, |
| 163 | + workspaceIds, |
| 164 | + retentionDate, |
| 165 | + `${tierLabel}/${target.name}` |
| 166 | + ) |
| 167 | + tables.push(result) |
| 168 | + } |
| 169 | + |
| 170 | + return { tables, ...fileStats } |
| 171 | +} |
| 172 | + |
| 173 | +export async function GET(request: NextRequest) { |
| 174 | + try { |
| 175 | + const authError = verifyCronAuth(request, 'soft-delete cleanup') |
| 176 | + if (authError) return authError |
| 177 | + |
| 178 | + const startTime = Date.now() |
| 179 | + |
| 180 | + const freeRetentionDays = Number(env.FREE_PLAN_LOG_RETENTION_DAYS || '7') |
| 181 | + const paidRetentionDays = Number(env.PAID_PLAN_LOG_RETENTION_DAYS || '30') |
| 182 | + |
| 183 | + const freeRetentionDate = new Date(Date.now() - freeRetentionDays * 24 * 60 * 60 * 1000) |
| 184 | + const paidRetentionDate = new Date(Date.now() - paidRetentionDays * 24 * 60 * 60 * 1000) |
| 185 | + |
| 186 | + logger.info('Starting soft-delete cleanup', { |
| 187 | + freeRetentionDays, |
| 188 | + paidRetentionDays, |
| 189 | + freeRetentionDate: freeRetentionDate.toISOString(), |
| 190 | + paidRetentionDate: paidRetentionDate.toISOString(), |
| 191 | + }) |
| 192 | + |
| 193 | + // --- Group 1: Free workspaces --- |
| 194 | + |
| 195 | + const freeWorkspaceRows = await db |
| 196 | + .select({ id: workspace.id }) |
| 197 | + .from(workspace) |
| 198 | + .leftJoin( |
| 199 | + subscription, |
| 200 | + and( |
| 201 | + eq(subscription.referenceId, workspace.billedAccountUserId), |
| 202 | + inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES), |
| 203 | + sqlIsPaid(subscription.plan) |
| 204 | + ) |
| 205 | + ) |
| 206 | + .where(and(isNull(subscription.id), isNull(workspace.archivedAt))) |
| 207 | + |
| 208 | + const freeIds = freeWorkspaceRows.map((w) => w.id) |
| 209 | + logger.info(`[free] Found ${freeIds.length} workspaces, retention cutoff: ${freeRetentionDate.toISOString()}`) |
| 210 | + const freeResults = await cleanupTier(freeIds, freeRetentionDate, 'free') |
| 211 | + logger.info(`[free] Result: ${freeResults.tables.reduce((s, t) => s + t.deleted, 0)} total rows deleted across ${CLEANUP_TARGETS.length} tables`) |
| 212 | + |
| 213 | + // --- Group 2: Pro/Team workspaces --- |
| 214 | + |
| 215 | + const paidWorkspaceRows = await db |
| 216 | + .select({ id: workspace.id }) |
| 217 | + .from(workspace) |
| 218 | + .innerJoin( |
| 219 | + subscription, |
| 220 | + and( |
| 221 | + eq(subscription.referenceId, workspace.billedAccountUserId), |
| 222 | + inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES), |
| 223 | + or(sqlIsPro(subscription.plan)!, sqlIsTeam(subscription.plan)!) |
| 224 | + ) |
| 225 | + ) |
| 226 | + .where(isNull(workspace.archivedAt)) |
| 227 | + |
| 228 | + const paidIds = paidWorkspaceRows.map((w) => w.id) |
| 229 | + logger.info(`[paid] Found ${paidIds.length} workspaces, retention cutoff: ${paidRetentionDate.toISOString()}`) |
| 230 | + const paidResults = await cleanupTier(paidIds, paidRetentionDate, 'paid') |
| 231 | + logger.info(`[paid] Result: ${paidResults.tables.reduce((s, t) => s + t.deleted, 0)} total rows deleted across ${CLEANUP_TARGETS.length} tables`) |
| 232 | + |
| 233 | + // --- Group 3: Enterprise with custom softDeleteRetentionHours --- |
| 234 | + |
| 235 | + const enterpriseWorkspaceRows = await db |
| 236 | + .select({ |
| 237 | + id: workspace.id, |
| 238 | + softDeleteRetentionHours: workspace.softDeleteRetentionHours, |
| 239 | + }) |
| 240 | + .from(workspace) |
| 241 | + .innerJoin( |
| 242 | + subscription, |
| 243 | + and( |
| 244 | + eq(subscription.referenceId, workspace.billedAccountUserId), |
| 245 | + inArray(subscription.status, ENTITLED_SUBSCRIPTION_STATUSES), |
| 246 | + eq(subscription.plan, 'enterprise') |
| 247 | + ) |
| 248 | + ) |
| 249 | + .where( |
| 250 | + and(isNull(workspace.archivedAt), isNotNull(workspace.softDeleteRetentionHours)) |
| 251 | + ) |
| 252 | + |
| 253 | + const enterpriseGroups = new Map<number, string[]>() |
| 254 | + for (const ws of enterpriseWorkspaceRows) { |
| 255 | + const hours = ws.softDeleteRetentionHours! |
| 256 | + const group = enterpriseGroups.get(hours) ?? [] |
| 257 | + group.push(ws.id) |
| 258 | + enterpriseGroups.set(hours, group) |
| 259 | + } |
| 260 | + |
| 261 | + logger.info(`[enterprise] Found ${enterpriseWorkspaceRows.length} workspaces with custom retention (${enterpriseGroups.size} distinct retention periods). Workspaces with NULL retention are skipped.`) |
| 262 | + |
| 263 | + const enterpriseTables: TableCleanupResult[] = [] |
| 264 | + let enterpriseFilesDeleted = 0 |
| 265 | + let enterpriseFilesFailed = 0 |
| 266 | + |
| 267 | + for (const [hours, ids] of enterpriseGroups) { |
| 268 | + const retentionDate = new Date(Date.now() - hours * 60 * 60 * 1000) |
| 269 | + logger.info(`[enterprise-${hours}h] Processing ${ids.length} workspaces, retention cutoff: ${retentionDate.toISOString()}`) |
| 270 | + const groupResults = await cleanupTier(ids, retentionDate, `enterprise-${hours}h`) |
| 271 | + enterpriseTables.push(...groupResults.tables) |
| 272 | + enterpriseFilesDeleted += groupResults.filesDeleted |
| 273 | + enterpriseFilesFailed += groupResults.filesFailed |
| 274 | + } |
| 275 | + |
| 276 | + const timeElapsed = (Date.now() - startTime) / 1000 |
| 277 | + |
| 278 | + const totalDeleted = (results: { tables: TableCleanupResult[] }) => |
| 279 | + results.tables.reduce((sum, t) => sum + t.deleted, 0) |
| 280 | + |
| 281 | + return NextResponse.json({ |
| 282 | + message: `Soft-delete cleanup completed in ${timeElapsed.toFixed(2)}s`, |
| 283 | + tiers: { |
| 284 | + free: { |
| 285 | + workspaces: freeIds.length, |
| 286 | + retentionDays: freeRetentionDays, |
| 287 | + totalDeleted: totalDeleted(freeResults), |
| 288 | + filesDeleted: freeResults.filesDeleted, |
| 289 | + filesFailed: freeResults.filesFailed, |
| 290 | + tables: freeResults.tables, |
| 291 | + }, |
| 292 | + paid: { |
| 293 | + workspaces: paidIds.length, |
| 294 | + retentionDays: paidRetentionDays, |
| 295 | + totalDeleted: totalDeleted(paidResults), |
| 296 | + filesDeleted: paidResults.filesDeleted, |
| 297 | + filesFailed: paidResults.filesFailed, |
| 298 | + tables: paidResults.tables, |
| 299 | + }, |
| 300 | + enterprise: { |
| 301 | + workspaces: enterpriseWorkspaceRows.length, |
| 302 | + groups: enterpriseGroups.size, |
| 303 | + totalDeleted: enterpriseTables.reduce((sum, t) => sum + t.deleted, 0), |
| 304 | + filesDeleted: enterpriseFilesDeleted, |
| 305 | + filesFailed: enterpriseFilesFailed, |
| 306 | + tables: enterpriseTables, |
| 307 | + }, |
| 308 | + }, |
| 309 | + }) |
| 310 | + } catch (error) { |
| 311 | + logger.error('Error in soft-delete cleanup:', { error }) |
| 312 | + return NextResponse.json({ error: 'Failed to process soft-delete cleanup' }, { status: 500 }) |
| 313 | + } |
| 314 | +} |
0 commit comments