|
| 1 | +import { createLogger } from '@sim/logger' |
| 2 | +import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits' |
1 | 3 | import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types' |
2 | 4 | import type { ToolConfig } from '@/tools/types' |
3 | 5 |
|
/** Logger for the Bright Data Discover tool. */
const logger = createLogger('tools:brightdata:discover')

/** Delay between two consecutive status polls, in milliseconds. */
const POLL_INTERVAL_MS = 3_000

/** Polling budget in milliseconds, taken from `DEFAULT_EXECUTION_TIMEOUT_MS`. */
const MAX_POLL_TIME_MS = DEFAULT_EXECUTION_TIMEOUT_MS
| 10 | + |
4 | 11 | export const brightDataDiscoverTool: ToolConfig< |
5 | 12 | BrightDataDiscoverParams, |
6 | 13 | BrightDataDiscoverResponse |
@@ -84,45 +91,120 @@ export const brightDataDiscoverTool: ToolConfig< |
84 | 91 | }, |
85 | 92 | }, |
86 | 93 |
|
87 | | - transformResponse: async (response: Response) => { |
| 94 | + transformResponse: async (response: Response, params) => { |
88 | 95 | if (!response.ok) { |
89 | 96 | const errorText = await response.text() |
90 | 97 | throw new Error(errorText || `Discover request failed with status ${response.status}`) |
91 | 98 | } |
92 | 99 |
|
93 | 100 | const data = await response.json() |
94 | 101 |
|
95 | | - let results: Array<{ |
96 | | - url: string | null |
97 | | - title: string | null |
98 | | - description: string | null |
99 | | - relevanceScore: number | null |
100 | | - content: string | null |
101 | | - }> = [] |
102 | | - |
103 | | - const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? []) |
104 | | - |
105 | | - if (Array.isArray(items)) { |
106 | | - results = items.map((item: Record<string, unknown>) => ({ |
107 | | - url: (item.link as string) ?? (item.url as string) ?? null, |
108 | | - title: (item.title as string) ?? null, |
109 | | - description: (item.description as string) ?? (item.snippet as string) ?? null, |
110 | | - relevanceScore: (item.relevance_score as number) ?? null, |
111 | | - content: |
112 | | - (item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? null, |
113 | | - })) |
114 | | - } |
115 | | - |
116 | 102 | return { |
117 | 103 | success: true, |
118 | 104 | output: { |
119 | | - results, |
120 | | - query: null, |
121 | | - totalResults: results.length, |
| 105 | + results: [], |
| 106 | + query: params?.query ?? null, |
| 107 | + totalResults: 0, |
| 108 | + taskId: data.task_id ?? null, |
122 | 109 | }, |
123 | 110 | } |
124 | 111 | }, |
125 | 112 |
|
| 113 | + postProcess: async (result, params) => { |
| 114 | + if (!result.success) return result |
| 115 | + |
| 116 | + const taskId = result.output.taskId |
| 117 | + if (!taskId) { |
| 118 | + return { |
| 119 | + ...result, |
| 120 | + success: false, |
| 121 | + error: 'Discover API did not return a task_id. Cannot poll for results.', |
| 122 | + } |
| 123 | + } |
| 124 | + |
| 125 | + logger.info(`Bright Data Discover task ${taskId} created, polling for results...`) |
| 126 | + |
| 127 | + let elapsedTime = 0 |
| 128 | + |
| 129 | + while (elapsedTime < MAX_POLL_TIME_MS) { |
| 130 | + try { |
| 131 | + const pollResponse = await fetch( |
| 132 | + `https://api.brightdata.com/discover?task_id=${encodeURIComponent(taskId)}`, |
| 133 | + { |
| 134 | + method: 'GET', |
| 135 | + headers: { |
| 136 | + Authorization: `Bearer ${params.apiKey}`, |
| 137 | + }, |
| 138 | + } |
| 139 | + ) |
| 140 | + |
| 141 | + if (!pollResponse.ok) { |
| 142 | + return { |
| 143 | + ...result, |
| 144 | + success: false, |
| 145 | + error: `Failed to poll discover results: ${pollResponse.statusText}`, |
| 146 | + } |
| 147 | + } |
| 148 | + |
| 149 | + const data = await pollResponse.json() |
| 150 | + logger.info(`Bright Data Discover task ${taskId} status: ${data.status}`) |
| 151 | + |
| 152 | + if (data.status === 'done') { |
| 153 | + const items = Array.isArray(data.results) ? data.results : [] |
| 154 | + |
| 155 | + const results = items.map((item: Record<string, unknown>) => ({ |
| 156 | + url: (item.link as string) ?? (item.url as string) ?? null, |
| 157 | + title: (item.title as string) ?? null, |
| 158 | + description: (item.description as string) ?? (item.snippet as string) ?? null, |
| 159 | + relevanceScore: (item.relevance_score as number) ?? null, |
| 160 | + content: (item.content as string) ?? null, |
| 161 | + })) |
| 162 | + |
| 163 | + return { |
| 164 | + success: true, |
| 165 | + output: { |
| 166 | + results, |
| 167 | + query: params.query ?? null, |
| 168 | + totalResults: results.length, |
| 169 | + }, |
| 170 | + } |
| 171 | + } |
| 172 | + |
| 173 | + if (data.status === 'failed' || data.status === 'error') { |
| 174 | + return { |
| 175 | + ...result, |
| 176 | + success: false, |
| 177 | + error: `Discover task failed: ${data.error ?? 'Unknown error'}`, |
| 178 | + } |
| 179 | + } |
| 180 | + |
| 181 | + await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)) |
| 182 | + elapsedTime += POLL_INTERVAL_MS |
| 183 | + } catch (error) { |
| 184 | + logger.error('Error polling for discover task:', { |
| 185 | + message: error instanceof Error ? error.message : String(error), |
| 186 | + taskId, |
| 187 | + }) |
| 188 | + |
| 189 | + return { |
| 190 | + ...result, |
| 191 | + success: false, |
| 192 | + error: `Error polling for discover task: ${error instanceof Error ? error.message : String(error)}`, |
| 193 | + } |
| 194 | + } |
| 195 | + } |
| 196 | + |
| 197 | + logger.warn( |
| 198 | + `Discover task ${taskId} did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)` |
| 199 | + ) |
| 200 | + |
| 201 | + return { |
| 202 | + ...result, |
| 203 | + success: false, |
| 204 | + error: `Discover task ${taskId} timed out after ${MAX_POLL_TIME_MS / 1000}s. Check status manually.`, |
| 205 | + } |
| 206 | + }, |
| 207 | + |
126 | 208 | outputs: { |
127 | 209 | results: { |
128 | 210 | type: 'array', |
|
0 commit comments