Skip to content

Commit 6dddc3f

Browse files
waleedlatif1 and claude authored
fix(brightdata): fix async Discover API, echo-back fields, and registry ordering (#4188)
* fix(brightdata): use params for echo-back fields in transformResponse

  transformResponse receives params as its second argument. Use it to return the original url, query, snapshotId, and searchEngine values instead of hardcoding null or extracting from response data that may not contain them.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): handle async Discover API with polling

  The Bright Data Discover API is asynchronous — POST /discover returns a task_id, and results must be polled via GET /discover?task_id=... The previous implementation incorrectly treated it as synchronous, always returning empty results. Uses postProcess (matching Firecrawl crawl pattern) to poll every 3s with a 120s timeout until status is "done".

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): alphabetize block registry entry

  Move box before brandfetch/brightdata to maintain alphabetical ordering.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* lint

* fix(brightdata): return error objects instead of throwing in postProcess

  The executor wraps postProcess in try-catch and falls back to the intermediate transformResponse result on error, which has success: true with empty results. Throwing errors would silently return empty results. Match Firecrawl's pattern: return { ...result, success: false, error } instead of throwing. Also add taskId to BrightDataDiscoverResponse type to eliminate unsafe casts.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): use platform execution timeout for Discover polling

  Replace hardcoded 120s timeout with DEFAULT_EXECUTION_TIMEOUT_MS to match Firecrawl and other async polling tools. Respects platform-configured limits (300s free, 3000s paid).

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent cd8c5bd commit 6dddc3f

File tree

9 files changed

+126
-37
lines changed

9 files changed

+126
-37
lines changed

apps/docs/content/docs/en/tools/agiloft.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ import { BlockInfoCard } from "@/components/ui/block-info-card"
77

88
<BlockInfoCard
99
type="agiloft"
10-
color="#263A5C"
10+
color="#FFFFFF"
1111
/>
1212

1313
{/* MANUAL-CONTENT-START:intro */}

apps/sim/blocks/registry.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,9 +245,9 @@ export const registry: Record<string, BlockConfig> = {
245245
ashby: AshbyBlock,
246246
athena: AthenaBlock,
247247
attio: AttioBlock,
248+
box: BoxBlock,
248249
brandfetch: BrandfetchBlock,
249250
brightdata: BrightDataBlock,
250-
box: BoxBlock,
251251
browser_use: BrowserUseBlock,
252252
calcom: CalComBlock,
253253
calendly: CalendlyBlock,

apps/sim/tools/brightdata/cancel_snapshot.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,17 @@ export const brightDataCancelSnapshotTool: ToolConfig<
3838
}),
3939
},
4040

41-
transformResponse: async (response: Response) => {
41+
transformResponse: async (response: Response, params) => {
4242
if (!response.ok) {
4343
const errorText = await response.text()
4444
throw new Error(errorText || `Cancel snapshot failed with status ${response.status}`)
4545
}
4646

47-
const data = (await response.json().catch(() => null)) as Record<string, unknown> | null
47+
await response.json().catch(() => null)
4848
return {
4949
success: true,
5050
output: {
51-
snapshotId: (data?.snapshot_id as string) ?? null,
51+
snapshotId: params?.snapshotId ?? null,
5252
cancelled: true,
5353
},
5454
}

apps/sim/tools/brightdata/discover.ts

Lines changed: 107 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
import { createLogger } from '@sim/logger'
2+
import { DEFAULT_EXECUTION_TIMEOUT_MS } from '@/lib/core/execution-limits'
13
import type { BrightDataDiscoverParams, BrightDataDiscoverResponse } from '@/tools/brightdata/types'
24
import type { ToolConfig } from '@/tools/types'
35

6+
const logger = createLogger('tools:brightdata:discover')
7+
8+
const POLL_INTERVAL_MS = 3000
9+
const MAX_POLL_TIME_MS = DEFAULT_EXECUTION_TIMEOUT_MS
10+
411
export const brightDataDiscoverTool: ToolConfig<
512
BrightDataDiscoverParams,
613
BrightDataDiscoverResponse
@@ -84,45 +91,120 @@ export const brightDataDiscoverTool: ToolConfig<
8491
},
8592
},
8693

87-
transformResponse: async (response: Response) => {
94+
transformResponse: async (response: Response, params) => {
8895
if (!response.ok) {
8996
const errorText = await response.text()
9097
throw new Error(errorText || `Discover request failed with status ${response.status}`)
9198
}
9299

93100
const data = await response.json()
94101

95-
let results: Array<{
96-
url: string | null
97-
title: string | null
98-
description: string | null
99-
relevanceScore: number | null
100-
content: string | null
101-
}> = []
102-
103-
const items = Array.isArray(data) ? data : (data?.results ?? data?.data ?? [])
104-
105-
if (Array.isArray(items)) {
106-
results = items.map((item: Record<string, unknown>) => ({
107-
url: (item.link as string) ?? (item.url as string) ?? null,
108-
title: (item.title as string) ?? null,
109-
description: (item.description as string) ?? (item.snippet as string) ?? null,
110-
relevanceScore: (item.relevance_score as number) ?? null,
111-
content:
112-
(item.content as string) ?? (item.text as string) ?? (item.markdown as string) ?? null,
113-
}))
114-
}
115-
116102
return {
117103
success: true,
118104
output: {
119-
results,
120-
query: null,
121-
totalResults: results.length,
105+
results: [],
106+
query: params?.query ?? null,
107+
totalResults: 0,
108+
taskId: data.task_id ?? null,
122109
},
123110
}
124111
},
125112

113+
postProcess: async (result, params) => {
114+
if (!result.success) return result
115+
116+
const taskId = result.output.taskId
117+
if (!taskId) {
118+
return {
119+
...result,
120+
success: false,
121+
error: 'Discover API did not return a task_id. Cannot poll for results.',
122+
}
123+
}
124+
125+
logger.info(`Bright Data Discover task ${taskId} created, polling for results...`)
126+
127+
let elapsedTime = 0
128+
129+
while (elapsedTime < MAX_POLL_TIME_MS) {
130+
try {
131+
const pollResponse = await fetch(
132+
`https://api.brightdata.com/discover?task_id=${encodeURIComponent(taskId)}`,
133+
{
134+
method: 'GET',
135+
headers: {
136+
Authorization: `Bearer ${params.apiKey}`,
137+
},
138+
}
139+
)
140+
141+
if (!pollResponse.ok) {
142+
return {
143+
...result,
144+
success: false,
145+
error: `Failed to poll discover results: ${pollResponse.statusText}`,
146+
}
147+
}
148+
149+
const data = await pollResponse.json()
150+
logger.info(`Bright Data Discover task ${taskId} status: ${data.status}`)
151+
152+
if (data.status === 'done') {
153+
const items = Array.isArray(data.results) ? data.results : []
154+
155+
const results = items.map((item: Record<string, unknown>) => ({
156+
url: (item.link as string) ?? (item.url as string) ?? null,
157+
title: (item.title as string) ?? null,
158+
description: (item.description as string) ?? (item.snippet as string) ?? null,
159+
relevanceScore: (item.relevance_score as number) ?? null,
160+
content: (item.content as string) ?? null,
161+
}))
162+
163+
return {
164+
success: true,
165+
output: {
166+
results,
167+
query: params.query ?? null,
168+
totalResults: results.length,
169+
},
170+
}
171+
}
172+
173+
if (data.status === 'failed' || data.status === 'error') {
174+
return {
175+
...result,
176+
success: false,
177+
error: `Discover task failed: ${data.error ?? 'Unknown error'}`,
178+
}
179+
}
180+
181+
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS))
182+
elapsedTime += POLL_INTERVAL_MS
183+
} catch (error) {
184+
logger.error('Error polling for discover task:', {
185+
message: error instanceof Error ? error.message : String(error),
186+
taskId,
187+
})
188+
189+
return {
190+
...result,
191+
success: false,
192+
error: `Error polling for discover task: ${error instanceof Error ? error.message : String(error)}`,
193+
}
194+
}
195+
}
196+
197+
logger.warn(
198+
`Discover task ${taskId} did not complete within the maximum polling time (${MAX_POLL_TIME_MS / 1000}s)`
199+
)
200+
201+
return {
202+
...result,
203+
success: false,
204+
error: `Discover task ${taskId} timed out after ${MAX_POLL_TIME_MS / 1000}s. Check status manually.`,
205+
}
206+
},
207+
126208
outputs: {
127209
results: {
128210
type: 'array',

apps/sim/tools/brightdata/download_snapshot.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ export const brightDataDownloadSnapshotTool: ToolConfig<
5656
}),
5757
},
5858

59-
transformResponse: async (response: Response) => {
59+
transformResponse: async (response: Response, params) => {
6060
if (response.status === 409) {
6161
throw new Error(
6262
'Snapshot is not ready for download. Check the snapshot status first and wait until it is "ready".'
@@ -89,7 +89,7 @@ export const brightDataDownloadSnapshotTool: ToolConfig<
8989
output: {
9090
data,
9191
format: contentType,
92-
snapshotId: (data[0]?.snapshot_id as string) ?? null,
92+
snapshotId: params?.snapshotId ?? null,
9393
},
9494
}
9595
},

apps/sim/tools/brightdata/scrape_url.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ export const brightDataScrapeUrlTool: ToolConfig<
6666
},
6767
},
6868

69-
transformResponse: async (response: Response) => {
69+
transformResponse: async (response: Response, params) => {
7070
const contentType = response.headers.get('content-type') || ''
7171

7272
if (!response.ok) {
@@ -86,7 +86,7 @@ export const brightDataScrapeUrlTool: ToolConfig<
8686
success: true,
8787
output: {
8888
content,
89-
url: null,
89+
url: params?.url ?? null,
9090
statusCode: response.status,
9191
},
9292
}

apps/sim/tools/brightdata/serp_search.ts

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ export const brightDataSerpSearchTool: ToolConfig<
129129
},
130130
},
131131

132-
transformResponse: async (response: Response) => {
132+
transformResponse: async (response: Response, params) => {
133133
if (!response.ok) {
134134
const errorText = await response.text()
135135
throw new Error(errorText || `SERP request failed with status ${response.status}`)
@@ -178,9 +178,14 @@ export const brightDataSerpSearchTool: ToolConfig<
178178
success: true,
179179
output: {
180180
results,
181-
query: ((data?.general as Record<string, unknown> | undefined)?.query as string) ?? null,
181+
query:
182+
((data?.general as Record<string, unknown> | undefined)?.query as string) ??
183+
params?.query ??
184+
null,
182185
searchEngine:
183-
((data?.general as Record<string, unknown> | undefined)?.search_engine as string) ?? null,
186+
((data?.general as Record<string, unknown> | undefined)?.search_engine as string) ??
187+
params?.searchEngine ??
188+
null,
184189
},
185190
}
186191
},

apps/sim/tools/brightdata/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ export interface BrightDataDiscoverResponse extends ToolResponse {
131131
}>
132132
query: string | null
133133
totalResults: number
134+
taskId?: string | null
134135
}
135136
}
136137

bun.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)