Skip to content

Commit a39dc15

Browse files
waleedlatif1 and claude authored
feat(brightdata): add Bright Data integration with 8 tools (#4183)
* feat(brightdata): add Bright Data integration with 8 tools

  Add complete Bright Data integration supporting Web Unlocker, SERP API, Discover API, and Web Scraper dataset operations. Includes scrape URL, SERP search, discover, sync scrape, scrape dataset, snapshot status, download snapshot, and cancel snapshot tools.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): address PR review feedback

  - Fix truncated "Download Snapshot" description in integrations.json and docs
  - Map engine-specific query params (num/count/numdoc, hl/setLang/lang/kl, gl/cc/lr) per search engine instead of using Google-specific params for all
  - Attempt to parse snapshot_id from cancel/download response bodies instead of hardcoding null

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* lint

* fix(agiloft): change bgColor to white; fix docs truncation

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): avoid inner quotes in description to fix docs generation

  The docs generator regex truncates at inner quotes. Reword the download_snapshot description to avoid embedded double quotes.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* fix(brightdata): disable incompatible DuckDuckGo and Yandex URL params

  DuckDuckGo kl expects region-language format (us-en) and Yandex lr expects numeric region IDs (213), not plain two-letter codes. Disable these URL-level params since Bright Data normalizes localization through the body-level country param.

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 05c1c5b commit a39dc15

File tree

21 files changed

+1780
-2
lines changed

21 files changed

+1780
-2
lines changed

apps/docs/components/icons.tsx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2087,6 +2087,21 @@ export function BrandfetchIcon(props: SVGProps<SVGSVGElement>) {
20872087
)
20882088
}
20892089

2090+
export function BrightDataIcon(props: SVGProps<SVGSVGElement>) {
2091+
return (
2092+
<svg {...props} viewBox='54 93 22 52' fill='none' xmlns='http://www.w3.org/2000/svg'>
2093+
<path
2094+
d='M62 95.21c.19 2.16 1.85 3.24 2.82 4.74.25.38.48.11.67-.16.21-.31.6-1.21 1.15-1.28-.35 1.38-.04 3.15.16 4.45.49 3.05-1.22 5.64-4.07 6.18-3.38.65-6.22-2.21-5.6-5.62.23-1.24 1.37-2.5.77-3.7-.85-1.7.54-.52.79-.22 1.04 1.2 1.21.09 1.45-.55.24-.63.31-1.31.47-1.97.19-.77.55-1.4 1.39-1.87z'
2095+
fill='currentColor'
2096+
/>
2097+
<path
2098+
d='M66.70 123.37c0 3.69.04 7.38-.03 11.07-.02 1.04.31 1.48 1.32 1.49.29 0 .59.12.88.13.93.01 1.18.47 1.16 1.37-.05 2.19 0 2.19-2.24 2.19-3.48 0-6.96-.04-10.44.03-1.09.02-1.47-.33-1.3-1.36.02-.12.02-.26 0-.38-.28-1.39.39-1.96 1.7-1.9 1.36.06 1.76-.51 1.74-1.88-.09-5.17-.08-10.35 0-15.53.02-1.22-.32-1.87-1.52-2.17-.57-.14-1.47-.11-1.57-.85-.15-1.04-.05-2.11.01-3.17.02-.34.44-.35.73-.39 2.81-.39 5.63-.77 8.44-1.18.92-.14 1.15.2 1.14 1.09-.04 3.8-.02 7.62-.02 11.44z'
2099+
fill='currentColor'
2100+
/>
2101+
</svg>
2102+
)
2103+
}
2104+
20902105
export function BrowserUseIcon(props: SVGProps<SVGSVGElement>) {
20912106
return (
20922107
<svg

apps/docs/components/ui/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
BoxCompanyIcon,
2424
BrainIcon,
2525
BrandfetchIcon,
26+
BrightDataIcon,
2627
BrowserUseIcon,
2728
CalComIcon,
2829
CalendlyIcon,
@@ -215,6 +216,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
215216
attio: AttioIcon,
216217
box: BoxCompanyIcon,
217218
brandfetch: BrandfetchIcon,
219+
brightdata: BrightDataIcon,
218220
browser_use: BrowserUseIcon,
219221
calcom: CalComIcon,
220222
calendly: CalendlyIcon,
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
---
2+
title: Bright Data
3+
description: Scrape websites, search engines, and extract structured data
4+
---
5+
6+
import { BlockInfoCard } from "@/components/ui/block-info-card"
7+
8+
<BlockInfoCard
9+
type="brightdata"
10+
color="#FFFFFF"
11+
/>
12+
13+
## Usage Instructions
14+
15+
Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.
16+
17+
18+
19+
## Tools
20+
21+
### `brightdata_scrape_url`
22+
23+
Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically.
24+
25+
#### Input
26+
27+
| Parameter | Type | Required | Description |
28+
| --------- | ---- | -------- | ----------- |
29+
| `apiKey` | string | Yes | Bright Data API token |
30+
| `zone` | string | Yes | Web Unlocker zone name from your Bright Data dashboard \(e.g., "web_unlocker1"\) |
31+
| `url` | string | Yes | The URL to scrape \(e.g., "https://example.com/page"\) |
32+
| `format` | string | No | Response format: "raw" for HTML or "json" for parsed content. Defaults to "raw" |
33+
| `country` | string | No | Two-letter country code for geo-targeting \(e.g., "us", "gb"\) |
34+
35+
#### Output
36+
37+
| Parameter | Type | Description |
38+
| --------- | ---- | ----------- |
39+
| `content` | string | The scraped page content \(HTML or JSON depending on format\) |
40+
| `url` | string | The URL that was scraped |
41+
| `statusCode` | number | HTTP status code of the response |
42+
43+
### `brightdata_serp_search`
44+
45+
Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API.
46+
47+
#### Input
48+
49+
| Parameter | Type | Required | Description |
50+
| --------- | ---- | -------- | ----------- |
51+
| `apiKey` | string | Yes | Bright Data API token |
52+
| `zone` | string | Yes | SERP API zone name from your Bright Data dashboard \(e.g., "serp_api1"\) |
53+
| `query` | string | Yes | The search query \(e.g., "best project management tools"\) |
54+
| `searchEngine` | string | No | Search engine to use: "google", "bing", "duckduckgo", or "yandex". Defaults to "google" |
55+
| `country` | string | No | Two-letter country code for localized results \(e.g., "us", "gb"\) |
56+
| `language` | string | No | Two-letter language code \(e.g., "en", "es"\) |
57+
| `numResults` | number | No | Number of results to return \(e.g., 10, 20\). Defaults to 10 |
58+
59+
#### Output
60+
61+
| Parameter | Type | Description |
62+
| --------- | ---- | ----------- |
63+
| `results` | array | Array of search results |
64+
| `title` | string | Title of the search result |
65+
| `url` | string | URL of the search result |
66+
| `description` | string | Snippet or description of the result |
67+
| `rank` | number | Position in search results |
68+
| `query` | string | The search query that was executed |
69+
| `searchEngine` | string | The search engine that was used |
70+
71+
### `brightdata_discover`
72+
73+
AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification.
74+
75+
#### Input
76+
77+
| Parameter | Type | Required | Description |
78+
| --------- | ---- | -------- | ----------- |
79+
| `apiKey` | string | Yes | Bright Data API token |
80+
| `query` | string | Yes | The search query \(e.g., "competitor pricing changes enterprise plan"\) |
81+
| `numResults` | number | No | Number of results to return, up to 1000. Defaults to 10 |
82+
| `intent` | string | No | Describes what the agent is trying to accomplish, used to rank results by relevance \(e.g., "find official pricing pages and change notes"\) |
83+
| `includeContent` | boolean | No | Whether to include cleaned page content in results |
84+
| `format` | string | No | Response format: "json" or "markdown". Defaults to "json" |
85+
| `language` | string | No | Search language code \(e.g., "en", "es", "fr"\). Defaults to "en" |
86+
| `country` | string | No | Two-letter ISO country code for localized results \(e.g., "us", "gb"\) |
87+
88+
#### Output
89+
90+
| Parameter | Type | Description |
91+
| --------- | ---- | ----------- |
92+
| `results` | array | Array of discovered web results ranked by intent relevance |
93+
| `url` | string | URL of the discovered page |
94+
| `title` | string | Page title |
95+
| `description` | string | Page description or snippet |
96+
| `relevanceScore` | number | AI-calculated relevance score for intent-based ranking |
97+
| `content` | string | Cleaned page content in the requested format \(when includeContent is true\) |
98+
| `query` | string | The search query that was executed |
99+
| `totalResults` | number | Total number of results returned |
100+
101+
### `brightdata_sync_scrape`
102+
103+
Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout.
104+
105+
#### Input
106+
107+
| Parameter | Type | Required | Description |
108+
| --------- | ---- | -------- | ----------- |
109+
| `apiKey` | string | Yes | Bright Data API token |
110+
| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) |
111+
| `urls` | string | Yes | JSON array of URL objects to scrape, up to 20 \(e.g., \[\{"url": "https://example.com/product"\}\]\) |
112+
| `format` | string | No | Output format: "json", "ndjson", or "csv". Defaults to "json" |
113+
| `includeErrors` | boolean | No | Whether to include error reports in results |
114+
115+
#### Output
116+
117+
| Parameter | Type | Description |
118+
| --------- | ---- | ----------- |
119+
| `data` | array | Array of scraped result objects with fields specific to the dataset scraper used |
120+
| `snapshotId` | string | Snapshot ID returned if the request exceeded the 1-minute timeout and switched to async processing |
121+
| `isAsync` | boolean | Whether the request fell back to async mode \(true means use snapshot ID to retrieve results\) |
122+
123+
### `brightdata_scrape_dataset`
124+
125+
Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more.
126+
127+
#### Input
128+
129+
| Parameter | Type | Required | Description |
130+
| --------- | ---- | -------- | ----------- |
131+
| `apiKey` | string | Yes | Bright Data API token |
132+
| `datasetId` | string | Yes | Dataset scraper ID from your Bright Data dashboard \(e.g., "gd_l1viktl72bvl7bjuj0"\) |
133+
| `urls` | string | Yes | JSON array of URL objects to scrape \(e.g., \[\{"url": "https://example.com/product"\}\]\) |
134+
| `format` | string | No | Output format: "json" or "csv". Defaults to "json" |
135+
136+
#### Output
137+
138+
| Parameter | Type | Description |
139+
| --------- | ---- | ----------- |
140+
| `snapshotId` | string | The snapshot ID to retrieve results later |
141+
| `status` | string | Status of the scraping job \(e.g., "triggered", "running"\) |
142+
143+
### `brightdata_snapshot_status`
144+
145+
Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed.
146+
147+
#### Input
148+
149+
| Parameter | Type | Required | Description |
150+
| --------- | ---- | -------- | ----------- |
151+
| `apiKey` | string | Yes | Bright Data API token |
152+
| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) |
153+
154+
#### Output
155+
156+
| Parameter | Type | Description |
157+
| --------- | ---- | ----------- |
158+
| `snapshotId` | string | The snapshot ID that was queried |
159+
| `datasetId` | string | The dataset ID associated with this snapshot |
160+
| `status` | string | Current status of the snapshot: "starting", "running", "ready", or "failed" |
161+
162+
### `brightdata_download_snapshot`
163+
164+
Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status.
165+
166+
#### Input
167+
168+
| Parameter | Type | Required | Description |
169+
| --------- | ---- | -------- | ----------- |
170+
| `apiKey` | string | Yes | Bright Data API token |
171+
| `snapshotId` | string | Yes | The snapshot ID returned when the collection was triggered \(e.g., "s_m4x7enmven8djfqak"\) |
172+
| `format` | string | No | Output format: "json", "ndjson", "jsonl", or "csv". Defaults to "json" |
173+
| `compress` | boolean | No | Whether to compress the results |
174+
175+
#### Output
176+
177+
| Parameter | Type | Description |
178+
| --------- | ---- | ----------- |
179+
| `data` | array | Array of scraped result records |
180+
| `format` | string | The content type of the downloaded data |
181+
| `snapshotId` | string | The snapshot ID that was downloaded |
182+
183+
### `brightdata_cancel_snapshot`
184+
185+
Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress.
186+
187+
#### Input
188+
189+
| Parameter | Type | Required | Description |
190+
| --------- | ---- | -------- | ----------- |
191+
| `apiKey` | string | Yes | Bright Data API token |
192+
| `snapshotId` | string | Yes | The snapshot ID of the collection to cancel \(e.g., "s_m4x7enmven8djfqak"\) |
193+
194+
#### Output
195+
196+
| Parameter | Type | Description |
197+
| --------- | ---- | ----------- |
198+
| `snapshotId` | string | The snapshot ID that was cancelled |
199+
| `cancelled` | boolean | Whether the cancellation was successful |
200+
201+

apps/docs/content/docs/en/tools/meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
"attio",
1919
"box",
2020
"brandfetch",
21+
"brightdata",
2122
"browser_use",
2223
"calcom",
2324
"calendly",

apps/sim/app/(landing)/integrations/data/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import {
2323
BoxCompanyIcon,
2424
BrainIcon,
2525
BrandfetchIcon,
26+
BrightDataIcon,
2627
BrowserUseIcon,
2728
CalComIcon,
2829
CalendlyIcon,
@@ -215,6 +216,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
215216
attio: AttioIcon,
216217
box: BoxCompanyIcon,
217218
brandfetch: BrandfetchIcon,
219+
brightdata: BrightDataIcon,
218220
browser_use: BrowserUseIcon,
219221
calcom: CalComIcon,
220222
calendly: CalendlyIcon,

apps/sim/app/(landing)/integrations/data/integrations.json

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@
214214
"name": "Agiloft",
215215
"description": "Manage records in Agiloft CLM",
216216
"longDescription": "Integrate with Agiloft contract lifecycle management to create, read, update, delete, and search records. Supports file attachments, SQL-based selection, saved searches, and record locking across any table in your knowledge base.",
217-
"bgColor": "#263A5C",
217+
"bgColor": "#FFFFFF",
218218
"iconName": "AgiloftIcon",
219219
"docsUrl": "https://docs.sim.ai/tools/agiloft",
220220
"operations": [
@@ -1743,6 +1743,57 @@
17431743
"integrationTypes": ["sales", "analytics"],
17441744
"tags": ["enrichment", "marketing"]
17451745
},
1746+
{
1747+
"type": "brightdata",
1748+
"slug": "bright-data",
1749+
"name": "Bright Data",
1750+
"description": "Scrape websites, search engines, and extract structured data",
1751+
"longDescription": "Integrate Bright Data into the workflow. Scrape any URL with Web Unlocker, search Google and other engines with SERP API, discover web content ranked by intent, or trigger pre-built scrapers for structured data extraction.",
1752+
"bgColor": "#FFFFFF",
1753+
"iconName": "BrightDataIcon",
1754+
"docsUrl": "https://docs.sim.ai/tools/brightdata",
1755+
"operations": [
1756+
{
1757+
"name": "Scrape URL",
1758+
"description": "Fetch content from any URL using Bright Data Web Unlocker. Bypasses anti-bot protections, CAPTCHAs, and IP blocks automatically."
1759+
},
1760+
{
1761+
"name": "SERP Search",
1762+
"description": "Search Google, Bing, DuckDuckGo, or Yandex and get structured search results using Bright Data SERP API."
1763+
},
1764+
{
1765+
"name": "Discover",
1766+
"description": "AI-powered web discovery that finds and ranks results by intent. Returns up to 1,000 results with optional cleaned page content for RAG and verification."
1767+
},
1768+
{
1769+
"name": "Sync Scrape",
1770+
"description": "Scrape URLs synchronously using a Bright Data pre-built scraper and get structured results directly. Supports up to 20 URLs with a 1-minute timeout."
1771+
},
1772+
{
1773+
"name": "Scrape Dataset",
1774+
"description": "Trigger a Bright Data pre-built scraper to extract structured data from URLs. Supports 660+ scrapers for platforms like Amazon, LinkedIn, Instagram, and more."
1775+
},
1776+
{
1777+
"name": "Snapshot Status",
1778+
"description": "Check the progress of an async Bright Data scraping job. Returns status: starting, running, ready, or failed."
1779+
},
1780+
{
1781+
"name": "Download Snapshot",
1782+
"description": "Download the results of a completed Bright Data scraping job using its snapshot ID. The snapshot must have ready status."
1783+
},
1784+
{
1785+
"name": "Cancel Snapshot",
1786+
"description": "Cancel an active Bright Data scraping job using its snapshot ID. Terminates data collection in progress."
1787+
}
1788+
],
1789+
"operationCount": 8,
1790+
"triggers": [],
1791+
"triggerCount": 0,
1792+
"authType": "api-key",
1793+
"category": "tools",
1794+
"integrationTypes": ["search", "developer-tools"],
1795+
"tags": ["web-scraping", "automation"]
1796+
},
17461797
{
17471798
"type": "browser_use",
17481799
"slug": "browser-use",

apps/sim/blocks/blocks/agiloft.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ export const AgiloftBlock: BlockConfig = {
1313
category: 'tools',
1414
integrationType: IntegrationType.Productivity,
1515
tags: ['automation'],
16-
bgColor: '#263A5C',
16+
bgColor: '#FFFFFF',
1717
icon: AgiloftIcon,
1818
authMode: AuthMode.ApiKey,
1919

0 commit comments

Comments (0)