LinkedIn Web Scraping with Bright Data MCP Server & Google Gemini

工作流概述

这是一个包含20个节点的工作流:通过 Bright Data MCP 客户端抓取 LinkedIn 个人与公司主页数据,使用 Google Gemini 将公司信息整理成公司故事,并将结果发送到 Webhook、写入本地 JSON 文件。

工作流源代码

下载
{
  "id": "D2RkoPZlkKFRUrNu",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "LinkedIn Web Scraping with Bright Data MCP Server & Google Gemini",
  "tags": [
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "68715d64-ce99-4e23-81ed-fe8f7d08ebd7",
      "name": "When clicking ‘Test workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -640,
        -50
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "e0295397-2926-4964-8be5-c0341de29a02",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -260,
        -420
      ],
      "parameters": {
        "color": 3,
        "width": 440,
        "height": 320,
        "content": "## Bright Data LinkedIn Person Scraper"
      },
      "typeVersion": 1
    },
    {
      "id": "cdf42164-569e-4140-9847-4751d69c6b7b",
      "name": "Set the URLs",
      "type": "n8n-nodes-base.set",
      "position": [
        -200,
        -300
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://www.linkedin.com/in/ranjan-dailata/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "5769fce6-bcd7-4a13-b992-cd6d955a2cf1",
      "name": "Bright Data MCP Client For LinkedIn Person",
      "type": "n8n-nodes-mcp.mcpClient",
      "notes": "Calls the Bright Data MCP tool web_data_linkedin_person_profile for the LinkedIn profile URL set by the upstream node.",
      "position": [
        20,
        -300
      ],
      "parameters": {
        "toolName": "web_data_linkedin_person_profile",
        "operation": "executeTool",
        "toolParameters": "={\n   \"url\": \"{{ $json.url }}\"\n} "
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "56e37aa6-9719-4879-80af-a10c091377fb",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -260,
        -60
      ],
      "parameters": {
        "color": 4,
        "width": 440,
        "height": 320,
        "content": "## Bright Data LinkedIn Company Scraper"
      },
      "typeVersion": 1
    },
    {
      "id": "69afab25-32c6-4849-b2f9-4a2b25657c37",
      "name": "List all tools for Bright Data",
      "type": "n8n-nodes-mcp.mcpClient",
      "position": [
        -420,
        50
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "feb16a2b-fdf7-49d4-bcd5-848ccaf66639",
      "name": "Bright Data MCP Client For LinkedIn Company",
      "type": "n8n-nodes-mcp.mcpClient",
      "notes": "Calls the Bright Data MCP tool web_data_linkedin_company_profile for the LinkedIn company URL set by the upstream node.",
      "position": [
        20,
        50
      ],
      "parameters": {
        "toolName": "web_data_linkedin_company_profile",
        "operation": "executeTool",
        "toolParameters": "={\n   \"url\": \"{{ $json.url }}\"\n} "
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "e5117eb1-a757-4c28-965e-87ea03213ed1",
      "name": "Set the LinkedIn Company URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -200,
        50
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://www.linkedin.com/company/bright-data/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "99f45d7f-ad79-4ffc-8299-c71bd870f8fb",
      "name": "Webhook for LinkedIn Company Web Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1060,
        40
      ],
      "parameters": {
        "url": "={{ $('Set the LinkedIn Company URL').item.json.webhook_url }}",
        "options": {},
        "jsonBody": "={\n  \"about\": {{ JSON.stringify($json.about[0]) }},\n \"story\": {{ JSON.stringify($json.company_story[0]) }}\n}",
        "sendBody": true,
        "specifyBody": "json"
      },
      "typeVersion": 4.2
    },
    {
      "id": "5dfd2630-17d9-4a13-8cd6-57a564ef4a26",
      "name": "LinkedIn Data Extractor",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        240,
        200
      ],
      "parameters": {
        "text": "=Write a complete story of the provided company information in JSON. Use the following Company info to produce a story or a blog post. Make sure to incorporate all the provided company context.\n\nHere's the Company Info in JSON - {{ $json.result.content[0].text }}",
        "options": {
          "systemPromptTemplate": "You are an expert data formatter"
        },
        "attributes": {
          "attributes": [
            {
              "name": "company_story",
              "required": true,
              "description": "Detailed Company Info"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "d1927c08-5ded-4b0b-b60b-bed126040d38",
      "name": "Google Gemini Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        328,
        420
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "0de1d200-c35a-41df-b512-8b97b92f14db",
      "name": "List all available tools for Bright Data",
      "type": "n8n-nodes-mcp.mcpClient",
      "position": [
        -420,
        -300
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "3f884694-b8f3-478a-b1a3-f46326a0c96f",
      "name": "Code",
      "type": "n8n-nodes-base.code",
      "position": [
        318,
        -100
      ],
      "parameters": {
        "jsCode": "// The upstream MCP tool result carries its payload as a JSON string in result.content[0].text.\n// Declared with const so the parsed value does not leak as an implicit global.\nconst jsonContent = JSON.parse($input.first().json.result.content[0].text);\nreturn jsonContent;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "67036198-4d7d-42d9-93cf-ffc65649bae0",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        616,
        50
      ],
      "parameters": {},
      "typeVersion": 3.1
    },
    {
      "id": "77423290-bd08-4dc8-9f37-cf8fec9f6a63",
      "name": "Aggregate",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        836,
        50
      ],
      "parameters": {
        "options": {},
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "about"
            },
            {
              "fieldToAggregate": "output.company_story"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "91d25405-afb3-4ed6-b8fa-52ab64a654e2",
      "name": "Create a binary data for LinkedIn person info extract",
      "type": "n8n-nodes-base.function",
      "position": [
        320,
        -500
      ],
      "parameters": {
        "functionCode": "// Pretty-print the first item's JSON and attach it, base64-encoded, as binary.data.\n// Buffer.from replaces the deprecated `new Buffer(string)` constructor.\nitems[0].binary = {\n  data: {\n    data: Buffer.from(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "3e74c49e-eb31-43b1-b8e1-ed960bd83ca1",
      "name": "Write the LinkedIn person info to disk",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        520,
        -500
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\LinkedIn-Person.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "f92b3505-2af6-42aa-bf4b-8b7b6cb97364",
      "name": "Create a binary data for LinkedIn company info extract",
      "type": "n8n-nodes-base.function",
      "position": [
        1000,
        -180
      ],
      "parameters": {
        "functionCode": "// Pretty-print the first item's JSON and attach it, base64-encoded, as binary.data.\n// Buffer.from replaces the deprecated `new Buffer(string)` constructor.\nitems[0].binary = {\n  data: {\n    data: Buffer.from(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "6ed1402b-4858-4311-bede-f0b8f28acb9f",
      "name": "Write the LinkedIn company info to disk",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1220,
        -180
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\LinkedIn-Company.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "335efc2b-80e3-4fac-b31f-82fff4ac4e65",
      "name": "Webhook for LinkedIn Person Web Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        318,
        -300
      ],
      "parameters": {
        "url": "={{ $('Set the URLs').item.json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.result.content[0].text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "35815900-1729-40c7-b128-778eabb62ec1",
  "connections": {
    "Code": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "Aggregate",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Aggregate": {
      "main": [
        [
          {
            "node": "Webhook for LinkedIn Company Web Scraper",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary data for LinkedIn company info extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set the URLs": {
      "main": [
        [
          {
            "node": "Bright Data MCP Client For LinkedIn Person",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "LinkedIn Data Extractor": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "LinkedIn Data Extractor",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Set the LinkedIn Company URL": {
      "main": [
        [
          {
            "node": "Bright Data MCP Client For LinkedIn Company",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "List all tools for Bright Data": {
      "main": [
        [
          {
            "node": "Set the LinkedIn Company URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "List all available tools for Bright Data",
            "type": "main",
            "index": 0
          },
          {
            "node": "List all tools for Bright Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook for LinkedIn Person Web Scraper": {
      "main": [
        []
      ]
    },
    "List all available tools for Bright Data": {
      "main": [
        [
          {
            "node": "Set the URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Bright Data MCP Client For LinkedIn Person": {
      "main": [
        [
          {
            "node": "Webhook for LinkedIn Person Web Scraper",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary data for LinkedIn person info extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Bright Data MCP Client For LinkedIn Company": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          },
          {
            "node": "LinkedIn Data Extractor",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create a binary data for LinkedIn person info extract": {
      "main": [
        [
          {
            "node": "Write the LinkedIn person info to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create a binary data for LinkedIn company info extract": {
      "main": [
        [
          {
            "node": "Write the LinkedIn company info to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 通过 Bright Data MCP 抓取 LinkedIn 个人与公司资料
  • 使用 Google Gemini 提取信息并生成公司故事
  • 将抓取结果推送到 Webhook
  • 将抓取结果写入本地 JSON 文件
  • 手动触发执行,便于测试

技术分析

节点类型及作用

  • n8n-nodes-base.manualTrigger
  • n8n-nodes-base.stickyNote
  • n8n-nodes-base.set
  • n8n-nodes-mcp.mcpClient
  • n8n-nodes-base.httpRequest

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

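  • n8n平台访问权限(需安装社区节点 n8n-nodes-mcp)
  • Bright Data MCP 服务器及对应的 MCP Client (STDIO) 凭证
  • Google Gemini (PaLM) API 凭证
  • 可接收结果的 Webhook 地址(示例中使用 webhook.site)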

配置步骤

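  1. 在n8n中导入工作流JSON文件
  2. 配置 MCP Client (STDIO) 凭证以连接 Bright Data MCP 服务器
  3. 配置 Google Gemini (PaLM) API 凭证
  4. 在两个 Set 节点中设置要抓取的 LinkedIn URL 和 webhook_url
  5. 根据运行环境修改两个写入磁盘节点的文件路径
  6. 点击"Test workflow"手动执行并验证结果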

关键参数

参数名称 默认值 说明
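url https://www.linkedin.com/in/ranjan-dailata/ 要抓取的 LinkedIn 个人主页(公司流程中为 https://www.linkedin.com/company/bright-data/)
webhook_url https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7 接收抓取结果的 Webhook 地址
modelName models/gemini-2.0-flash-exp 用于生成公司故事的 Gemini 模型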

最佳实践

优化建议

  • 将硬编码的 LinkedIn URL 和 Webhook 地址改为可配置的输入
  • 根据待抓取的 URL 数量调整处理批次大小
  • 将文件输出路径调整为运行环境中可写的目录
  • 定期检查 Bright Data 工具名称与 Gemini 模型版本是否仍然可用

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护 Bright Data 与 Google 账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个 LinkedIn 抓取任务
  • 监控系统资源使用情况

故障排除

常见问题

抓取结果为空或解析失败

检查 Bright Data MCP 客户端返回的内容(result.content[0].text)是否为有效 JSON,并确认所设置的 LinkedIn URL 可以访问。

凭证认证失败

确认 MCP Client (STDIO) 凭证和 Google Gemini (PaLM) API 凭证有效且配置正确。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用示例 LinkedIn URL 验证抓取与提取结果
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

导出的工作流 JSON 中各节点均未配置重试或错误分支,建议按需补充以下错误处理机制:

  • 网络超时自动重试(例如最多3次)
  • API错误记录和告警
  • 抓取失败 URL 的隔离机制
  • 异常情况下的回滚操作