Selenium Ultimate Scraper Workflow

工作流概述

这是一个包含63个节点的复杂工作流，通过 Selenium 浏览器会话打开目标网页并截图，再借助 OpenAI（GPT-4o）分析截图，从任意网页（无论是否需要登录）中提取指定数据。

工作流源代码

下载
{
  "id": "kZ3aL4r7xc96Q7lp",
  "meta": {
    "instanceId": "b8b2c0d20b02864cf66adc9cbefc86e9e56de0252b653d37ba6613341b5e0bef",
    "templateCredsSetupCompleted": true
  },
  "name": "Selenium Ultimate Scraper Workflow",
  "tags": [],
  "nodes": [
    {
      "id": "20d35d68-db49-4183-a913-85ad06c13912",
      "name": "Extract First Url Match",
      "type": "n8n-nodes-base.html",
      "position": [
        1820,
        540
      ],
      "parameters": {
        "options": {},
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "Url Find ",
              "attribute": "href",
              "cssSelector": "=a[href*=\"https://\"][href*=\"{{ $('Edit Fields (For testing prupose )').item.json['Website Domaine'] }}\"]
",
              "returnArray": true,
              "returnValue": "attribute"
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "9167ea20-fc9c-4d75-bf4d-bb2016079dd0",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        2060,
        700
      ],
      "parameters": {
        "model": "gpt-4o",
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "FmszNHDDVS32ud21",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "42a8646d-1b0b-4309-a87d-9c8aeb355a28",
      "name": "Clean Webdriver ",
      "type": "n8n-nodes-base.httpRequest",
      "notes": "Script to delete traces of selenium in the browser ",
      "position": [
        3120,
        560
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/execute/sync",
        "method": "POST",
        "options": {},
        "jsonBody": "{
  \"script\": \"Object.defineProperty(navigator, 'webdriver', { get: () => undefined }); window.navigator.chrome = { runtime: {} }; Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] }); Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3, 4, 5] });\",
  \"args\": []
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "notesInFlow": false,
      "typeVersion": 4.2
    },
    {
      "id": "107dd8de-e341-4819-a493-94ed57fd0f33",
      "name": "Delete Session",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        5180,
        920
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "8c7ec6bc-d417-48c2-a6f2-ecce27803671",
      "name": "Delete Session2",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        6740,
        -160
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "e43ecd94-b7f2-4f73-a9fa-b829de9e0296",
      "name": "If Block1",
      "type": "n8n-nodes-base.if",
      "position": [
        6520,
        -20
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "e6e6e15d-1cfe-48be-8ea0-f112e9781c9d",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.content }}",
              "rightValue": "BLOCK"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "08e46f63-41b5-4606-8f2c-df9e96c9c34e",
      "name": "Delete Session3",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        6740,
        60
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "b47d9b22-9a59-4c7a-8cba-9487f18207ee",
      "name": "Limit",
      "type": "n8n-nodes-base.limit",
      "position": [
        5120,
        -100
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "541622f7-562b-4e8a-93e5-61e6e918ff52",
      "name": "Delete Session1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        5180,
        720
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "825be0d7-9dd3-4a2f-8c3d-fd405f59a5d6",
      "name": "Delete Session4",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        5780,
        260
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "retryOnFail": false,
      "typeVersion": 4.2
    },
    {
      "id": "56f6f4f6-f737-4de8-bdfe-029546909677",
      "name": "Success with cookie",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        7260,
        60
      ],
      "parameters": {
        "options": {
          "responseCode": 200
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "c6939773-e230-45e1-bf76-d0299c2c7066",
      "name": "Respond to Webhook2",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        6920,
        -160
      ],
      "parameters": {
        "options": {
          "responseCode": 200
        },
        "respondWith": "json",
        "responseBody": "{
  \"Success \": \"Request has been block by the targeted website\"
}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "ea921f11-323f-4c79-8cc6-779b39498b05",
      "name": "Code",
      "type": "n8n-nodes-base.code",
      "position": [
        4700,
        -100
      ],
      "parameters": {
        "jsCode": "// Récupère les données du nœud Webhook (en remplaçant \"Webhook\" par le nom du nœud Webhook dans votre workflow)
const webhookData = $node[\"Webhook\"].json;

// Fonction pour convertir la valeur de sameSite
function convertSameSite(value) {
    // Conversion spécifique des valeurs de sameSite
    const conversionMap = {
        \"unspecified\": \"None\",
        \"lax\": \"Lax\",
        \"strict\": \"Strict\"
    };
    
    // Si la valeur existe dans le tableau de conversion, on la convertit
    if (value in conversionMap) {
        return conversionMap[value];
    }
    
    // Si la valeur est déjà une des valeurs acceptées par Selenium
    const allowedValues = [\"Strict\", \"Lax\", \"None\"];
    if (allowedValues.includes(value)) {
        return value;
    } else {
        // Si la valeur n'est pas reconnue, on la remplace par \"Lax\" (par défaut)
        return \"Lax\";
    }
}

// Vérifiez et traitez les données des cookies
if (webhookData.body && webhookData.body.cookies) {
    let items = [];
    for (const cookieObject of webhookData.body.cookies) {
        if (cookieObject.cookie) {
            // Convertir la valeur de sameSite
            cookieObject.cookie.sameSite = convertSameSite(cookieObject.cookie.sameSite);
            
            // Ajouter le cookie à la liste des items
            items.push({
                json: cookieObject.cookie
            });
        }
    }
    return items;
}

// Si les cookies ne sont pas trouvés, renvoyer un tableau vide
return [];
"
      },
      "typeVersion": 2
    },
    {
      "id": "c3d77928-eefc-4903-9b4f-b14bd6f34e3c",
      "name": "Delete Session5",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        3940,
        360
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "retryOnFail": false,
      "typeVersion": 4.2
    },
    {
      "id": "036cfce6-8082-4539-bb0e-980368679fe5",
      "name": "Error",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        4120,
        360
      ],
      "parameters": {
        "options": {
          "responseCode": 404
        },
        "respondWith": "json",
        "responseBody": "{
  \"Error\": \"Cookies are note for the targeted url\"
}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "09d6a99b-d8b3-40c9-b74a-14014e3647e2",
      "name": "Error1",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        6000,
        260
      ],
      "parameters": {
        "options": {
          "responseCode": 500
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "0b1f3442-6b70-405f-b597-642e9c982b82",
      "name": "Error2",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        3060,
        780
      ],
      "parameters": {
        "options": {
          "responseCode": 500
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "4d0112bb-cbfd-45c6-961a-964bd8f59cac",
      "name": "If",
      "type": "n8n-nodes-base.if",
      "position": [
        3760,
        200
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "1bffbc80-9913-46e7-a594-ebc26948c83b",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $('Webhook').item.json.body.cookies[0].cookie.domain }}",
              "rightValue": "={{ $('Webhook').item.json.body.Url }}"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "58a50b80-df4c-4b6f-a682-72237f4dbdef",
      "name": "Inject Cookie",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        4900,
        -100
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/cookie",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"cookie\": {
    \"name\": \"{{ $json.name }}\",
    \"value\": \"{{ $json.value }}\",
    \"domain\": \"{{ $json.domain }}\",
    \"path\": \"{{ $json.path }}\",
    \"secure\": {{ $json.secure }},
    \"httpOnly\": {{ $json.httpOnly }},
    \"sameSite\": \"{{ $json.sameSite }}\",
    \"expirationDate\": {{ $json.expirationDate }}
  }
}",
        "sendBody": true,
        "specifyBody": "json"
      },
      "typeVersion": 4.2
    },
    {
      "id": "39f7401b-b6b7-4f0c-9afc-8f144d394350",
      "name": "Respond to Webhook3",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        5400,
        720
      ],
      "parameters": {
        "options": {
          "responseCode": 200
        },
        "respondWith": "json",
        "responseBody": "{
  \"Success \": \"Request has been block by the targeted website\"
}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "80b107cc-2f6c-46f0-a597-e85594634492",
      "name": "Success",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        5740,
        920
      ],
      "parameters": {
        "options": {
          "responseKey": "={{ $json.output }}",
          "responseCode": 200
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "94a97354-07d9-428e-989c-ef066f9b4d8a",
      "name": "Go on url",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        3900,
        780
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/url",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"url\": \"{{ $('Webhook').item.json.body['Target Url'] }}\"
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    },
    {
      "id": "fd044cf3-594d-48af-bbd1-f2d9adedcbc1",
      "name": "Delete Session6",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        4360,
        1200
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "retryOnFail": false,
      "typeVersion": 4.2
    },
    {
      "id": "7c28c3b6-1141-4609-8774-cb6b4d842b97",
      "name": "Error3",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        4520,
        1200
      ],
      "parameters": {
        "options": {
          "responseCode": 500
        },
        "respondWith": "json",
        "responseBody": "{
  \"Error\": \"Page crash on the extracted url\"
}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "52f78923-156f-4861-88ba-f0253c483bd9",
      "name": "Information Extractor",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        2040,
        540
      ],
      "parameters": {
        "text": "={{ $json['Url Find '][1] }}{{ $json['Url Find '][2] }}{{ $json['Url Find '][3] }}",
        "options": {
          "systemPromptTemplate": "=You are an expert extraction algorithm.
Only extract relevant url from the unstructured urls array.
A relevant url is a url where you can find relevant information about this subject : {{ $('Edit Fields (For testing prupose )').item.json.Subject }}, on this domain name : {{ $('Edit Fields (For testing prupose )').item.json['Website Domaine'] }}.
If you do not know the value of an attribute asked to extract, you need to set the attribute's value as NA."
        },
        "attributes": {
          "attributes": [
            {
              "name": "Good_url_for_etract_information",
              "required": true,
              "description": "=The url where I can extract relevant infroamtion on this subject : {{ $('Edit Fields (For testing prupose )').item.json.Subject }} on this domaine name : {{ $('Edit Fields (For testing prupose )').item.json['Website Domaine'] }}"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "6ac249e2-a9d8-4590-b050-3a0a2472fa3c",
      "name": "Check if empty of NA",
      "type": "n8n-nodes-base.if",
      "position": [
        2440,
        540
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "9470fb6c-e367-4af7-a697-275e724fe771",
              "operator": {
                "type": "string",
                "operation": "empty",
                "singleValue": true
              },
              "leftValue": "={{ $json.output.Good_url_for_etract_information }}",
              "rightValue": ""
            },
            {
              "id": "8518e9a9-5b0c-4699-97c5-d9b7b1943918",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.output.Good_url_for_etract_information }}",
              "rightValue": "NA"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "f380eff7-3d18-4791-9dac-8a88d3fdcc4f",
      "name": "If Block",
      "type": "n8n-nodes-base.if",
      "position": [
        4960,
        840
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "e6e6e15d-1cfe-48be-8ea0-f112e9781c9d",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.content }}",
              "rightValue": "BLOCK"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "43382397-89b5-4b90-9016-49109ec04baf",
      "name": "Google search Query ",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1600,
        540
      ],
      "parameters": {
        "url": "=https://www.google.com/search?q=site:{{ $json['Website Domaine'] }}+{{$json.Subject}}&oq=site&gs_lcrp=EgZjaHJvbWUqCAgAEEUYJxg7MggIABBFGCcYOzIICAEQRRgnGDsyBggCEEUYOzIRCAMQRRg5GEMYyQMYgAQYigUyBggEEEUYQDIGCAUQRRg9MgYIBhBFGD0yBggHEEUYPdIBCDEwNTRqMGo3qAIAsAIA&sourceid=chrome&ie=UTF-8",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "d34256af-1b43-4f64-853c-cf063b8c6b68",
      "name": "Create Selenium Session",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        2680,
        640
      ],
      "parameters": {
        "url": "http://selenium_chrome:4444/wd/hub/session",
        "method": "POST",
        "options": {
          "timeout": 5000
        },
        "jsonBody": "{
  \"capabilities\": {
    \"alwaysMatch\": {
      \"browserName\": \"chrome\",
      \"goog:chromeOptions\": {
        \"args\": [  
          \"--disable-blink-features=AutomationControlled\",
          \"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3\"
        ]
      }
    }
  }
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    },
    {
      "id": "4f0f696c-9637-4c7d-82ae-1f5c36bb9cd1",
      "name": "Get ScreenShot 1",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        4420,
        840
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/screenshot",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "ba72c0cf-217a-4411-80f6-ca28ccdb0151",
      "name": "Refresh browser",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        5320,
        -100
      ],
      "parameters": {
        "url": "=http:///selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/refresh",
        "method": "POST",
        "options": {},
        "jsonBody": "{}",
        "sendBody": true,
        "specifyBody": "json"
      },
      "typeVersion": 4.2
    },
    {
      "id": "b6ba7068-399a-467d-ba58-7f47d650e2f1",
      "name": "Get ScreenShot ",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        5880,
        -20
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/screenshot",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "792649be-0ee2-442f-bc21-d0c297cea227",
      "name": "Convert to File",
      "type": "n8n-nodes-base.convertToFile",
      "onError": "continueErrorOutput",
      "position": [
        6160,
        -20
      ],
      "parameters": {
        "options": {},
        "operation": "toBinary",
        "sourceProperty": "value"
      },
      "typeVersion": 1.1
    },
    {
      "id": "49e58759-bedf-4f38-a96c-bd18e67b8aaf",
      "name": "Convert to File1",
      "type": "n8n-nodes-base.convertToFile",
      "onError": "continueErrorOutput",
      "position": [
        4600,
        840
      ],
      "parameters": {
        "options": {},
        "operation": "toBinary",
        "sourceProperty": "value"
      },
      "typeVersion": 1.1
    },
    {
      "id": "3735f5f5-665e-4649-b1c2-84a4a8699f70",
      "name": "Delete Session7",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        2920,
        780
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "retryOnFail": false,
      "typeVersion": 4.2
    },
    {
      "id": "1b8b1e0c-f465-4963-869c-0e7086922151",
      "name": "Sticky Note",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        920,
        -1023.3944834469928
      ],
      "parameters": {
        "color": 4,
        "width": 851.2111300888805,
        "height": 1333.3079943516484,
        "content": "## N8N Ultimate Scraper - Workflow

This workflow's objective is to collect data from any website page, whether it requires login or not.

For example, you can collect the number of stars of the n8n-ultimate-scraper project on GitHub.

## Requirements
**Selenium Container**: Selenium is an open-source automation framework for web applications, enabling browser control and interaction through scripts in various programming languages.
You can deploy the Docker Compose file from the associated GitHub project to set up your Selenium container and configuration: https://github.com/Touxan/n8n-ultimate-scraper

**Residential Proxy Server**: To scrape data at scale without being blocked, I personally recommend GeoNode. They offer affordable, high-quality residential proxies: https://geonode.com/invite/98895

**OpenAI API Key**: For using GPT-4.

## Optional
Session Cookies Collection: To use login functionality with the n8n Ultimate Scraper, you need to collect session cookies from the target website. You can do this using the extension created for this application in the GitHub project: https://github.com/Touxan/n8n-ultimate-scraper. Follow the installation procedure to use it.

## How to use 
Deploy the project with all the requirements and request your webhook.

**Example of request**:
curl -X POST http://localhost:5678/webhook-test/yourwebhookid \
-H \"Content-Type: application/json\" \
-d '{
  \"subject\": \"Hugging Face\",
  \"Url\": \"github.com\",
  \"Target data\": [
    {
      \"DataName\": \"Followers\",
      \"description\": \"The number of followers of the GitHub page\"
    },
    {
      \"DataName\": \"Total Stars\",
      \"description\": \"The total numbers of stars on the different repos\"
    }
  ],
  \"cookies\": []
}'

You can also scrape link like this : 
curl -X POST http://localhost:5678/webhook-test/67d77918-2d5b-48c1-ae73-2004b32125f0 \
-H \"Content-Type: application/json\" \
-d '{
  \"Target Url\": \"https://github.com\",
  \"Target data\": [
    {
      \"DataName\": \"Followers\",
      \"description\": \"The number of followers of the GitHub page\"
    },
    {
      \"DataName\": \"Total Stars\",
      \"description\": \"The total numbers of stars on the different repo\"
    }
]
}'

**Note**
The maximum number of Target data is 5."
      },
      "typeVersion": 1
    },
    {
      "id": "4d743518-4fcb-4e9f-aff7-a8959a78ccaf",
      "name": "Edit Fields (For testing prupose )",
      "type": "n8n-nodes-base.set",
      "position": [
        1160,
        540
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3895040f-0a21-47ee-a73f-d3c7fd6edf36",
              "name": "Subject",
              "type": "string",
              "value": "={{ $json.body.subject }}"
            },
            {
              "id": "304e4240-513f-4c87-ae9d-4efda7d0c4ab",
              "name": "Website Domaine",
              "type": "string",
              "value": "={{ $json.body.Url }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "62b0a416-71a2-4d2b-83f9-8c5465c72006",
      "name": "Get ScreenShot 2",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        6200,
        851
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/screenshot",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "6a5b1a08-c47a-435e-8e0b-648cb8282a90",
      "name": "Convert to File2",
      "type": "n8n-nodes-base.convertToFile",
      "onError": "continueErrorOutput",
      "position": [
        6440,
        851
      ],
      "parameters": {
        "options": {},
        "operation": "toBinary",
        "sourceProperty": "value"
      },
      "typeVersion": 1.1
    },
    {
      "id": "a2aa5d45-5f41-41f7-a8ee-07c145b73d89",
      "name": "Go on ip-api.com",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        5960,
        851
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/url",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"url\": \"https://ip-api.com/\"
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    },
    {
      "id": "8ddde1d2-0b09-45ca-88ef-db24352b095e",
      "name": "Delete Session8",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        6440,
        1071
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}",
        "method": "DELETE",
        "options": {}
      },
      "retryOnFail": false,
      "typeVersion": 4.2
    },
    {
      "id": "78ffd8e1-b4b8-444c-8a7d-410172d3a7f8",
      "name": "Sticky Note1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        5920,
        727
      ],
      "parameters": {
        "color": 6,
        "width": 784.9798841202522,
        "height": 520.0741248156677,
        "content": "## Debug IP

This small debug flow aims to check the IP you're requesting with, in case you're using a proxy"
      },
      "typeVersion": 1
    },
    {
      "id": "be5de434-5f07-40bc-a1e6-aece9ad211b4",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1580,
        420
      ],
      "parameters": {
        "width": 751.8596006980003,
        "height": 430.433007240277,
        "content": "## Search

**Description** :
This part aims to search on Google for the subject and find the URL of the subject page based on the input URL."
      },
      "typeVersion": 1
    },
    {
      "id": "ffbb3c92-245b-4635-9adf-17d24f236bff",
      "name": "Error can't find url",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        2800,
        280
      ],
      "parameters": {
        "options": {
          "responseCode": 404
        },
        "respondWith": "json",
        "responseBody": "{
  \"Error\": \"Can't find url\"
}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "088ad72c-907a-409a-9fa4-00a16d396e1b",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2420,
        420
      ],
      "parameters": {
        "width": 827.9448220213314,
        "height": 502.0185388323068,
        "content": "## Selenium Session

**Description**:
Creation and configuration of the Selenium session."
      },
      "typeVersion": 1
    },
    {
      "id": "00b8bf19-b34e-42ed-bb2a-3fbfa5f02a25",
      "name": "Resize browser window",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2920,
        560
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $json.value.sessionId }}/window/rect",
        "method": "POST",
        "options": {},
        "jsonBody": "{
  \"width\": 1920,
  \"height\": 1080,
  \"x\": 0,
  \"y\": 0
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "typeVersion": 4.2
    },
    {
      "id": "007354a1-3f00-4ae9-ab53-54ded5eed563",
      "name": "Sticky Note4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3500,
        -300
      ],
      "parameters": {
        "width": 3939.555135735299,
        "height": 821.0847869745435,
        "content": "## Scrape with cookies session

**Description**
This part goes to the extracted URL, injects the cookies passed into the webhook, takes a screenshot of the webpage, and analyzes the image with GPT to extract the targeted data."
      },
      "typeVersion": 1
    },
    {
      "id": "5ab44e1b-6878-4af5-bfd8-1f1e5cbee3a7",
      "name": "Sticky Note5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3500,
        580
      ],
      "parameters": {
        "width": 3336.952424000919,
        "height": 821.0847869745435,
        "content": "## Scrape without cookies session

**Description**
Same as the 'Scrape with cookies session' flow, but without the cookie injection"
      },
      "typeVersion": 1
    },
    {
      "id": "4fc7e290-0c60-4efe-ac3f-eb71ce5e457b",
      "name": "OpenAI",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "position": [
        6340,
        -20
      ],
      "parameters": {
        "text": "=Analyse this image and extract revlant infromation about this subject : {{ $('Webhook').item.json.body.subject }}. 

If the webpage seem block by waf, or don't have any relant information about the subject reurn BLOCK with out any aditinonal information.",
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o",
          "cachedResultName": "GPT-4O"
        },
        "options": {
          "detail": "auto",
          "maxTokens": 300
        },
        "resource": "image",
        "inputType": "base64",
        "operation": "analyze"
      },
      "credentials": {
        "openAiApi": {
          "id": "FmszNHDDVS32ud21",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1.5
    },
    {
      "id": "b039ed2a-94da-4a37-b794-7fb1721a8ab3",
      "name": "OpenAI1",
      "type": "@n8n/n8n-nodes-langchain.openAi",
      "onError": "continueErrorOutput",
      "position": [
        4780,
        840
      ],
      "parameters": {
        "text": "=Analyse this image and extract revlant infromation about this subject : {{ $('Webhook').item.json.body.subject }}. 

If the webpage seem block by waf, or don't have any relant information about the subject reurn BLOCK with out any aditinonal information.",
        "modelId": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o",
          "cachedResultName": "GPT-4O"
        },
        "options": {
          "detail": "auto",
          "maxTokens": 300
        },
        "resource": "image",
        "inputType": "base64",
        "operation": "analyze"
      },
      "credentials": {
        "openAiApi": {
          "id": "FmszNHDDVS32ud21",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1.5
    },
    {
      "id": "c69364ce-c7e3-4f7a-ae0c-bad97643da30",
      "name": "Information Extractor1",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        5400,
        920
      ],
      "parameters": {
        "text": "={{ $('OpenAI1').item.json.content }}",
        "options": {
          "systemPromptTemplate": "You are an expert extraction algorithm.
Only extract relevant information from the text.
If you do not know the value of an attribute asked to extract, set the attribute's value to NA."
        },
        "attributes": {
          "attributes": [
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][0].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][0].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][1].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][1].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][2].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][2].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][3].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][3].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][4].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][4].description }}"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "0e756adb-a6ba-421f-9d21-374e7fa74781",
      "name": "OpenAI Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        5400,
        1140
      ],
      "parameters": {
        "model": "gpt-4o-mini",
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "FmszNHDDVS32ud21",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "920e9315-7de4-4a23-adbe-36338ea18097",
      "name": "Information Extractor2",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        6920,
        60
      ],
      "parameters": {
        "text": "={{ $('OpenAI').item.json.content }}",
        "options": {
          "systemPromptTemplate": "You are an expert extraction algorithm.
Only extract relevant information from the text.
If you do not know the value of an attribute asked to extract, set the attribute's value to NA. If the attribute is empty you can omit it."
        },
        "attributes": {
          "attributes": [
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][0].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][0].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][1].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][1].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][2].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][2].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][3].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][3].description }}"
            },
            {
              "name": "={{ $('Webhook').item.json.body['Target data'][4].DataName }}",
              "description": "={{ $('Webhook').item.json.body['Target data'][4].description }}"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "aa98d16e-d20c-4a8f-8eaf-1f64751dd8ea",
      "name": "OpenAI Chat Model2",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        6940,
        220
      ],
      "parameters": {
        "model": "gpt-4o-mini",
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "FmszNHDDVS32ud21",
          "name": "OpenAi account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "ba41b87e-feb7-4753-95b3-d569d54d8756",
      "name": "Sticky Note6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1820,
        -680
      ],
      "parameters": {
        "color": 3,
        "width": 813.0685668942513,
        "height": 507.4126722815008,
        "content": "## Proxy

**Configuration**

To configure your proxy with the project, follow the instructions on the GitHub project: https://github.com/Touxan/n8n-ultimate-scraper. To configure the docker-compose, you also need to add this argument to the 'Create Selenium Session' node : --proxy-server=address:port.

### ⚠️Warning⚠️
 Selenium does not support proxy authentication, so you need to add your server IP to the proxy whitelist. On GeoNode, it's here: https://app.geonode.com/whitelist-ip!"
      },
      "typeVersion": 1
    },
    {
      "id": "194bbecc-a5b3-4c5f-a17f-94703a44f196",
      "name": "Webhook",
      "type": "n8n-nodes-base.webhook",
      "position": [
        940,
        540
      ],
      "webhookId": "67d77918-2d5b-48c1-ae73-2004b32125f0",
      "parameters": {
        "path": "67d77918-2d5b-48c1-ae73-2004b32125f0",
        "options": {},
        "httpMethod": "POST",
        "responseMode": "responseNode"
      },
      "typeVersion": 2
    },
    {
      "id": "513389b0-0930-48d8-8cbb-e3575a0276ae",
      "name": "If Target Url",
      "type": "n8n-nodes-base.if",
      "position": [
        1380,
        620
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "4b608dcd-a175-4019-82c2-560320a2abce",
              "operator": {
                "type": "string",
                "operation": "empty",
                "singleValue": true
              },
              "leftValue": "={{ $('Webhook').item.json.body['Target Url'] }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "4ca0aee7-0dd2-4c78-b99b-8c188a3917f4",
      "name": "If1",
      "type": "n8n-nodes-base.if",
      "position": [
        3700,
        900
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "ff919945-b8c2-492a-b496-8617e9147389",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              },
              "leftValue": "={{ $('Webhook').item.json.body['Target Url'] }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "baa4dc94-67f3-4683-b8c7-6b6e856e7c64",
      "name": "Go on url1",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        3900,
        960
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/url",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"url\": \"{{ $('Information Extractor').item.json.output.Good_url_for_etract_information }}\"
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    },
    {
      "id": "2c439b0e-7c78-4ae8-b653-3f02b3834aa8",
      "name": "If2",
      "type": "n8n-nodes-base.if",
      "position": [
        3340,
        560
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "2a1bfc1e-28a6-45d1-9581-53b632af90e0",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              },
              "leftValue": "={{ $('Webhook').item.json.body.cookies }}",
              "rightValue": ""
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "fc3260da-9131-4850-a581-55a27ce4428d",
      "name": "Go on url2",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        4260,
        -20
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/url",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"url\": \"{{ $('Webhook').item.json.body['Target Url'] }}\"
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    },
    {
      "id": "fe345010-1fa3-4d2c-8bc2-e87f6aeeb0d9",
      "name": "If3",
      "type": "n8n-nodes-base.if",
      "position": [
        4060,
        100
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "ff919945-b8c2-492a-b496-8617e9147389",
              "operator": {
                "type": "string",
                "operation": "notEmpty",
                "singleValue": true
              },
              "leftValue": "={{ $('Webhook').item.json.body['Target Url'] }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "1aae02ec-3a22-4dd5-aea4-819758f130c1",
      "name": "Go on url3",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        4260,
        160
      ],
      "parameters": {
        "url": "=http://selenium_chrome:4444/wd/hub/session/{{ $('Create Selenium Session').item.json.value.sessionId }}/url",
        "method": "POST",
        "options": {},
        "jsonBody": "={
  \"url\": \"{{ $('Information Extractor').item.json.output.Good_url_for_etract_information }}\"
}
",
        "sendBody": true,
        "specifyBody": "json"
      },
      "retryOnFail": true,
      "typeVersion": 4.2
    }
  ],
  "active": true,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "e0ae7ac4-4be7-4b9c-9247-1475ffd297b1",
  "connections": {
    "If": {
      "main": [
        [
          {
            "node": "If3",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session5",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If1": {
      "main": [
        [
          {
            "node": "Go on url",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Go on url1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If2": {
      "main": [
        [
          {
            "node": "If",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "If1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If3": {
      "main": [
        [
          {
            "node": "Go on url2",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Go on url3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Code": {
      "main": [
        [
          {
            "node": "Inject Cookie",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Limit": {
      "main": [
        [
          {
            "node": "Refresh browser",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI": {
      "main": [
        [
          {
            "node": "If Block1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI1": {
      "main": [
        [
          {
            "node": "If Block",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Webhook": {
      "main": [
        [
          {
            "node": "Edit Fields (For testing prupose )",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If Block": {
      "main": [
        [
          {
            "node": "Delete Session1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Go on url": {
      "main": [
        [
          {
            "node": "Get ScreenShot 1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If Block1": {
      "main": [
        [
          {
            "node": "Delete Session2",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Go on url1": {
      "main": [
        [
          {
            "node": "Get ScreenShot 1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Go on url2": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Go on url3": {
      "main": [
        [
          {
            "node": "Code",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If Target Url": {
      "main": [
        [
          {
            "node": "Google search Query ",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Create Selenium Session",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Inject Cookie": {
      "main": [
        [
          {
            "node": "Limit",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session": {
      "main": [
        [
          {
            "node": "Information Extractor1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Convert to File": {
      "main": [
        [
          {
            "node": "OpenAI",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session1": {
      "main": [
        [
          {
            "node": "Respond to Webhook3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session2": {
      "main": [
        [
          {
            "node": "Respond to Webhook2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session3": {
      "main": [
        [
          {
            "node": "Information Extractor2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session4": {
      "main": [
        [
          {
            "node": "Error1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session5": {
      "main": [
        [
          {
            "node": "Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session6": {
      "main": [
        [
          {
            "node": "Error3",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Delete Session7": {
      "main": [
        [
          {
            "node": "Error2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get ScreenShot ": {
      "main": [
        [
          {
            "node": "Convert to File",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Refresh browser": {
      "main": [
        [
          {
            "node": "Get ScreenShot ",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session4",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Clean Webdriver ": {
      "main": [
        [
          {
            "node": "If2",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Convert to File1": {
      "main": [
        [
          {
            "node": "OpenAI1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get ScreenShot 1": {
      "main": [
        [
          {
            "node": "Convert to File1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get ScreenShot 2": {
      "main": [
        [
          {
            "node": "Convert to File2",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session8",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Go on ip-api.com": {
      "main": [
        [
          {
            "node": "Get ScreenShot 2",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session8",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Information Extractor",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "Information Extractor1",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model2": {
      "ai_languageModel": [
        [
          {
            "node": "Information Extractor2",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Check if empty of NA": {
      "main": [
        [
          {
            "node": "Error can't find url",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Create Selenium Session",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google search Query ": {
      "main": [
        [
          {
            "node": "Extract First Url Match",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Information Extractor": {
      "main": [
        [
          {
            "node": "Check if empty of NA",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Resize browser window": {
      "main": [
        [
          {
            "node": "Clean Webdriver ",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Information Extractor1": {
      "main": [
        [
          {
            "node": "Success",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Information Extractor2": {
      "main": [
        [
          {
            "node": "Success with cookie",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Selenium Session": {
      "main": [
        [
          {
            "node": "Resize browser window",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Delete Session7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract First Url Match": {
      "main": [
        [
          {
            "node": "Information Extractor",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Edit Fields (For testing prupose )": {
      "main": [
        [
          {
            "node": "If Target Url",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 自动检测新邮件
  • AI智能内容分析
  • 自定义分类规则
  • 批量处理能力
  • 详细的处理日志

技术分析

节点类型及作用

  • Html
  • @n8n/n8n-nodes-langchain.lmChatOpenAi
  • Httprequest
  • If
  • Limit

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

  • 有效的Gmail账户
  • n8n平台访问权限
  • Google API凭证
  • AI分类服务订阅

配置步骤

  1. 在n8n中导入工作流JSON文件
  2. 配置Gmail节点的认证信息
  3. 设置AI分类器的API密钥
  4. 自定义分类规则和标签映射
  5. 测试工作流执行
  6. 配置定时触发器(可选)

关键参数

参数名称 默认值 说明
maxEmails 50 单次处理的最大邮件数量
confidenceThreshold 0.8 分类置信度阈值
autoLabel true 是否自动添加标签

最佳实践

优化建议

  • 定期更新AI分类模型以提高准确性
  • 根据邮件量调整处理批次大小
  • 设置合理的分类置信度阈值
  • 定期清理过期的分类规则

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护Gmail账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个邮件分类任务
  • 监控系统资源使用情况

故障排除

常见问题

邮件未被正确分类

检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。

Gmail认证失败

确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用测试邮件验证分类逻辑
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

工作流包含以下错误处理机制:

  • 网络超时自动重试(最多3次)
  • API错误记录和告警
  • 处理失败邮件的隔离机制
  • 异常情况下的回滚操作