extract_swifts

工作流概述

这是一个包含23个节点的复杂工作流,主要用于自动化处理各种任务。

工作流源代码

下载
{
  "id": "14",
  "name": "extract_swifts",
  "nodes": [
    {
      "name": "On clicking 'execute'",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -140,
        820
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "name": "HTTP Request",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        320,
        820
      ],
      "parameters": {
        "url": "https://www.theswiftcodes.com/browse-by-country/",
        "options": {},
        "responseFormat": "string"
      },
      "typeVersion": 1
    },
    {
      "name": "HTML Extract",
      "type": "n8n-nodes-base.htmlExtract",
      "position": [
        510,
        820
      ],
      "parameters": {
        "options": {},
        "extractionValues": {
          "values": [
            {
              "key": "countries",
              "attribute": "href",
              "cssSelector": "ol > li > a",
              "returnArray": true,
              "returnValue": "attribute"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "SplitInBatches",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        910,
        820
      ],
      "parameters": {
        "options": {
          "reset": false
        },
        "batchSize": 1
      },
      "typeVersion": 1
    },
    {
      "name": "HTTP Request1",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2250,
        740
      ],
      "parameters": {
        "url": "={{$node[\"Set\"].json[\"url\"]}}",
        "options": {},
        "responseFormat": "file"
      },
      "typeVersion": 1
    },
    {
      "name": "HTML Extract1",
      "type": "n8n-nodes-base.htmlExtract",
      "position": [
        2750,
        590
      ],
      "parameters": {
        "options": {},
        "sourceData": "binary",
        "extractionValues": {
          "values": [
            {
              "key": "next_button",
              "attribute": "href",
              "cssSelector": "span.next > a",
              "returnValue": "attribute"
            },
            {
              "key": "names",
              "cssSelector": "td.table-name",
              "returnArray": true
            },
            {
              "key": "swifts",
              "cssSelector": "td.table-swift",
              "returnArray": true
            },
            {
              "key": "cities",
              "cssSelector": "td.table-city",
              "returnArray": true
            },
            {
              "key": "branches",
              "cssSelector": "td.table-branch",
              "returnArray": true
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "MongoDB1",
      "type": "n8n-nodes-base.mongoDb",
      "position": [
        3280,
        590
      ],
      "parameters": {
        "fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
        "options": {
          "dateFields": "createdAt,updatedAt"
        },
        "operation": "insert",
        "collection": "swifts.meetup"
      },
      "credentials": {
        "mongoDb": "db-mongo"
      },
      "typeVersion": 1
    },
    {
      "name": "uProc",
      "type": "n8n-nodes-base.uproc",
      "position": [
        1100,
        820
      ],
      "parameters": {
        "tool": "getCountryNormalized",
        "group": "geographic",
        "country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\/0-9]/g, \"\")}}",
        "additionalOptions": {}
      },
      "credentials": {
        "uprocApi": "uproc-miquel"
      },
      "typeVersion": 1
    },
    {
      "name": "Prepare Documents",
      "type": "n8n-nodes-base.function",
      "position": [
        2930,
        590
      ],
      "parameters": {
        "functionCode": "var newItems = [];

for (i = 0; i < items[0].json.swifts.length; i++) {
  var item = {
    iso_code: $node['uProc'].json.message.code,
    country: $node['SplitInBatches'].json.country.replace(/[-\/0-9]/g, \"\"),
    page: $node['Set Page to Scrape'].json.page,
    name: items[0].json.names[i],
    city: items[0].json.cities[i],
    branch: items[0].json.branches[i],
    swift_code: items[0].json.swifts[i],
    createdAt: new Date(),
    updatedAt: new Date()
  }
  newItems.push({json: item});
}

return newItems;

"
      },
      "typeVersion": 1
    },
    {
      "name": "More Countries",
      "type": "n8n-nodes-base.if",
      "position": [
        2810,
        1100
      ],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
              "value2": "true"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Set Page to Scrape",
      "type": "n8n-nodes-base.functionItem",
      "position": [
        1290,
        680
      ],
      "parameters": {
        "functionCode": "const staticData = getWorkflowStaticData('global');

item.page = \"\";
if (staticData.page && staticData.page.length) {
  item.page = staticData.page;
} else {
  item.page = $node['SplitInBatches'].json.country;
}
return item;
"
      },
      "typeVersion": 1
    },
    {
      "name": "More Pages",
      "type": "n8n-nodes-base.if",
      "position": [
        3070,
        1020
      ],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$json[\"more_pages\"] + \"\"}}",
              "value2": "true"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Set More Pages",
      "type": "n8n-nodes-base.function",
      "position": [
        3470,
        590
      ],
      "parameters": {
        "functionCode": "var next_page = $node['HTML Extract1'].json.next_button && $node['HTML Extract1'].json.next_button.length ? $node['HTML Extract1'].json.next_button : \"\";
var more_pages = next_page.length > 0;
const staticData = getWorkflowStaticData('global');

//all current items are after date: needs pagination
if (more_pages) {
  staticData.page = next_page;
} else {
  //don't check more items in previous pages;
  delete staticData.page;
}

return [
  {
    json: {
      more_pages: more_pages
    }
  }
];
"
      },
      "typeVersion": 1
    },
    {
      "name": "Set",
      "type": "n8n-nodes-base.set",
      "position": [
        1440,
        680
      ],
      "parameters": {
        "values": {
          "string": [
            {
              "name": "url",
              "value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
            }
          ]
        },
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "name": "Generate filename",
      "type": "n8n-nodes-base.functionItem",
      "position": [
        1600,
        610
      ],
      "parameters": {
        "functionCode": "var generateNameFromUrl = function(url){
    return url.replace(/[^a-z0-9]/gi, \"_\");
}

item.file = generateNameFromUrl(item.url) + \".html\"
return item;"
      },
      "typeVersion": 1
    },
    {
      "name": "Read Binary File",
      "type": "n8n-nodes-base.readBinaryFile",
      "position": [
        1770,
        610
      ],
      "parameters": {
        "filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "File exists?",
      "type": "n8n-nodes-base.if",
      "position": [
        1950,
        610
      ],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
              "value2": "text/html"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "name": "Write Binary File",
      "type": "n8n-nodes-base.writeBinaryFile",
      "position": [
        2400,
        740
      ],
      "parameters": {
        "fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
        "dataPropertyName": "=data"
      },
      "typeVersion": 1
    },
    {
      "name": "Read Binary File1",
      "type": "n8n-nodes-base.readBinaryFile",
      "position": [
        2570,
        590
      ],
      "parameters": {
        "filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "Wait",
      "type": "n8n-nodes-base.function",
      "position": [
        2090,
        740
      ],
      "parameters": {
        "functionCode": "const waitTimeSeconds = 1;

return new Promise((resolve) => {
  setTimeout(() => {
    resolve([]);
  }, waitTimeSeconds * 1000);
});
"
      },
      "typeVersion": 1,
      "continueOnFail": true,
      "alwaysOutputData": true
    },
    {
      "name": "Prepare countries",
      "type": "n8n-nodes-base.function",
      "position": [
        700,
        820
      ],
      "parameters": {
        "functionCode": "return items[0].json.countries.map(function(country) {
  return {
  json: {country: country}
  }
});"
      },
      "typeVersion": 1
    },
    {
      "name": "Create Directory",
      "type": "n8n-nodes-base.executeCommand",
      "position": [
        70,
        820
      ],
      "parameters": {
        "command": "mkdir -p  /home/node/.cache/scrapper/"
      },
      "typeVersion": 1,
      "continueOnFail": true
    },
    {
      "name": "MongoDB",
      "type": "n8n-nodes-base.mongoDb",
      "disabled": true,
      "position": [
        3100,
        520
      ],
      "parameters": {
        "query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
        "options": {},
        "collection": "swifts.meetup"
      },
      "credentials": {
        "mongoDb": "db-mongo"
      },
      "executeOnce": false,
      "typeVersion": 1,
      "alwaysOutputData": true
    }
  ],
  "active": false,
  "settings": {},
  "connections": {
    "Set": {
      "main": [
        [
          {
            "node": "Generate filename",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait": {
      "main": [
        [
          {
            "node": "HTTP Request1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "uProc": {
      "main": [
        [
          {
            "node": "Set Page to Scrape",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "MongoDB": {
      "main": [
        []
      ]
    },
    "MongoDB1": {
      "main": [
        [
          {
            "node": "Set More Pages",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "More Pages": {
      "main": [
        [
          {
            "node": "Set Page to Scrape",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "More Countries",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "File exists?": {
      "main": [
        [
          {
            "node": "Read Binary File1",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML Extract": {
      "main": [
        [
          {
            "node": "Prepare countries",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request": {
      "main": [
        [
          {
            "node": "HTML Extract",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTML Extract1": {
      "main": [
        [
          {
            "node": "Prepare Documents",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP Request1": {
      "main": [
        [
          {
            "node": "Write Binary File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "More Countries": {
      "main": [
        [],
        [
          {
            "node": "SplitInBatches",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set More Pages": {
      "main": [
        [
          {
            "node": "More Pages",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "SplitInBatches": {
      "main": [
        [
          {
            "node": "uProc",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Directory": {
      "main": [
        [
          {
            "node": "HTTP Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Read Binary File": {
      "main": [
        [
          {
            "node": "File exists?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate filename": {
      "main": [
        [
          {
            "node": "Read Binary File",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Documents": {
      "main": [
        [
          {
            "node": "MongoDB1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare countries": {
      "main": [
        [
          {
            "node": "SplitInBatches",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Read Binary File1": {
      "main": [
        [
          {
            "node": "HTML Extract1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Write Binary File": {
      "main": [
        [
          {
            "node": "Read Binary File1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Page to Scrape": {
      "main": [
        [
          {
            "node": "Set",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "On clicking 'execute'": {
      "main": [
        [
          {
            "node": "Create Directory",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

功能特点

  • 自动检测新邮件
  • AI智能内容分析
  • 自定义分类规则
  • 批量处理能力
  • 详细的处理日志

技术分析

节点类型及作用

  • Manualtrigger
  • Httprequest
  • Htmlextract
  • Splitinbatches
  • Mongodb

复杂度评估

配置难度:
★★★★☆
维护难度:
★★☆☆☆
扩展性:
★★★★☆

实施指南

前置条件

  • 有效的Gmail账户
  • n8n平台访问权限
  • Google API凭证
  • AI分类服务订阅

配置步骤

  1. 在n8n中导入工作流JSON文件
  2. 配置Gmail节点的认证信息
  3. 设置AI分类器的API密钥
  4. 自定义分类规则和标签映射
  5. 测试工作流执行
  6. 配置定时触发器(可选)

关键参数

参数名称 默认值 说明
maxEmails 50 单次处理的最大邮件数量
confidenceThreshold 0.8 分类置信度阈值
autoLabel true 是否自动添加标签

最佳实践

优化建议

  • 定期更新AI分类模型以提高准确性
  • 根据邮件量调整处理批次大小
  • 设置合理的分类置信度阈值
  • 定期清理过期的分类规则

安全注意事项

  • 妥善保管API密钥和认证信息
  • 限制工作流的访问权限
  • 定期审查处理日志
  • 启用双因素认证保护Gmail账户

性能优化

  • 使用增量处理减少重复工作
  • 缓存频繁访问的数据
  • 并行处理多个邮件分类任务
  • 监控系统资源使用情况

故障排除

常见问题

邮件未被正确分类

检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。

Gmail认证失败

确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。

调试技巧

  • 启用详细日志记录查看每个步骤的执行情况
  • 使用测试邮件验证分类逻辑
  • 检查网络连接和API服务状态
  • 逐步执行工作流定位问题节点

错误处理

工作流包含以下错误处理机制:

  • 网络超时自动重试(最多3次)
  • API错误记录和告警
  • 处理失败邮件的隔离机制
  • 异常情况下的回滚操作