extract_swifts
工作流概述
这是一个包含23个节点的工作流:先从 theswiftcodes.com 的按国家浏览页面提取各国链接,再逐个国家、逐页抓取银行 SWIFT 代码(下载的 HTML 页面会缓存到本地目录),最后将结果写入 MongoDB。
工作流源代码
{
"id": "14",
"name": "extract_swifts",
"nodes": [
{
"name": "On clicking 'execute'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-140,
820
],
"parameters": {},
"typeVersion": 1
},
{
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
320,
820
],
"parameters": {
"url": "https://www.theswiftcodes.com/browse-by-country/",
"options": {},
"responseFormat": "string"
},
"typeVersion": 1
},
{
"name": "HTML Extract",
"type": "n8n-nodes-base.htmlExtract",
"position": [
510,
820
],
"parameters": {
"options": {},
"extractionValues": {
"values": [
{
"key": "countries",
"attribute": "href",
"cssSelector": "ol > li > a",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1
},
{
"name": "SplitInBatches",
"type": "n8n-nodes-base.splitInBatches",
"position": [
910,
820
],
"parameters": {
"options": {
"reset": false
},
"batchSize": 1
},
"typeVersion": 1
},
{
"name": "HTTP Request1",
"type": "n8n-nodes-base.httpRequest",
"position": [
2250,
740
],
"parameters": {
"url": "={{$node[\"Set\"].json[\"url\"]}}",
"options": {},
"responseFormat": "file"
},
"typeVersion": 1
},
{
"name": "HTML Extract1",
"type": "n8n-nodes-base.htmlExtract",
"position": [
2750,
590
],
"parameters": {
"options": {},
"sourceData": "binary",
"extractionValues": {
"values": [
{
"key": "next_button",
"attribute": "href",
"cssSelector": "span.next > a",
"returnValue": "attribute"
},
{
"key": "names",
"cssSelector": "td.table-name",
"returnArray": true
},
{
"key": "swifts",
"cssSelector": "td.table-swift",
"returnArray": true
},
{
"key": "cities",
"cssSelector": "td.table-city",
"returnArray": true
},
{
"key": "branches",
"cssSelector": "td.table-branch",
"returnArray": true
}
]
}
},
"typeVersion": 1
},
{
"name": "MongoDB1",
"type": "n8n-nodes-base.mongoDb",
"position": [
3280,
590
],
"parameters": {
"fields": "iso_code,country,page,name,branch,city,swift_code,createdAt,updatedAt",
"options": {
"dateFields": "createdAt,updatedAt"
},
"operation": "insert",
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"typeVersion": 1
},
{
"name": "uProc",
"type": "n8n-nodes-base.uproc",
"position": [
1100,
820
],
"parameters": {
"tool": "getCountryNormalized",
"group": "geographic",
"country": "={{$node[\"SplitInBatches\"].json[\"country\"].replace(/[\/0-9]/g, \"\")}}",
"additionalOptions": {}
},
"credentials": {
"uprocApi": "uproc-miquel"
},
"typeVersion": 1
},
{
  "name": "Prepare Documents",
  "type": "n8n-nodes-base.function",
  "position": [
    2930,
    590
  ],
  "parameters": {
    "functionCode": "// Turn the parallel arrays scraped by 'HTML Extract1' into one item per SWIFT row.\n// Reads items[0].json.{names,swifts,cities,branches}; one output item is built per entry of `swifts`.\nvar newItems = [];\nvar scraped = items[0].json;\n// Declare the counter with `var` so it does not leak as an implicit global.\nfor (var i = 0; i < scraped.swifts.length; i++) {\n  var item = {\n    iso_code: $node['uProc'].json.message.code,\n    // Country link with '-', '/' and digits stripped.\n    country: $node['SplitInBatches'].json.country.replace(/[-\/0-9]/g, \"\"),\n    page: $node['Set Page to Scrape'].json.page,\n    name: scraped.names[i],\n    city: scraped.cities[i],\n    branch: scraped.branches[i],\n    swift_code: scraped.swifts[i],\n    createdAt: new Date(),\n    updatedAt: new Date()\n  };\n  newItems.push({json: item});\n}\nreturn newItems;\n"
  },
  "typeVersion": 1
},
{
"name": "More Countries",
"type": "n8n-nodes-base.if",
"position": [
2810,
1100
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"SplitInBatches\"].context[\"noItemsLeft\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1
},
{
  "name": "Set Page to Scrape",
  "type": "n8n-nodes-base.functionItem",
  "position": [
    1290,
    680
  ],
  "parameters": {
    "functionCode": "// Choose the path to scrape next and store it on item.page.\n// When the workflow static data holds a non-empty `page` (set by 'Set More Pages'),\n// that value is used; otherwise the current country's link from 'SplitInBatches' is used.\nconst staticData = getWorkflowStaticData('global');\nitem.page = \"\";\nif (staticData.page && staticData.page.length) {\n  item.page = staticData.page;\n} else {\n  item.page = $node['SplitInBatches'].json.country;\n}\nreturn item;\n"
  },
  "typeVersion": 1
},
{
"name": "More Pages",
"type": "n8n-nodes-base.if",
"position": [
3070,
1020
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$json[\"more_pages\"] + \"\"}}",
"value2": "true"
}
]
}
},
"typeVersion": 1
},
{
  "name": "Set More Pages",
  "type": "n8n-nodes-base.function",
  "position": [
    3470,
    590
  ],
  "parameters": {
    "functionCode": "// Decide whether the current country has another result page.\n// Reads the 'next_button' href extracted by 'HTML Extract1' and returns a single\n// item { more_pages: boolean } that the 'More Pages' IF node evaluates.\nvar nextButton = $node['HTML Extract1'].json.next_button;\nvar next_page = nextButton && nextButton.length ? nextButton : \"\";\nvar more_pages = next_page.length > 0;\nconst staticData = getWorkflowStaticData('global');\nif (more_pages) {\n  // Remember the next-page link so 'Set Page to Scrape' picks it up.\n  staticData.page = next_page;\n} else {\n  // Last page: clear the stored link so 'Set Page to Scrape' falls back to the country link.\n  delete staticData.page;\n}\nreturn [\n  {\n    json: {\n      more_pages: more_pages\n    }\n  }\n];\n"
  },
  "typeVersion": 1
},
{
"name": "Set",
"type": "n8n-nodes-base.set",
"position": [
1440,
680
],
"parameters": {
"values": {
"string": [
{
"name": "url",
"value": "=https://www.theswiftcodes.com{{$node[\"Set Page to Scrape\"].json[\"page\"]}}"
}
]
},
"options": {}
},
"typeVersion": 1
},
{
  "name": "Generate filename",
  "type": "n8n-nodes-base.functionItem",
  "position": [
    1600,
    610
  ],
  "parameters": {
    "functionCode": "// Derive the cache file name from item.url: every character that is not a\n// letter or digit becomes '_', then '.html' is appended. The result is stored on item.file.\nvar generateNameFromUrl = function(url) {\n  return url.replace(/[^a-z0-9]/gi, \"_\");\n};\nitem.file = generateNameFromUrl(item.url) + \".html\";\nreturn item;"
  },
  "typeVersion": 1
},
{
"name": "Read Binary File",
"type": "n8n-nodes-base.readBinaryFile",
"position": [
1770,
610
],
"parameters": {
"filePath": "=/home/node/.cache/scrapper/{{$json[\"file\"]}}"
},
"typeVersion": 1,
"continueOnFail": true,
"alwaysOutputData": true
},
{
"name": "File exists?",
"type": "n8n-nodes-base.if",
"position": [
1950,
610
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{$node[\"Read Binary File\"].binary.data.mimeType}}",
"value2": "text/html"
}
]
}
},
"typeVersion": 1
},
{
"name": "Write Binary File",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
2400,
740
],
"parameters": {
"fileName": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}",
"dataPropertyName": "=data"
},
"typeVersion": 1
},
{
  "name": "Read Binary File1",
  "type": "n8n-nodes-base.readBinaryFile",
  "position": [
    2570,
    590
  ],
  "parameters": {
    "filePath": "=/home/node/.cache/scrapper/{{$node[\"Generate filename\"].json[\"file\"]}}"
  },
  "typeVersion": 1,
  "continueOnFail": true,
  "alwaysOutputData": true
},
{
  "name": "Wait",
  "type": "n8n-nodes-base.function",
  "position": [
    2090,
    740
  ],
  "parameters": {
    "functionCode": "// Pause before 'HTTP Request1' runs.\n// Resolves with an empty item list once waitTimeSeconds have elapsed.\nconst waitTimeSeconds = 1;\nreturn new Promise((resolve) => {\n  setTimeout(() => {\n    resolve([]);\n  }, waitTimeSeconds * 1000);\n});\n"
  },
  "typeVersion": 1,
  "continueOnFail": true,
  "alwaysOutputData": true
},
{
  "name": "Prepare countries",
  "type": "n8n-nodes-base.function",
  "position": [
    700,
    820
  ],
  "parameters": {
    "functionCode": "// Split the 'countries' array of the first input item into one item per\n// entry, each shaped as { country: <entry> }.\nreturn items[0].json.countries.map(function(country) {\n  return {\n    json: {country: country}\n  };\n});"
  },
  "typeVersion": 1
},
{
"name": "Create Directory",
"type": "n8n-nodes-base.executeCommand",
"position": [
70,
820
],
"parameters": {
"command": "mkdir -p /home/node/.cache/scrapper/"
},
"typeVersion": 1,
"continueOnFail": true
},
{
"name": "MongoDB",
"type": "n8n-nodes-base.mongoDb",
"disabled": true,
"position": [
3100,
520
],
"parameters": {
"query": "={\"swift_code\": \"{{$json[\"swift_code\"]}}\"}",
"options": {},
"collection": "swifts.meetup"
},
"credentials": {
"mongoDb": "db-mongo"
},
"executeOnce": false,
"typeVersion": 1,
"alwaysOutputData": true
}
],
"active": false,
"settings": {},
"connections": {
"Set": {
"main": [
[
{
"node": "Generate filename",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "HTTP Request1",
"type": "main",
"index": 0
}
]
]
},
"uProc": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
]
]
},
"MongoDB": {
"main": [
[]
]
},
"MongoDB1": {
"main": [
[
{
"node": "Set More Pages",
"type": "main",
"index": 0
}
]
]
},
"More Pages": {
"main": [
[
{
"node": "Set Page to Scrape",
"type": "main",
"index": 0
}
],
[
{
"node": "More Countries",
"type": "main",
"index": 0
}
]
]
},
"File exists?": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract": {
"main": [
[
{
"node": "Prepare countries",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "HTML Extract",
"type": "main",
"index": 0
}
]
]
},
"HTML Extract1": {
"main": [
[
{
"node": "Prepare Documents",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request1": {
"main": [
[
{
"node": "Write Binary File",
"type": "main",
"index": 0
}
]
]
},
"More Countries": {
"main": [
[],
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Set More Pages": {
"main": [
[
{
"node": "More Pages",
"type": "main",
"index": 0
}
]
]
},
"SplitInBatches": {
"main": [
[
{
"node": "uProc",
"type": "main",
"index": 0
}
]
]
},
"Create Directory": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File": {
"main": [
[
{
"node": "File exists?",
"type": "main",
"index": 0
}
]
]
},
"Generate filename": {
"main": [
[
{
"node": "Read Binary File",
"type": "main",
"index": 0
}
]
]
},
"Prepare Documents": {
"main": [
[
{
"node": "MongoDB1",
"type": "main",
"index": 0
}
]
]
},
"Prepare countries": {
"main": [
[
{
"node": "SplitInBatches",
"type": "main",
"index": 0
}
]
]
},
"Read Binary File1": {
"main": [
[
{
"node": "HTML Extract1",
"type": "main",
"index": 0
}
]
]
},
"Write Binary File": {
"main": [
[
{
"node": "Read Binary File1",
"type": "main",
"index": 0
}
]
]
},
"Set Page to Scrape": {
"main": [
[
{
"node": "Set",
"type": "main",
"index": 0
}
]
]
},
"On clicking 'execute'": {
"main": [
[
{
"node": "Create Directory",
"type": "main",
"index": 0
}
]
]
}
}
}
功能特点
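- 手动触发,按国家逐个抓取 theswiftcodes.com 上的 SWIFT 代码
- 跟随“下一页”链接自动翻页,直到该国家没有更多页面
- 将已下载的 HTML 页面缓存到本地目录,已缓存的页面不再重复请求
- 通过 uProc 规范化国家名称并获取国家代码
- 将名称、城市、分行、SWIFT 代码等字段写入 MongoDB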
技术分析
节点类型及作用
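- manualTrigger:手动启动工作流
- httpRequest:下载国家列表页和各个 SWIFT 代码页面
- htmlExtract:用 CSS 选择器提取国家链接、下一页链接及表格各列
- splitInBatches:每次处理一个国家(batchSize 为 1)
- mongoDb:将整理好的文档插入 swifts.meetup 集合
- function / functionItem:整理数据、生成缓存文件名、控制翻页和等待
- if:判断缓存文件是否存在、是否还有下一页、是否还有未处理的国家
- set:拼接待抓取页面的完整 URL
- readBinaryFile / writeBinaryFile:读写本地 HTML 缓存
- executeCommand:创建缓存目录
- uproc:规范化国家名称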
复杂度评估
配置难度:
维护难度:
扩展性:
实施指南
前置条件
- n8n平台访问权限
- MongoDB 凭证(工作流中引用的名称为 db-mongo)
- uProc API 凭证(工作流中引用的名称为 uproc-miquel)
- n8n 运行环境可执行命令,并可读写 /home/node/.cache/scrapper/ 目录
配置步骤
- 在n8n中导入工作流JSON文件
- 配置 MongoDB1 节点的 MongoDB 凭证
- 配置 uProc 节点的 API 凭证
- 按需修改 MongoDB 集合名称和缓存目录路径
- 手动执行工作流进行测试
- 如需定期运行,可将手动触发器替换为定时触发器(可选)
关键参数
| 参数名称 | 默认值 | 说明 |
|---|---|---|
| batchSize | 1 | SplitInBatches 节点每批处理的国家数量 |
| waitTimeSeconds | 1 | Wait 节点在下载未缓存页面前等待的秒数 |
| collection | swifts.meetup | MongoDB 写入的集合名称 |
最佳实践
优化建议
- 根据目标网站的承受能力调整 Wait 节点的等待时间
- 定期清理缓存目录中过期的 HTML 文件,以便获取更新后的页面
- 重复执行前先清空目标集合,或增加去重逻辑,避免插入重复文档
- 目标网站页面结构变化时,及时更新 HTML Extract 节点的 CSS 选择器
安全注意事项
- 妥善保管API密钥和认证信息
- 限制工作流的访问权限
- 定期审查处理日志
- 仅在受信任的环境中运行,因为工作流会执行系统命令并读写本地文件
性能优化
- 使用增量处理减少重复工作
- 缓存频繁访问的数据
- 控制请求频率,避免对目标网站造成过大压力
- 监控系统资源使用情况
故障排除
常见问题
页面内容未更新
工作流会优先使用本地缓存的 HTML 文件;如需重新下载,请删除 /home/node/.cache/scrapper/ 下对应的文件。
MongoDB 或 uProc 认证失败
确认 n8n 中的 db-mongo 与 uproc-miquel 凭证有效,且可以连接到对应服务。
调试技巧
- 启用详细日志记录查看每个步骤的执行情况
- 先用单个国家的页面验证 CSS 选择器和提取结果
- 检查网络连接和API服务状态
- 逐步执行工作流定位问题节点
错误处理
工作流包含以下错误处理机制:
- Read Binary File、Read Binary File1、Wait 和 Create Directory 节点启用了 continueOnFail,出错时工作流继续执行
- 缓存文件不存在时,转为通过 HTTP 请求下载页面并写入缓存
- 未配置自动重试、告警或回滚机制,其余节点出错会中断执行