Gmail to Vector Embeddings with PGVector and Ollama
工作流概述
这是一个包含20个节点的复杂工作流,主要用于自动化处理各种任务。
工作流源代码
{
"id": "ZiIoKEClTk83g1Jt",
"meta": {
"instanceId": "8a3ba313628b26e4e4cf0504ff23322f235d6b433d92e59bcf8762764730ed80",
"templateCredsSetupCompleted": true
},
"name": "Gmail to Vector Embeddings with PGVector and Ollama",
"tags": [],
"nodes": [
{
"id": "162b1a8b-2471-4880-9fcb-7f2dcfe175a8",
"name": "Embeddings Ollama",
"type": "@n8n/n8n-nodes-langchain.embeddingsOllama",
"position": [
1920,
-100
],
"parameters": {
"model": "nomic-embed-text:latest"
},
"credentials": {},
"typeVersion": 1
},
{
"id": "49eb04b0-3b54-499c-ba46-3251102a4017",
"name": "Default Data Loader",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
2040,
-97.5
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "emails_metadata.id",
"value": "={{ $('Extract email fields').item.json.email_id }}"
},
{
"name": "emails_metadata.thread_id",
"value": "={{ $('Extract email fields').item.json.thread_id }}"
}
]
}
},
"jsonData": "={{ $('Extract email fields').item.json.email_text }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "b4853472-6ac7-4da5-97b3-b22950ddff06",
"name": "Recursive Character Text Splitter",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
2128,
100
],
"parameters": {
"options": {},
"chunkSize": 2000,
"chunkOverlap": 50
},
"typeVersion": 1
},
{
"id": "b189f134-f78e-438f-9189-2f2b276b487d",
"name": "Gmail Trigger",
"type": "n8n-nodes-base.gmailTrigger",
"position": [
1260,
280
],
"parameters": {
"simple": false,
"filters": {
"labelIds": [
"INBOX"
]
},
"options": {
"downloadAttachments": true
},
"pollTimes": {
"item": [
{
"mode": "everyMinute"
}
]
}
},
"credentials": {},
"typeVersion": 1.2
},
{
"id": "81cba4c5-7762-483d-a076-3fa8799f70ce",
"name": "Loop Over Items",
"type": "n8n-nodes-base.splitInBatches",
"position": [
840,
40
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "f82243ad-6efd-4be2-bf4e-5001870ae854",
"name": "Split Out",
"type": "n8n-nodes-base.splitOut",
"position": [
640,
40
],
"parameters": {
"options": {
"destinationFieldName": "after"
},
"fieldToSplitOut": "weeks"
},
"typeVersion": 1
},
{
"id": "2163d5ec-416f-4299-8a9d-10c26eaef32f",
"name": "Was manually triggered?",
"type": "n8n-nodes-base.if",
"position": [
2416,
-145
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "3cbc77e7-1796-4e1b-bbff-6391dd131336",
"operator": {
"type": "boolean",
"operation": "false",
"singleValue": true
},
"leftValue": "={{ $('Manual Trigger').isExecuted }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "76557325-c94e-47a9-9384-e6cbea94f67e",
"name": "Manual Trigger",
"type": "n8n-nodes-base.manualTrigger",
"position": [
0,
40
],
"parameters": {},
"typeVersion": 1
},
{
"id": "b9400906-a458-4305-8805-bb6bea17396b",
"name": "No Operation, do nothing",
"type": "n8n-nodes-base.noOp",
"position": [
2636,
-145
],
"parameters": {},
"typeVersion": 1
},
{
"id": "5f0aa7c2-85b3-4585-8c8c-727af27de61c",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
-60,
-540
],
"parameters": {
"color": 6,
"width": 1440,
"height": 780,
"content": "## Bulk e-mail import
Press the `Test workflow` button to run this once, and bulk import of all your e-mail
### IMPORTANT
Specify your Gmail account creation date by editing the code node"
},
"typeVersion": 1
},
{
"id": "2b85d362-d40d-49c0-b3a7-27fdbee8e90b",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
360,
-100
],
"parameters": {
"width": 220,
"height": 300,
"content": "## Edit this ⬇️
And specify your Gmail account creation date"
},
"typeVersion": 1
},
{
"id": "05a9dd25-ae36-4e3c-a249-787ee1047bff",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
740,
260
],
"parameters": {
"color": 4,
"width": 640,
"height": 180,
"content": "## Activate the workflow
And this trigger will check for new mail, every minute"
},
"typeVersion": 1
},
{
"id": "3f19abc5-a165-49e8-b97e-233c47949e68",
"name": "Set before and after dates",
"type": "n8n-nodes-base.set",
"position": [
1040,
-320
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "48e8d703-e52a-46cc-bd72-9b0d3352091b",
"name": "after",
"type": "string",
"value": "={{ $json.after }}"
},
{
"id": "a515cf56-9bc6-4724-a0ef-01a6159606f7",
"name": "before",
"type": "string",
"value": "={{ DateTime.fromISO($json.after).plus(1, 'week').toISODate() }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "af742b17-2086-4698-af3a-32cb7260f380",
"name": "Extract email fields",
"type": "n8n-nodes-base.set",
"position": [
1480,
-320
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f818bad8-b000-499c-b137-de22dff4a343",
"name": "email_text",
"type": "string",
"value": "={{ $json.text }}"
},
{
"id": "68c16520-4a26-4ea9-95f7-ee89b9f53c4f",
"name": "email_from",
"type": "string",
"value": "={{ $json.from?.text ?? '' }}"
},
{
"id": "981f1f5b-ba2f-4153-966c-45bb6b535794",
"name": "email_to",
"type": "string",
"value": "={{ $json.to?.text ?? '' }}"
},
{
"id": "b528dd23-a743-4a55-98df-e1ae823b29b3",
"name": "date",
"type": "string",
"value": "={{ DateTime.fromISO($json.date).toISO() }}"
},
{
"id": "39081032-e503-470b-8d83-b5064238d037",
"name": "email_id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "146e8e72-3c2c-4320-b93a-b109d2e46139",
"name": "thread_id",
"type": "string",
"value": "={{ $json.threadId }}"
},
{
"id": "a49333a5-c565-4d46-8398-d423072b1e4d",
"name": "email_subject",
"type": "string",
"value": "={{ $json.subject }}"
},
{
"id": "806cf930-450e-4221-8061-a71ec8bf9bbe",
"name": "attachments",
"type": "array",
"value": "={{ Object.keys($binary).map(item => $binary[item].fileName).filter(item => !!item) }}"
},
{
"id": "30a38aaf-04c2-4286-99c9-8bb60ae8b317",
"name": "email_cc",
"type": "string",
"value": "={{ $json.cc?.text ?? ''}}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "a51f5d5f-69c7-4153-be7f-492a8694629a",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1640,
-540
],
"parameters": {
"width": 720,
"height": 780,
"content": "## Magic here 🪄
#### (not really, just statistics)
E-mail is stored in a `emails_metadata` structured table, and also fed to the [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) model to be stored in a `emails_embeddings` table as [vector embeddings](https://www.pinecone.io/learn/vector-embeddings/) so similarity searches are possible.
The `email_id` field can be used to make the relation between the structured records and the vector embeddings, as it's stored in their metadata as `emails_metadata.id`.
This is also the case for `thread_id`."
},
"typeVersion": 1
},
{
"id": "809e9269-1275-4c87-8c7f-1840c76f5b22",
"name": "Store structured",
"type": "n8n-nodes-base.postgres",
"onError": "continueErrorOutput",
"position": [
1700,
-320
],
"parameters": {
"table": {
"__rl": true,
"mode": "name",
"value": "emails_metadata"
},
"schema": {
"__rl": true,
"mode": "list",
"value": "public"
},
"columns": {
"value": {},
"schema": [
{
"id": "email_id",
"type": "string",
"display": true,
"removed": false,
"required": true,
"displayName": "email_id",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "thread_id",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "thread_id",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "email_from",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "email_from",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "email_to",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "email_to",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "email_cc",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "email_cc",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "date",
"type": "dateTime",
"display": true,
"removed": false,
"required": true,
"displayName": "date",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "email_subject",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "email_subject",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "attachments",
"type": "array",
"display": true,
"removed": false,
"required": false,
"displayName": "attachments",
"defaultMatch": false,
"canBeUsedToMatch": false
},
{
"id": "email_text",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "email_text",
"defaultMatch": false,
"canBeUsedToMatch": false
}
],
"mappingMode": "autoMapInputData",
"matchingColumns": [
"email_id"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {
"outputColumns": [
"*"
]
},
"operation": "upsert"
},
"credentials": {},
"typeVersion": 2.6
},
{
"id": "1c3dca79-381c-411b-8727-baa297e1ceda",
"name": "Store vectorized",
"type": "@n8n/n8n-nodes-langchain.vectorStorePGVector",
"onError": "continueRegularOutput",
"position": [
1936,
-320
],
"parameters": {
"mode": "insert",
"options": {},
"tableName": "emails_embeddings"
},
"credentials": {},
"typeVersion": 1.1
},
{
"id": "3b7e13b2-73e9-42e7-900c-59611fe5af32",
"name": "Create the table",
"type": "n8n-nodes-base.postgres",
"position": [
200,
40
],
"parameters": {
"query": "CREATE TABLE IF NOT EXISTS public.emails_metadata (
email_id character varying(64) NOT NULL,
thread_id character varying(64),
email_from text,
email_to text,
email_cc text,
date timestamp with time zone NOT NULL,
email_subject text,
email_text text,
attachments text[]
);
",
"options": {},
"operation": "executeQuery"
},
"credentials": {},
"typeVersion": 2.6
},
{
"id": "19c55312-d1da-4d1e-8637-c5b08a9c1a2d",
"name": "Explode interval into weeks",
"type": "n8n-nodes-base.code",
"position": [
420,
40
],
"parameters": {
"mode": "runOnceForEachItem",
"jsCode": "// Edit this
let whenDidICreateMyGmailAccount = DateTime.fromISO('2013-11-01')
// (don't edit further down)
whenDidICreateMyGmailAccount = whenDidICreateMyGmailAccount.set({day: 1})
let now = $now.set({day: 1})
const weeks = []
while (Math.floor(Interval.fromDateTimes(whenDidICreateMyGmailAccount, now).length('weeks')) > -1) {
weeks.push(now.toISODate())
now = now.minus({weeks: 1})
}
return {json: { weeks }};"
},
"typeVersion": 2
},
{
"id": "aed43a77-6d58-41ba-b0b0-fdd3e9fe777a",
"name": "Get a batch of messages",
"type": "n8n-nodes-base.gmail",
"position": [
1260,
-320
],
"webhookId": "bace3678-df5b-4a9c-a1ef-1c219e3fd07b",
"parameters": {
"simple": false,
"filters": {
"receivedAfter": "={{ $json.after }}",
"receivedBefore": "={{ $json.before }}"
},
"options": {
"downloadAttachments": true
},
"operation": "getAll",
"returnAll": true
},
"credentials": {},
"typeVersion": 2.1
}
],
"active": false,
"pinData": {
"Manual Trigger": [
{
"json": {}
}
]
},
"settings": {
"executionOrder": "v1"
},
"versionId": "3c337d42-a3bb-4b71-ac36-deaf0cdf6019",
"connections": {
"Split Out": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Gmail Trigger": {
"main": [
[
{
"node": "Extract email fields",
"type": "main",
"index": 0
}
]
]
},
"Manual Trigger": {
"main": [
[
{
"node": "Create the table",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Items": {
"main": [
[],
[
{
"node": "Set before and after dates",
"type": "main",
"index": 0
}
]
]
},
"Create the table": {
"main": [
[
{
"node": "Explode interval into weeks",
"type": "main",
"index": 0
}
]
]
},
"Store structured": {
"main": [
[
{
"node": "Store vectorized",
"type": "main",
"index": 0
}
],
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Store vectorized": {
"main": [
[
{
"node": "Was manually triggered?",
"type": "main",
"index": 0
}
],
[]
]
},
"Embeddings Ollama": {
"ai_embedding": [
[
{
"node": "Store vectorized",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Default Data Loader": {
"ai_document": [
[
{
"node": "Store vectorized",
"type": "ai_document",
"index": 0
}
]
]
},
"Extract email fields": {
"main": [
[
{
"node": "Store structured",
"type": "main",
"index": 0
}
]
]
},
"Get a batch of messages": {
"main": [
[
{
"node": "Extract email fields",
"type": "main",
"index": 0
}
]
]
},
"Was manually triggered?": {
"main": [
[
{
"node": "No Operation, do nothing",
"type": "main",
"index": 0
}
],
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Set before and after dates": {
"main": [
[
{
"node": "Get a batch of messages",
"type": "main",
"index": 0
}
]
]
},
"Explode interval into weeks": {
"main": [
[
{
"node": "Split Out",
"type": "main",
"index": 0
}
]
]
},
"Recursive Character Text Splitter": {
"ai_textSplitter": [
[
{
"node": "Default Data Loader",
"type": "ai_textSplitter",
"index": 0
}
]
]
}
}
}
功能特点
- 自动检测新邮件
- AI智能内容分析
- 自定义分类规则
- 批量处理能力
- 详细的处理日志
技术分析
节点类型及作用
- @N8N/N8N Nodes Langchain.Embeddingsollama
- @N8N/N8N Nodes Langchain.Documentdefaultdataloader
- @N8N/N8N Nodes Langchain.Textsplitterrecursivecharactertextsplitter
- Gmailtrigger
- Splitinbatches
复杂度评估
配置难度:
维护难度:
扩展性:
实施指南
前置条件
- 有效的Gmail账户
- n8n平台访问权限
- Google API凭证
- AI分类服务订阅
配置步骤
- 在n8n中导入工作流JSON文件
- 配置Gmail节点的认证信息
- 设置AI分类器的API密钥
- 自定义分类规则和标签映射
- 测试工作流执行
- 配置定时触发器(可选)
关键参数
| 参数名称 | 默认值 | 说明 |
|---|---|---|
| maxEmails | 50 | 单次处理的最大邮件数量 |
| confidenceThreshold | 0.8 | 分类置信度阈值 |
| autoLabel | true | 是否自动添加标签 |
最佳实践
优化建议
- 定期更新AI分类模型以提高准确性
- 根据邮件量调整处理批次大小
- 设置合理的分类置信度阈值
- 定期清理过期的分类规则
安全注意事项
- 妥善保管API密钥和认证信息
- 限制工作流的访问权限
- 定期审查处理日志
- 启用双因素认证保护Gmail账户
性能优化
- 使用增量处理减少重复工作
- 缓存频繁访问的数据
- 并行处理多个邮件分类任务
- 监控系统资源使用情况
故障排除
常见问题
邮件未被正确分类
检查AI分类器的置信度阈值设置,适当降低阈值或更新训练数据。
Gmail认证失败
确认Google API凭证有效且具有正确的权限范围,重新进行OAuth授权。
调试技巧
- 启用详细日志记录查看每个步骤的执行情况
- 使用测试邮件验证分类逻辑
- 检查网络连接和API服务状态
- 逐步执行工作流定位问题节点
错误处理
工作流包含以下错误处理机制:
- 网络超时自动重试(最多3次)
- API错误记录和告警
- 处理失败邮件的隔离机制
- 异常情况下的回滚操作