|
1 |
| -import type { INodeTypeBaseDescription, IVersionedNodeType } from 'n8n-workflow'; |
2 |
| -import { VersionedNodeType } from 'n8n-workflow'; |
3 |
| - |
4 |
| -import { DocumentDefaultDataLoaderV1 } from './V1/DocumentDefaultDataLoaderV1.node'; |
5 |
| -import { DocumentDefaultDataLoaderV2 } from './V2/DocumentDefaultDataLoaderV2.node'; |
6 |
| - |
7 |
| -export class DocumentDefaultDataLoader extends VersionedNodeType { |
8 |
| - constructor() { |
9 |
| - const baseDescription: INodeTypeBaseDescription = { |
10 |
| - displayName: 'Default Data Loader', |
11 |
| - name: 'documentDefaultDataLoader', |
12 |
| - icon: 'file:binary.svg', |
13 |
| - group: ['transform'], |
14 |
| - description: 'Load data from previous step in the workflow', |
15 |
| - codex: { |
16 |
| - categories: ['AI'], |
17 |
| - subcategories: { |
18 |
| - AI: ['Document Loaders'], |
| 1 | +/* eslint-disable n8n-nodes-base/node-dirname-against-convention */ |
| 2 | +import { RecursiveCharacterTextSplitter, type TextSplitter } from '@langchain/textsplitters'; |
| 3 | +import { |
| 4 | + NodeConnectionTypes, |
| 5 | + type INodeType, |
| 6 | + type INodeTypeDescription, |
| 7 | + type ISupplyDataFunctions, |
| 8 | + type SupplyData, |
| 9 | + type IDataObject, |
| 10 | + type INodeInputConfiguration, |
| 11 | +} from 'n8n-workflow'; |
| 12 | + |
| 13 | +import { logWrapper } from '@utils/logWrapper'; |
| 14 | +import { N8nBinaryLoader } from '@utils/N8nBinaryLoader'; |
| 15 | +import { N8nJsonLoader } from '@utils/N8nJsonLoader'; |
| 16 | +import { metadataFilterField } from '@utils/sharedFields'; |
| 17 | + |
| 18 | +// Dependencies needed under the hood by the loaders. We add them |
| 19 | +// here only to track which dependency is used where |
| 20 | +// import 'd3-dsv'; // for csv |
| 21 | +import 'mammoth'; // for docx |
| 22 | +import 'epub2'; // for epub |
| 23 | +import 'pdf-parse'; // for pdf |
| 24 | + |
/**
 * Builds the dynamic input list for the node from its current parameters.
 *
 * A single required Text Splitter input is returned when `textSplittingMode`
 * is `'custom'` or is absent/falsy (nodes that have no such parameter);
 * otherwise the node exposes no inputs.
 *
 * NOTE: the source text of this function is embedded into the node's `inputs`
 * expression via `getInputs.toString()`, so it has to stay self-contained —
 * which is presumably why the connection type is spelled as a string literal
 * instead of referencing an imported constant.
 *
 * @param parameters - Current node parameter values.
 * @returns The input configurations to expose on the node.
 */
function getInputs(parameters: IDataObject) {
  const mode = parameters?.textSplittingMode;
  const needsSplitterInput = mode === 'custom' || !mode;

  const result: INodeInputConfiguration[] = [];
  if (!needsSplitterInput) {
    return result;
  }

  result.push({
    displayName: 'Text Splitter',
    maxConnections: 1,
    type: 'ai_textSplitter',
    required: true,
  });
  return result;
}
| 41 | + |
/**
 * "Default Data Loader" sub-node.
 *
 * Supplies a document loader (output type `NodeConnectionTypes.AiDocument`)
 * that reads either JSON or binary data, optionally chunked by a text
 * splitter.
 *
 * Versions:
 * - 1:   the text splitter always comes from the connected Text Splitter input.
 * - 1.1: adds the `textSplittingMode` parameter; `'simple'` uses a built-in
 *        RecursiveCharacterTextSplitter, `'custom'` uses the connected input.
 */
export class DocumentDefaultDataLoader implements INodeType {
  description: INodeTypeDescription = {
    displayName: 'Default Data Loader',
    name: 'documentDefaultDataLoader',
    icon: 'file:binary.svg',
    group: ['transform'],
    version: [1, 1.1],
    defaultVersion: 1.1,
    description: 'Load data from previous step in the workflow',
    defaults: {
      name: 'Default Data Loader',
    },
    codex: {
      categories: ['AI'],
      subcategories: {
        AI: ['Document Loaders'],
      },
      resources: {
        primaryDocumentation: [
          {
            url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
          },
        ],
      },
    },
    // Inputs are computed from the parameters: the source of getInputs is
    // inlined into the expression and invoked with the current $parameter.
    // eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
    inputs: `={{ ((parameter) => { ${getInputs.toString()}; return getInputs(parameter) })($parameter) }}`,
    // eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
    outputs: [NodeConnectionTypes.AiDocument],
    outputNames: ['Document'],
    properties: [
      {
        displayName:
          'This will load data from a previous step in the workflow. <a href="/templates/1962" target="_blank">Example</a>',
        name: 'notice',
        type: 'notice',
        default: '',
      },
      {
        displayName: 'Text Splitting',
        name: 'textSplittingMode',
        type: 'options',
        default: 'simple',
        required: true,
        noDataExpression: true,
        displayOptions: {
          show: {
            // Must match the version that supplyData() treats as supporting
            // this parameter (typeVersion 1.1). The previous value, 1.2, is
            // not a declared version, so the option was never shown.
            '@version': [1.1],
          },
        },
        options: [
          {
            name: 'Simple',
            value: 'simple',
            description: 'Uses Recursive Character Text Splitter with default options',
          },
          {
            name: 'Custom',
            value: 'custom',
            description: 'Connect a text splitter of your choice',
          },
        ],
      },
      {
        displayName: 'Type of Data',
        name: 'dataType',
        type: 'options',
        default: 'json',
        required: true,
        noDataExpression: true,
        options: [
          {
            name: 'JSON',
            value: 'json',
            description: 'Process JSON data from previous step in the workflow',
          },
          {
            name: 'Binary',
            value: 'binary',
            description: 'Process binary data from previous step in the workflow',
          },
        ],
      },
      {
        displayName: 'Mode',
        name: 'jsonMode',
        type: 'options',
        default: 'allInputData',
        required: true,
        displayOptions: {
          show: {
            dataType: ['json'],
          },
        },
        options: [
          {
            name: 'Load All Input Data',
            value: 'allInputData',
            description: 'Use all JSON data that flows into the parent agent or chain',
          },
          {
            name: 'Load Specific Data',
            value: 'expressionData',
            description:
              'Load a subset of data, and/or data from any previous step in the workflow',
          },
        ],
      },
      {
        displayName: 'Mode',
        name: 'binaryMode',
        type: 'options',
        default: 'allInputData',
        required: true,
        displayOptions: {
          show: {
            dataType: ['binary'],
          },
        },
        options: [
          {
            name: 'Load All Input Data',
            value: 'allInputData',
            description: 'Use all Binary data that flows into the parent agent or chain',
          },
          {
            name: 'Load Specific Data',
            value: 'specificField',
            description: 'Load data from a specific field in the parent agent or chain',
          },
        ],
      },
      {
        displayName: 'Data Format',
        name: 'loader',
        type: 'options',
        default: 'auto',
        required: true,
        displayOptions: {
          show: {
            dataType: ['binary'],
          },
        },
        options: [
          {
            name: 'Automatically Detect by Mime Type',
            value: 'auto',
            description: 'Uses the mime type to detect the format',
          },
          {
            name: 'CSV',
            value: 'csvLoader',
            description: 'Load CSV files',
          },
          {
            name: 'Docx',
            value: 'docxLoader',
            description: 'Load Docx documents',
          },
          {
            name: 'EPub',
            value: 'epubLoader',
            description: 'Load EPub files',
          },
          {
            name: 'JSON',
            value: 'jsonLoader',
            description: 'Load JSON files',
          },
          {
            name: 'PDF',
            value: 'pdfLoader',
            description: 'Load PDF documents',
          },
          {
            name: 'Text',
            value: 'textLoader',
            description: 'Load plain text files',
          },
        ],
      },
      {
        displayName: 'Data',
        name: 'jsonData',
        type: 'string',
        typeOptions: {
          rows: 6,
        },
        default: '',
        required: true,
        description: 'Drag and drop fields from the input pane, or use an expression',
        displayOptions: {
          show: {
            dataType: ['json'],
            jsonMode: ['expressionData'],
          },
        },
      },
      {
        displayName: 'Input Data Field Name',
        name: 'binaryDataKey',
        type: 'string',
        default: 'data',
        required: true,
        description:
          'The name of the field in the agent or chain’s input that contains the binary file to be processed',
        displayOptions: {
          show: {
            dataType: ['binary'],
          },
          hide: {
            binaryMode: ['allInputData'],
          },
        },
      },
      {
        displayName: 'Options',
        name: 'options',
        type: 'collection',
        placeholder: 'Add Option',
        default: {},
        options: [
          {
            displayName: 'JSON Pointers',
            name: 'pointers',
            type: 'string',
            default: '',
            description: 'Pointers to extract from JSON, e.g. "/text" or "/text, /meta/title"',
            displayOptions: {
              show: {
                '/loader': ['jsonLoader', 'auto'],
              },
            },
          },
          {
            displayName: 'CSV Separator',
            name: 'separator',
            type: 'string',
            description: 'Separator to use for CSV',
            default: ',',
            displayOptions: {
              show: {
                '/loader': ['csvLoader', 'auto'],
              },
            },
          },
          {
            displayName: 'CSV Column',
            name: 'column',
            type: 'string',
            default: '',
            description: 'Column to extract from CSV',
            displayOptions: {
              show: {
                '/loader': ['csvLoader', 'auto'],
              },
            },
          },
          {
            displayName: 'Split Pages in PDF',
            description: 'Whether to split PDF pages into separate documents',
            name: 'splitPages',
            type: 'boolean',
            default: true,
            displayOptions: {
              show: {
                '/loader': ['pdfLoader', 'auto'],
              },
            },
          },
          {
            ...metadataFilterField,
            displayName: 'Metadata',
            description:
              'Metadata to add to each document. Could be used for filtering during retrieval',
            placeholder: 'Add property',
          },
        ],
      },
    ],
  };

  /**
   * Creates the loader object handed to the parent node.
   *
   * @param itemIndex - Index of the item whose parameters are read.
   * @returns Supply data whose `response` is the log-wrapped loader: an
   *   N8nBinaryLoader when `dataType` is `'binary'`, otherwise an N8nJsonLoader.
   */
  async supplyData(this: ISupplyDataFunctions, itemIndex: number): Promise<SupplyData> {
    const node = this.getNode();
    const dataType = this.getNodeParameter('dataType', itemIndex, 'json') as 'json' | 'binary';

    let textSplitter: TextSplitter | undefined;

    if (node.typeVersion === 1.1) {
      const textSplittingMode = this.getNodeParameter('textSplittingMode', itemIndex, 'simple') as
        | 'simple'
        | 'custom';

      if (textSplittingMode === 'simple') {
        // Built-in splitter with fixed defaults; no Text Splitter input is needed.
        textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
      } else if (textSplittingMode === 'custom') {
        textSplitter = (await this.getInputConnectionData(NodeConnectionTypes.AiTextSplitter, 0)) as
          | TextSplitter
          | undefined;
      }
    } else {
      // Other versions (v1) have no textSplittingMode: always use the connected splitter.
      textSplitter = (await this.getInputConnectionData(NodeConnectionTypes.AiTextSplitter, 0)) as
        | TextSplitter
        | undefined;
    }

    // Read unconditionally; it is only passed on to the binary loader below.
    const binaryDataKey = this.getNodeParameter('binaryDataKey', itemIndex, '') as string;

    // NOTE(review): 'options.' is presumably the parameter-name prefix the
    // loaders use to look up the "Options" collection — confirm in the loaders.
    const processor =
      dataType === 'binary'
        ? new N8nBinaryLoader(this, 'options.', binaryDataKey, textSplitter)
        : new N8nJsonLoader(this, 'options.', textSplitter);

    return {
      response: logWrapper(processor, this),
    };
  }
}
|
0 commit comments