Skip to content

Commit 037f042

Browse files
committed
feat(ADO-3519): use inline versioning and not separate folders
1 parent 7fcc7f5 commit 037f042

File tree

8 files changed

+566
-1037
lines changed

8 files changed

+566
-1037
lines changed
Lines changed: 352 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,359 @@
1-
import type { INodeTypeBaseDescription, IVersionedNodeType } from 'n8n-workflow';
2-
import { VersionedNodeType } from 'n8n-workflow';
3-
4-
import { DocumentDefaultDataLoaderV1 } from './V1/DocumentDefaultDataLoaderV1.node';
5-
import { DocumentDefaultDataLoaderV2 } from './V2/DocumentDefaultDataLoaderV2.node';
6-
7-
export class DocumentDefaultDataLoader extends VersionedNodeType {
8-
constructor() {
9-
const baseDescription: INodeTypeBaseDescription = {
10-
displayName: 'Default Data Loader',
11-
name: 'documentDefaultDataLoader',
12-
icon: 'file:binary.svg',
13-
group: ['transform'],
14-
description: 'Load data from previous step in the workflow',
15-
codex: {
16-
categories: ['AI'],
17-
subcategories: {
18-
AI: ['Document Loaders'],
1+
/* eslint-disable n8n-nodes-base/node-dirname-against-convention */
2+
import { RecursiveCharacterTextSplitter, type TextSplitter } from '@langchain/textsplitters';
3+
import {
4+
NodeConnectionTypes,
5+
type INodeType,
6+
type INodeTypeDescription,
7+
type ISupplyDataFunctions,
8+
type SupplyData,
9+
type IDataObject,
10+
type INodeInputConfiguration,
11+
} from 'n8n-workflow';
12+
13+
import { logWrapper } from '@utils/logWrapper';
14+
import { N8nBinaryLoader } from '@utils/N8nBinaryLoader';
15+
import { N8nJsonLoader } from '@utils/N8nJsonLoader';
16+
import { metadataFilterField } from '@utils/sharedFields';
17+
18+
// Dependencies needed under the hood by the loaders. We import them
19+
// here only to track which dependency is used where
20+
// import 'd3-dsv'; // for csv
21+
import 'mammoth'; // for docx
22+
import 'epub2'; // for epub
23+
import 'pdf-parse'; // for pdf
24+
25+
/**
 * Builds the list of input connections the node exposes for the given
 * parameter values.
 *
 * The source text of this function is embedded (via `getInputs.toString()`)
 * into the node's `inputs` expression, so it is evaluated with `$parameter`
 * as its argument. NOTE(review): because only the function text is embedded,
 * it presumably must not reference anything from the surrounding module
 * scope at runtime — confirm before adding such references.
 *
 * @param parameters - The node's current parameter values; only
 *   `textSplittingMode` is read.
 * @returns An array that contains a single required "Text Splitter" input
 *   (at most one connection) when `textSplittingMode` is missing/falsy or
 *   `'custom'`, and is empty otherwise.
 */
function getInputs(parameters: IDataObject) {
26+
const inputs: INodeInputConfiguration[] = [];
27+
28+
const textSplittingMode = parameters?.textSplittingMode;
29+
// If text splitting mode is 'custom' or does not exist (v1), we need to add an input for the text splitter
30+
if (!textSplittingMode || textSplittingMode === 'custom') {
31+
inputs.push({
32+
displayName: 'Text Splitter',
33+
maxConnections: 1,
34+
type: 'ai_textSplitter',
35+
required: true,
36+
});
37+
}
38+
39+
return inputs;
40+
}
41+
42+
export class DocumentDefaultDataLoader implements INodeType {
43+
description: INodeTypeDescription = {
44+
displayName: 'Default Data Loader',
45+
name: 'documentDefaultDataLoader',
46+
icon: 'file:binary.svg',
47+
group: ['transform'],
48+
version: [1, 1.1],
49+
defaultVersion: 1.1,
50+
description: 'Load data from previous step in the workflow',
51+
defaults: {
52+
name: 'Default Data Loader',
53+
},
54+
codex: {
55+
categories: ['AI'],
56+
subcategories: {
57+
AI: ['Document Loaders'],
58+
},
59+
resources: {
60+
primaryDocumentation: [
61+
{
62+
url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
63+
},
64+
],
65+
},
66+
},
67+
// eslint-disable-next-line n8n-nodes-base/node-class-description-inputs-wrong-regular-node
68+
inputs: `={{ ((parameter) => { ${getInputs.toString()}; return getInputs(parameter) })($parameter) }}`,
69+
// eslint-disable-next-line n8n-nodes-base/node-class-description-outputs-wrong
70+
outputs: [NodeConnectionTypes.AiDocument],
71+
outputNames: ['Document'],
72+
properties: [
73+
{
74+
displayName:
75+
'This will load data from a previous step in the workflow. <a href="/templates/1962" target="_blank">Example</a>',
76+
name: 'notice',
77+
type: 'notice',
78+
default: '',
79+
},
80+
{
81+
displayName: 'Text Splitting',
82+
name: 'textSplittingMode',
83+
type: 'options',
84+
default: 'simple',
85+
required: true,
86+
noDataExpression: true,
87+
displayOptions: {
88+
show: {
89+
'@version': [1.1],
90+
},
1991
},
20-
resources: {
21-
primaryDocumentation: [
22-
{
23-
url: 'https://docs.n8n.io/integrations/builtin/cluster-nodes/sub-nodes/n8n-nodes-langchain.documentdefaultdataloader/',
24-
},
25-
],
92+
options: [
93+
{
94+
name: 'Simple',
95+
value: 'simple',
96+
description: 'Uses Recursive Character Text Splitter with default options',
97+
},
98+
{
99+
name: 'Custom',
100+
value: 'custom',
101+
description: 'Connect a text splitter of your choice',
102+
},
103+
],
104+
},
105+
{
106+
displayName: 'Type of Data',
107+
name: 'dataType',
108+
type: 'options',
109+
default: 'json',
110+
required: true,
111+
noDataExpression: true,
112+
options: [
113+
{
114+
name: 'JSON',
115+
value: 'json',
116+
description: 'Process JSON data from previous step in the workflow',
117+
},
118+
{
119+
name: 'Binary',
120+
value: 'binary',
121+
description: 'Process binary data from previous step in the workflow',
122+
},
123+
],
124+
},
125+
{
126+
displayName: 'Mode',
127+
name: 'jsonMode',
128+
type: 'options',
129+
default: 'allInputData',
130+
required: true,
131+
displayOptions: {
132+
show: {
133+
dataType: ['json'],
134+
},
26135
},
136+
options: [
137+
{
138+
name: 'Load All Input Data',
139+
value: 'allInputData',
140+
description: 'Use all JSON data that flows into the parent agent or chain',
141+
},
142+
{
143+
name: 'Load Specific Data',
144+
value: 'expressionData',
145+
description:
146+
'Load a subset of data, and/or data from any previous step in the workflow',
147+
},
148+
],
27149
},
28-
defaultVersion: 2,
29-
};
150+
{
151+
displayName: 'Mode',
152+
name: 'binaryMode',
153+
type: 'options',
154+
default: 'allInputData',
155+
required: true,
156+
displayOptions: {
157+
show: {
158+
dataType: ['binary'],
159+
},
160+
},
161+
options: [
162+
{
163+
name: 'Load All Input Data',
164+
value: 'allInputData',
165+
description: 'Use all Binary data that flows into the parent agent or chain',
166+
},
167+
{
168+
name: 'Load Specific Data',
169+
value: 'specificField',
170+
description: 'Load data from a specific field in the parent agent or chain',
171+
},
172+
],
173+
},
174+
{
175+
displayName: 'Data Format',
176+
name: 'loader',
177+
type: 'options',
178+
default: 'auto',
179+
required: true,
180+
displayOptions: {
181+
show: {
182+
dataType: ['binary'],
183+
},
184+
},
185+
options: [
186+
{
187+
name: 'Automatically Detect by Mime Type',
188+
value: 'auto',
189+
description: 'Uses the mime type to detect the format',
190+
},
191+
{
192+
name: 'CSV',
193+
value: 'csvLoader',
194+
description: 'Load CSV files',
195+
},
196+
{
197+
name: 'Docx',
198+
value: 'docxLoader',
199+
description: 'Load Docx documents',
200+
},
201+
{
202+
name: 'EPub',
203+
value: 'epubLoader',
204+
description: 'Load EPub files',
205+
},
206+
{
207+
name: 'JSON',
208+
value: 'jsonLoader',
209+
description: 'Load JSON files',
210+
},
211+
{
212+
name: 'PDF',
213+
value: 'pdfLoader',
214+
description: 'Load PDF documents',
215+
},
216+
{
217+
name: 'Text',
218+
value: 'textLoader',
219+
description: 'Load plain text files',
220+
},
221+
],
222+
},
223+
{
224+
displayName: 'Data',
225+
name: 'jsonData',
226+
type: 'string',
227+
typeOptions: {
228+
rows: 6,
229+
},
230+
default: '',
231+
required: true,
232+
description: 'Drag and drop fields from the input pane, or use an expression',
233+
displayOptions: {
234+
show: {
235+
dataType: ['json'],
236+
jsonMode: ['expressionData'],
237+
},
238+
},
239+
},
240+
{
241+
displayName: 'Input Data Field Name',
242+
name: 'binaryDataKey',
243+
type: 'string',
244+
default: 'data',
245+
required: true,
246+
description:
247+
'The name of the field in the agent or chain’s input that contains the binary file to be processed',
248+
displayOptions: {
249+
show: {
250+
dataType: ['binary'],
251+
},
252+
hide: {
253+
binaryMode: ['allInputData'],
254+
},
255+
},
256+
},
257+
{
258+
displayName: 'Options',
259+
name: 'options',
260+
type: 'collection',
261+
placeholder: 'Add Option',
262+
default: {},
263+
options: [
264+
{
265+
displayName: 'JSON Pointers',
266+
name: 'pointers',
267+
type: 'string',
268+
default: '',
269+
description: 'Pointers to extract from JSON, e.g. "/text" or "/text, /meta/title"',
270+
displayOptions: {
271+
show: {
272+
'/loader': ['jsonLoader', 'auto'],
273+
},
274+
},
275+
},
276+
{
277+
displayName: 'CSV Separator',
278+
name: 'separator',
279+
type: 'string',
280+
description: 'Separator to use for CSV',
281+
default: ',',
282+
displayOptions: {
283+
show: {
284+
'/loader': ['csvLoader', 'auto'],
285+
},
286+
},
287+
},
288+
{
289+
displayName: 'CSV Column',
290+
name: 'column',
291+
type: 'string',
292+
default: '',
293+
description: 'Column to extract from CSV',
294+
displayOptions: {
295+
show: {
296+
'/loader': ['csvLoader', 'auto'],
297+
},
298+
},
299+
},
300+
{
301+
displayName: 'Split Pages in PDF',
302+
description: 'Whether to split PDF pages into separate documents',
303+
name: 'splitPages',
304+
type: 'boolean',
305+
default: true,
306+
displayOptions: {
307+
show: {
308+
'/loader': ['pdfLoader', 'auto'],
309+
},
310+
},
311+
},
312+
{
313+
...metadataFilterField,
314+
displayName: 'Metadata',
315+
description:
316+
'Metadata to add to each document. Could be used for filtering during retrieval',
317+
placeholder: 'Add property',
318+
},
319+
],
320+
},
321+
],
322+
};
30323

31-
const nodeVersions: IVersionedNodeType['nodeVersions'] = {
32-
1: new DocumentDefaultDataLoaderV1(baseDescription),
33-
2: new DocumentDefaultDataLoaderV2(baseDescription),
34-
};
324+
/**
 * Supplies the document loader for one input item.
 *
 * Chooses a text splitter based on the node's type version and the
 * `textSplittingMode` parameter, then wraps either a binary or a JSON
 * loader (depending on `dataType`) with `logWrapper`.
 *
 * @param itemIndex - Index of the item whose parameters are read.
 * @returns An object whose `response` is the log-wrapped loader.
 */
async supplyData(this: ISupplyDataFunctions, itemIndex: number): Promise<SupplyData> {
325+
const node = this.getNode();
326+
// Falls back to 'json' when the parameter is not set.
const dataType = this.getNodeParameter('dataType', itemIndex, 'json') as 'json' | 'binary';
327+
328+
let textSplitter: TextSplitter | undefined;
329+
330+
// Only version 1.1 reads `textSplittingMode`; every other version takes the else branch below.
if (node.typeVersion === 1.1) {
331+
const textSplittingMode = this.getNodeParameter('textSplittingMode', itemIndex, 'simple') as
332+
| 'simple'
333+
| 'custom';
35334

36-
super(nodeVersions, baseDescription);
335+
if (textSplittingMode === 'simple') {
336+
// 'simple' mode: built-in recursive splitter with fixed chunk size and overlap.
textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 1000, chunkOverlap: 200 });
337+
} else if (textSplittingMode === 'custom') {
338+
// 'custom' mode: use whatever is connected on the text splitter input (may be undefined).
textSplitter = (await this.getInputConnectionData(NodeConnectionTypes.AiTextSplitter, 0)) as
339+
| TextSplitter
340+
| undefined;
341+
}
342+
} else {
343+
// Other versions: always take the splitter from the text splitter input connection.
textSplitter = (await this.getInputConnectionData(NodeConnectionTypes.AiTextSplitter, 0)) as
344+
| TextSplitter
345+
| undefined;
346+
}
347+
348+
// Read unconditionally with an empty-string fallback; only passed to the binary loader.
const binaryDataKey = this.getNodeParameter('binaryDataKey', itemIndex, '') as string;
349+
350+
// Both loaders receive the 'options.' prefix string and the chosen splitter.
const processor =
351+
dataType === 'binary'
352+
? new N8nBinaryLoader(this, 'options.', binaryDataKey, textSplitter)
353+
: new N8nJsonLoader(this, 'options.', textSplitter);
354+
355+
return {
356+
response: logWrapper(processor, this),
357+
};
37358
}
38359
}

0 commit comments

Comments
 (0)