Flowise

Форк
0
711 строк · 29.5 Кб
1
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
2
import { DocumentStore } from '../../database/entities/DocumentStore'
3
// @ts-ignore
4
import {
5
    addFileToStorage,
6
    getFileFromStorage,
7
    ICommonObject,
8
    IDocument,
9
    removeFilesFromStorage,
10
    removeSpecificFileFromStorage
11
} from 'flowise-components'
12
import {
13
    DocumentStoreStatus,
14
    IDocumentStoreFileChunkPagedResponse,
15
    IDocumentStoreLoader,
16
    IDocumentStoreLoaderFile,
17
    IDocumentStoreLoaderForPreview,
18
    IDocumentStoreWhereUsed
19
} from '../../Interface'
20
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
21
import { v4 as uuidv4 } from 'uuid'
22
import { databaseEntities } from '../../utils'
23
import logger from '../../utils/logger'
24
import nodesService from '../nodes'
25
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
26
import { StatusCodes } from 'http-status-codes'
27
import { getErrorMessage } from '../../errors/utils'
28
import { ChatFlow } from '../../database/entities/ChatFlow'
29

30
// Folder name passed as the base folder to every flowise-components storage helper used in this module
const DOCUMENT_STORE_BASE_FOLDER = 'docustore'
31

32
/**
 * Creates a document store entity from the given values and saves it.
 *
 * @param newDocumentStore - Values used to build the new entity.
 * @returns The value resolved by the repository `save` call.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const createDocumentStore = async (newDocumentStore: DocumentStore) => {
    try {
        const dataSource = getRunningExpressApp().AppDataSource
        const draft = dataSource.getRepository(DocumentStore).create(newDocumentStore)
        // `return await` keeps a rejected save inside this try/catch
        return await dataSource.getRepository(DocumentStore).save(draft)
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.createDocumentStore - ${reason}`)
    }
}
45

46
/**
 * Loads every document store entity.
 *
 * @returns All rows returned by the DocumentStore repository `find` call.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const getAllDocumentStores = async () => {
    try {
        const storeRepository = getRunningExpressApp().AppDataSource.getRepository(DocumentStore)
        // `return await` keeps a rejected query inside this try/catch
        return await storeRepository.find()
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.getAllDocumentStores - ${reason}`)
    }
}
58

59
/**
 * Removes one loader from a document store, together with its stored file (if the
 * loader entry has a `path`) and all chunks whose `docId` is the loader id.
 *
 * @param storeId - Id of the document store that owns the loader.
 * @param loaderId - Id of the loader entry inside the store's serialized `loaders` list.
 * @returns The saved document store entity with the loader removed.
 * @throws InternalFlowiseError NOT_FOUND when the store does not exist,
 *         INTERNAL_SERVER_ERROR when the loader is missing or anything else fails.
 */
const deleteLoaderFromDocumentStore = async (storeId: string, loaderId: string) => {
    const errorPrefix = 'Error: documentStoreServices.deleteLoaderFromDocumentStore'
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store ${storeId} not found`)
        }
        const existingLoaders = JSON.parse(entity.loaders)
        const index = existingLoaders.findIndex((uFile: IDocumentStoreLoader) => uFile.id === loaderId)
        if (index === -1) {
            // Status kept as in the original; the prefix is inlined because typed errors are no longer re-wrapped
            throw new InternalFlowiseError(
                StatusCodes.INTERNAL_SERVER_ERROR,
                `${errorPrefix} - Unable to locate loader in Document Store ${entity.name}`
            )
        }
        const found = existingLoaders[index]
        if (found.path) {
            // remove the existing files, if any of the file loaders were used
            await removeSpecificFileFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id, found.path)
        }
        existingLoaders.splice(index, 1)
        // remove the chunks that belong to this loader
        await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).delete({ docId: found.id })

        entity.loaders = JSON.stringify(existingLoaders)
        const results = await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return results
    } catch (error) {
        // Errors raised above already carry the intended status code; re-wrapping would turn a 404 into a 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `${errorPrefix} - ${getErrorMessage(error)}`)
    }
}
98

99
/**
 * Looks up a single document store by id.
 *
 * @param storeId - Id of the document store.
 * @returns The matching DocumentStore entity.
 * @throws InternalFlowiseError NOT_FOUND when no store has that id,
 *         INTERNAL_SERVER_ERROR for any other failure.
 */
const getDocumentStoreById = async (storeId: string) => {
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(
                StatusCodes.NOT_FOUND,
                `Error: documentStoreServices.getDocumentStoreById - Document store ${storeId} not found`
            )
        }
        return entity
    } catch (error) {
        // Keep the NOT_FOUND raised above instead of downgrading it to a generic 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.getDocumentStoreById - ${getErrorMessage(error)}`
        )
    }
}
119

120
/**
 * Resolves the chatflow ids stored in `entity.whereUsed` to `{ id, name }` pairs.
 * Ids for which no ChatFlow row is found are left out of the result.
 *
 * @param entity - Document store whose `whereUsed` JSON string is inspected.
 * @returns One entry per chatflow that was found; an empty array when `whereUsed` is falsy.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const getUsedChatflowNames = async (entity: DocumentStore) => {
    try {
        const appServer = getRunningExpressApp()
        if (!entity.whereUsed) {
            return []
        }
        const chatflowIds = JSON.parse(entity.whereUsed)
        const resolved: IDocumentStoreWhereUsed[] = []
        let position = 0
        while (position < chatflowIds.length) {
            const chatflowId = chatflowIds[position]
            position += 1
            // one lookup per id, fetching only the columns that are returned
            const chatflow = await appServer.AppDataSource.getRepository(ChatFlow).findOne({
                where: { id: chatflowId },
                select: ['id', 'name']
            })
            if (!chatflow) {
                continue
            }
            resolved.push({ id: chatflowId, name: chatflow.name })
        }
        return resolved
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.getUsedChatflowNames - ${reason}`)
    }
}
148

149
/**
 * Returns one page (50 chunks, ordered by `chunkNo`) of the chunks of a loader, or of
 * the whole store when `fileId` is the literal string 'all'.
 *
 * @param storeId - Id of the document store.
 * @param fileId - Loader id inside the store, or 'all' to page through every chunk of the store.
 * @param pageNo - 1-based page number; defaults to the first page.
 * @returns The page of chunks, the total chunk count for the filter, the loader descriptor
 *          (undefined for 'all'), the page number and the store name/description.
 * @throws InternalFlowiseError NOT_FOUND when the store or the loader does not exist,
 *         INTERNAL_SERVER_ERROR for any other failure.
 */
const getDocumentStoreFileChunks = async (storeId: string, fileId: string, pageNo: number = 1) => {
    const errorPrefix = 'Error: documentStoreServices.getDocumentStoreFileChunks'
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store ${storeId} not found`)
        }
        const loaders = JSON.parse(entity.loaders)
        const wholeStore = fileId === 'all'

        let found: IDocumentStoreLoader | undefined
        if (!wholeStore) {
            found = loaders.find((loader: IDocumentStoreLoader) => loader.id === fileId)
            if (!found) {
                throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document file ${fileId} not found`)
            }
        }
        if (found) {
            // The returned descriptor carries store-level values: the character total of all
            // loaders plus the store's id and status (this only mutates the parsed copy).
            found.totalChars = loaders.reduce((sum: number, loader: IDocumentStoreLoader) => sum + loader.totalChars, 0)
            found.id = entity.id
            found.status = entity.status
        }

        const PAGE_SIZE = 50
        const skip = (pageNo - 1) * PAGE_SIZE
        const take = PAGE_SIZE
        const whereCondition: any = wholeStore ? { storeId: storeId } : { docId: fileId }
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        const count = await chunkRepository.count({
            where: whereCondition
        })
        // find() resolves to an array (possibly empty), so no separate not-found check is needed here
        const chunks = await chunkRepository.find({
            skip,
            take,
            where: whereCondition,
            order: {
                chunkNo: 'ASC'
            }
        })

        const response: IDocumentStoreFileChunkPagedResponse = {
            chunks: chunks,
            count: count,
            file: found,
            currentPage: pageNo,
            storeName: entity.name,
            description: entity.description
        }
        return response
    } catch (error) {
        // Keep the NOT_FOUND errors raised above instead of downgrading them to a generic 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `${errorPrefix} - ${getErrorMessage(error)}`)
    }
}
222

223
/**
 * Deletes a document store: first its chunks, then its stored files, then the store row.
 *
 * @param storeId - Id of the document store to delete.
 * @returns `{ deleted }` where `deleted` is the `affected` value reported by the store delete.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure, including a missing store.
 */
const deleteDocumentStore = async (storeId: string) => {
    try {
        const appServer = getRunningExpressApp()
        // chunks are removed before the store itself is looked up
        await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).delete({ storeId: storeId })

        const storeRepository = appServer.AppDataSource.getRepository(DocumentStore)
        const entity = await storeRepository.findOneBy({ id: storeId })
        if (!entity) {
            throw new Error(`Document store ${storeId} not found`)
        }
        // files associated with the store
        await removeFilesFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id)
        // finally the store row
        const deleteResult = await storeRepository.delete({ id: storeId })
        return { deleted: deleteResult.affected }
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.deleteDocumentStore - ${reason}`)
    }
}
249

250
/**
 * Deletes a single chunk and decrements the owning loader's `totalChunks` / `totalChars`.
 *
 * @param storeId - Id of the document store.
 * @param docId - Id of the loader (inside the store) the chunk is accounted to.
 * @param chunkId - Id of the chunk to delete.
 * @returns The first page of the loader's remaining chunks (see getDocumentStoreFileChunks).
 * @throws InternalFlowiseError NOT_FOUND when the store, loader or chunk does not exist,
 *         INTERNAL_SERVER_ERROR for any other failure.
 */
const deleteDocumentStoreFileChunk = async (storeId: string, docId: string, chunkId: string) => {
    const errorPrefix = 'Error: documentStoreServices.deleteDocumentStoreFileChunk'
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store ${storeId} not found`)
        }
        const loaders = JSON.parse(entity.loaders)
        const found = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === docId)
        if (!found) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store loader ${docId} not found`)
        }

        const tbdChunk = await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).findOneBy({
            id: chunkId
        })
        if (!tbdChunk) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document Chunk ${chunkId} not found`)
        }
        await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).delete(chunkId)
        // keep the loader metrics in step with the removed chunk
        found.totalChunks--
        found.totalChars -= tbdChunk.pageContent.length
        entity.loaders = JSON.stringify(loaders)
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return getDocumentStoreFileChunks(storeId, docId)
    } catch (error) {
        // Keep the NOT_FOUND errors raised above instead of downgrading them to a generic 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `${errorPrefix} - ${getErrorMessage(error)}`)
    }
}
284

285
/**
 * Replaces the content and metadata of a single chunk and adjusts the owning loader's
 * `totalChars` by the difference in content length.
 *
 * @param storeId - Id of the document store.
 * @param docId - Id of the loader (inside the store) the chunk is accounted to.
 * @param chunkId - Id of the chunk to edit.
 * @param content - New page content of the chunk.
 * @param metadata - New metadata; stored as a JSON string.
 * @returns The first page of the loader's chunks (see getDocumentStoreFileChunks).
 * @throws InternalFlowiseError NOT_FOUND when the store, loader or chunk does not exist,
 *         INTERNAL_SERVER_ERROR for any other failure.
 */
const editDocumentStoreFileChunk = async (storeId: string, docId: string, chunkId: string, content: string, metadata: ICommonObject) => {
    const errorPrefix = 'Error: documentStoreServices.editDocumentStoreFileChunk'
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store ${storeId} not found`)
        }
        const loaders = JSON.parse(entity.loaders)
        const found = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === docId)
        if (!found) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store loader ${docId} not found`)
        }

        const editChunk = await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).findOneBy({
            id: chunkId
        })
        if (!editChunk) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document Chunk ${chunkId} not found`)
        }
        // swap the old content length for the new one in the loader's character total
        found.totalChars -= editChunk.pageContent.length
        editChunk.pageContent = content
        editChunk.metadata = JSON.stringify(metadata)
        found.totalChars += content.length
        await appServer.AppDataSource.getRepository(DocumentStoreFileChunk).save(editChunk)
        entity.loaders = JSON.stringify(loaders)
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
        return getDocumentStoreFileChunks(storeId, docId)
    } catch (error) {
        // Keep the NOT_FOUND errors raised above instead of downgrading them to a generic 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `${errorPrefix} - ${getErrorMessage(error)}`)
    }
}
321

322
/**
 * Merges `updatedDocumentStore` into `documentStore` via the repository and saves the result.
 *
 * @param documentStore - Existing entity that receives the changes.
 * @param updatedDocumentStore - Values to merge into the existing entity.
 * @returns The value resolved by the repository `save` call.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const updateDocumentStore = async (documentStore: DocumentStore, updatedDocumentStore: DocumentStore) => {
    try {
        const dataSource = getRunningExpressApp().AppDataSource
        const merged = dataSource.getRepository(DocumentStore).merge(documentStore, updatedDocumentStore)
        // `return await` keeps a rejected save inside this try/catch
        return await dataSource.getRepository(DocumentStore).save(merged)
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.updateDocumentStore - ${reason}`)
    }
}
336

337
/**
 * Decodes an upload string of the form `data:<mime>;base64,<payload>,filename:<name>`,
 * hands the decoded bytes to `addFileToStorage` under the store's folder and returns
 * the metadata record for the file.
 *
 * The string is cut at the first two commas only. Base64 payloads contain no commas, so
 * file names that themselves contain ',' or ':' are kept intact.
 *
 * @param fileBase64 - Upload string as described above.
 * @param entity - Document store the file belongs to; its id is passed to the storage helper.
 * @returns Metadata of the stored file (new uuid, name, mime type, byte size, NEW status, upload date).
 */
const _saveFileToStorage = async (fileBase64: string, entity: DocumentStore) => {
    const headerEnd = fileBase64.indexOf(',')
    const header = headerEnd === -1 ? fileBase64 : fileBase64.slice(0, headerEnd)
    const remainder = headerEnd === -1 ? '' : fileBase64.slice(headerEnd + 1)

    const payloadEnd = remainder.indexOf(',')
    const payload = payloadEnd === -1 ? remainder : remainder.slice(0, payloadEnd)
    const trailer = payloadEnd === -1 ? '' : remainder.slice(payloadEnd + 1)

    // trailer looks like `filename:<name>`; everything after the first ':' is the name
    const nameStart = trailer.indexOf(':')
    const filename = nameStart === -1 ? '' : trailer.slice(nameStart + 1)

    // header looks like `data:<mime>;base64`
    const mime = header.split(';')[0].split(':')[1] ?? ''

    const bf = Buffer.from(payload, 'base64')
    await addFileToStorage(mime, bf, filename, DOCUMENT_STORE_BASE_FOLDER, entity.id)
    return {
        id: uuidv4(),
        name: filename,
        mimePrefix: mime,
        size: bf.length,
        status: DocumentStoreStatus.NEW,
        uploaded: new Date()
    }
}
356

357
/**
 * Runs the configured document loader node, optionally with a text splitter node, and
 * returns the documents that the loader's `init` resolves to.
 *
 * Node implementations are imported dynamically from the `filePath` registered in
 * `appServer.nodesPool.componentNodes` for `data.splitterId` / `data.loaderId`.
 *
 * @param data - Loader and splitter ids plus their configs (and an optional credential).
 * @returns The documents produced by the loader node.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const _splitIntoChunks = async (data: IDocumentStoreLoaderForPreview) => {
    try {
        const appServer = getRunningExpressApp()

        // a splitter is only instantiated when a non-empty splitter config was supplied
        let textSplitter = null
        if (data.splitterConfig && Object.keys(data.splitterConfig).length > 0) {
            const splitterPath = appServer.nodesPool.componentNodes[data.splitterId].filePath as string
            const splitterModule = await import(splitterPath)
            const splitterNode = new splitterModule.nodeClass()
            const splitterNodeData = {
                inputs: { ...data.splitterConfig },
                id: 'splitter_0'
            }
            textSplitter = await splitterNode.init(splitterNodeData)
        }

        const loaderPath = appServer.nodesPool.componentNodes[data.loaderId].filePath as string
        const loaderModule = await import(loaderPath)
        // doc loader configs
        const loaderNodeData = {
            credential: data.credential || undefined,
            inputs: { ...data.loaderConfig, textSplitter: textSplitter },
            outputs: { output: 'document' }
        }
        const options: ICommonObject = {
            chatflowid: uuidv4(),
            appDataSource: appServer.AppDataSource,
            databaseEntities,
            logger
        }
        const loaderNode = new loaderModule.nodeClass()
        const docs: IDocument[] = await loaderNode.init(loaderNodeData, '', options)
        return docs
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.splitIntoChunks - ${reason}`)
    }
}
395

396
/**
 * Replaces every `FILE-STORAGE::…` reference in `data.loaderConfig` with the stored files'
 * contents, re-encoded as `data:<mime>;base64,<payload>,filename:<name>` strings (serialized
 * as a JSON array). Sets `data.rehydrated` to true when at least one reference was replaced,
 * otherwise to false. Mutates `data` in place.
 *
 * References are only replaced when the store contains a loader whose id equals `data.id`;
 * that loader's `files` list supplies the mime type of each file.
 *
 * @param data - Loader description whose `loaderConfig` is rewritten.
 * @param entity - The owning store if the caller already has it; when null it is loaded
 *                 once (by `data.storeId`) the first time a reference is encountered.
 * @throws InternalFlowiseError NOT_FOUND when the store cannot be loaded or a referenced
 *         file has no entry in the loader's `files` list.
 */
const _normalizeFilePaths = async (data: IDocumentStoreLoaderForPreview, entity: DocumentStore | null) => {
    const FILE_STORAGE_PREFIX = 'FILE-STORAGE::'
    // resolved lazily and reused across keys so the store is fetched at most once
    let documentStoreEntity: DocumentStore | null = entity
    let rehydrated = false

    for (const key of Object.getOwnPropertyNames(data.loaderConfig)) {
        const input = data.loaderConfig[key]
        if (!input || typeof input !== 'string' || !input.startsWith(FILE_STORAGE_PREFIX)) {
            continue
        }
        if (!documentStoreEntity) {
            const appServer = getRunningExpressApp()
            documentStoreEntity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
                id: data.storeId
            })
            if (!documentStoreEntity) {
                throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`)
            }
        }

        // the reference holds either a JSON array of file names or a single bare name
        const fileName = input.replace(FILE_STORAGE_PREFIX, '')
        const files: string[] = fileName.startsWith('[') && fileName.endsWith(']') ? JSON.parse(fileName) : [fileName]

        const loaders = JSON.parse(documentStoreEntity.loaders)
        const currentLoader = loaders.find((ldr: IDocumentStoreLoader) => ldr.id === data.id)
        if (!currentLoader) {
            continue
        }

        const knownFiles: IDocumentStoreLoaderFile[] = currentLoader.files ?? []
        const base64Files: string[] = []
        for (const file of files) {
            // find the file entry that has the same name as the file
            const uploadedFile = knownFiles.find((uFile: IDocumentStoreLoaderFile) => uFile.name === file)
            if (!uploadedFile) {
                // without this guard the mime lookup below fails with an opaque TypeError
                throw new InternalFlowiseError(
                    StatusCodes.NOT_FOUND,
                    `File ${file} is not registered for loader ${data.id} in document store ${documentStoreEntity.id}`
                )
            }
            const bf = await getFileFromStorage(file, DOCUMENT_STORE_BASE_FOLDER, documentStoreEntity.id)
            const mimePrefix = 'data:' + uploadedFile.mimePrefix + ';base64'
            base64Files.push(mimePrefix + ',' + bf.toString('base64') + `,filename:${file}`)
        }
        data.loaderConfig[key] = JSON.stringify(base64Files)
        rehydrated = true
    }
    data.rehydrated = rehydrated
}
444

445
/**
 * Loads and splits the documents described by `data` and returns the first
 * `data.previewChunkCount` chunks (all of them when the count is -1 or exceeds the total).
 *
 * Side effects on `data`: in preview mode the three web scraper loaders get
 * `loaderConfig.limit = 3`; file references are rehydrated unless `data.rehydrated` is
 * already set; `previewChunkCount` is clamped to the number of chunks produced.
 *
 * @param data - Loader/splitter description and preview options.
 * @returns `{ chunks, totalChunks, previewChunkCount }`.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const previewChunks = async (data: IDocumentStoreLoaderForPreview) => {
    try {
        const webScraperLoaderIds = ['cheerioWebScraper', 'puppeteerWebScraper', 'playwrightWebScraper']
        if (data.preview && webScraperLoaderIds.includes(data.loaderId)) {
            data.loaderConfig['limit'] = 3
        }

        if (!data.rehydrated) {
            await _normalizeFilePaths(data, null)
        }

        const allDocs = await _splitIntoChunks(data)
        const totalChunks = allDocs.length

        // -1 means "return all chunks"
        if (data.previewChunkCount === -1) {
            data.previewChunkCount = totalChunks
        }
        // asking for more than exists also means "return all chunks"
        if (totalChunks <= data.previewChunkCount) {
            data.previewChunkCount = totalChunks
        }
        // otherwise only the first n chunks are returned
        const chunks = totalChunks > data.previewChunkCount ? allDocs.slice(0, data.previewChunkCount) : allDocs

        return { chunks: chunks, totalChunks: totalChunks, previewChunkCount: data.previewChunkCount }
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.previewChunks - ${reason}`)
    }
}
476

477
/**
 * Registers (or resets) a loader on the store with status SYNCING, starts the chunking
 * job in the background and immediately returns the loader's current first chunk page.
 *
 * @param data - Loader description; `data.id` identifies an existing loader to re-process,
 *               otherwise a new loader id is generated.
 * @returns The first page of chunks for the loader (see getDocumentStoreFileChunks).
 * @throws InternalFlowiseError NOT_FOUND when the store does not exist,
 *         INTERNAL_SERVER_ERROR for any other failure.
 */
const processAndSaveChunks = async (data: IDocumentStoreLoaderForPreview) => {
    const errorPrefix = 'Error: documentStoreServices.processAndSaveChunks'
    try {
        const appServer = getRunningExpressApp()
        const entity = await appServer.AppDataSource.getRepository(DocumentStore).findOneBy({
            id: data.storeId
        })
        if (!entity) {
            throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `${errorPrefix} - Document store ${data.storeId} not found`)
        }

        const newLoaderId = data.id ?? uuidv4()
        const existingLoaders = JSON.parse(entity.loaders)
        const found = existingLoaders.find((ldr: IDocumentStoreLoader) => ldr.id === newLoaderId)
        if (found) {
            // clean up the current metrics and mark the loader as syncing
            found.totalChunks = 0
            found.totalChars = 0
            found.status = DocumentStoreStatus.SYNCING
        } else {
            const loader: IDocumentStoreLoader = {
                id: newLoaderId,
                loaderId: data.loaderId,
                loaderName: data.loaderName,
                loaderConfig: data.loaderConfig,
                splitterId: data.splitterId,
                splitterName: data.splitterName,
                splitterConfig: data.splitterConfig,
                totalChunks: 0,
                totalChars: 0,
                status: DocumentStoreStatus.SYNCING
            }
            if (data.credential) {
                loader.credential = data.credential
            }
            existingLoaders.push(loader)
        }
        entity.loaders = JSON.stringify(existingLoaders)
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)

        // Deliberately not awaited: the job runs in the background (to be moved to a worker thread).
        // The catch handler keeps a failed job from surfacing as an unhandled promise rejection.
        _saveChunksToStorage(data, entity, newLoaderId).catch((error) => {
            logger.error(`${errorPrefix} - background processing of loader ${newLoaderId} failed: ${getErrorMessage(error)}`)
        })
        return getDocumentStoreFileChunks(data.storeId as string, newLoaderId)
    } catch (error) {
        // Keep the NOT_FOUND raised above instead of downgrading it to a generic 500
        if (error instanceof InternalFlowiseError) {
            throw error
        }
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `${errorPrefix} - ${getErrorMessage(error)}`)
    }
}
529

530
/**
 * Background job behind processAndSaveChunks: produces the chunks for a loader, moves
 * uploaded base64 files into storage, replaces the loader's chunks in the database and
 * finally marks the loader (and, if every loader is synced, the store) as SYNC.
 *
 * Every asynchronous step is awaited, so the returned promise settles only after all
 * chunks are persisted, and any failure is reported through the rejection.
 *
 * @param data - Loader description; its `loaderConfig` is rewritten so uploaded files
 *               become `FILE-STORAGE::[...]` references.
 * @param entity - The owning store; its `loaders` and `status` are updated and saved.
 * @param newLoaderId - Id of the loader entry (inside `entity.loaders`) being processed.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const _saveChunksToStorage = async (data: IDocumentStoreLoaderForPreview, entity: DocumentStore, newLoaderId: string) => {
    const re = new RegExp('^data.*;base64', 'i')

    try {
        const appServer = getRunningExpressApp()
        //step 1: restore the full paths, if any
        await _normalizeFilePaths(data, entity)
        //step 2: split the file into chunks
        const response = await previewChunks(data)

        //step 3: move base64 uploads into storage and replace them with FILE-STORAGE references
        const filesWithMetadata = []
        const keys = Object.getOwnPropertyNames(data.loaderConfig)
        for (let i = 0; i < keys.length; i++) {
            const input = data.loaderConfig[keys[i]]
            if (!input || typeof input !== 'string') {
                continue
            }
            if (input.startsWith('[') && input.endsWith(']')) {
                const files = JSON.parse(input)
                const fileNames: string[] = []
                for (let j = 0; j < files.length; j++) {
                    const file = files[j]
                    if (re.test(file)) {
                        const fileMetadata = await _saveFileToStorage(file, entity)
                        fileNames.push(fileMetadata.name)
                        filesWithMetadata.push(fileMetadata)
                    }
                }
                data.loaderConfig[keys[i]] = 'FILE-STORAGE::' + JSON.stringify(fileNames)
            } else if (re.test(input)) {
                const fileMetadata = await _saveFileToStorage(input, entity)
                filesWithMetadata.push(fileMetadata)
                data.loaderConfig[keys[i]] = 'FILE-STORAGE::' + JSON.stringify([fileMetadata.name])
                // NOTE(review): scanning stops after the first single-file input (kept from the original)
                break
            }
        }

        const existingLoaders = JSON.parse(entity.loaders)
        const loader = existingLoaders.find((ldr: IDocumentStoreLoader) => ldr.id === newLoaderId)
        if (!loader) {
            throw new Error(`Loader ${newLoaderId} not found in document store ${entity.id}`)
        }

        if (data.id) {
            //step 4: detach the previous loader entry and remove its files
            existingLoaders.splice(existingLoaders.indexOf(loader), 1)
            if (!data.rehydrated && loader.files) {
                // Files written in step 3 use the same folder and store id, so a re-uploaded
                // file with an unchanged name must not be removed here.
                const freshNames = new Set(filesWithMetadata.map((file) => file.name))
                const staleFiles = loader.files.filter((file: IDocumentStoreLoaderFile) => !freshNames.has(file.name))
                await Promise.all(
                    staleFiles.map((file: IDocumentStoreLoaderFile) =>
                        removeSpecificFileFromStorage(DOCUMENT_STORE_BASE_FOLDER, entity.id, file.name)
                    )
                )
            }
        }
        //step 5: update with the new files and loaderConfig
        if (filesWithMetadata.length > 0) {
            loader.loaderConfig = data.loaderConfig
            loader.files = filesWithMetadata
        }
        //step 6: re-attach the loader (an existing loader therefore moves to the end of the list)
        if (data.id) {
            existingLoaders.push(loader)
        }

        //step 7: remove all previous chunks
        const chunkRepository = appServer.AppDataSource.getRepository(DocumentStoreFileChunk)
        await chunkRepository.delete({ docId: newLoaderId })
        if (response.chunks) {
            //step 8: save the new chunks and wait for all of them before reporting SYNC
            const totalChars = response.chunks.reduce((acc: number, chunk) => acc + chunk.pageContent.length, 0)
            await Promise.all(
                response.chunks.map(async (chunk: IDocument, index: number) => {
                    const docChunk: DocumentStoreFileChunk = {
                        docId: newLoaderId,
                        storeId: data.storeId || '',
                        id: uuidv4(),
                        chunkNo: index + 1,
                        pageContent: chunk.pageContent,
                        metadata: JSON.stringify(chunk.metadata)
                    }
                    const dChunk = chunkRepository.create(docChunk)
                    await chunkRepository.save(dChunk)
                })
            )
            // update the loader with the new metrics
            loader.totalChunks = response.totalChunks
            loader.totalChars = totalChars
        }
        loader.status = 'SYNC'
        // the store is SYNC only when every loader is SYNC, otherwise STALE
        const allSynced = existingLoaders.every((ldr: IDocumentStoreLoader) => ldr.status === 'SYNC')
        entity.status = allSynced ? DocumentStoreStatus.SYNC : DocumentStoreStatus.STALE
        entity.loaders = JSON.stringify(existingLoaders)
        //step 9: update the entity in the database
        await appServer.AppDataSource.getRepository(DocumentStore).save(entity)
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices._saveChunksToStorage - ${getErrorMessage(error)}`
        )
    }
}
634

635
/**
 * Lists the nodes of the 'Document Loaders' category, leaving out the loaders named
 * 'documentStore', 'vectorStoreToDocument', 'unstructuredFolderLoader' and 'folderFiles'.
 *
 * @returns The remaining document loader nodes.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const getDocumentLoaders = async () => {
    const hiddenLoaderNames = new Set(['documentStore', 'vectorStoreToDocument', 'unstructuredFolderLoader', 'folderFiles'])

    try {
        const loaderNodes = await nodesService.getAllNodesForCategory('Document Loaders')
        return loaderNodes.filter((node) => !hiddenLoaderNames.has(node.name))
    } catch (error) {
        const reason = getErrorMessage(error)
        throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error: documentStoreServices.getDocumentLoaders - ${reason}`)
    }
}
649

650
/**
 * Makes `storeId` the only document store whose `whereUsed` list contains `chatId`.
 *
 * For every store: if it lists `chatId` but is not the target store (or no target is
 * given), the id is removed; if it is the target store and does not list `chatId` yet,
 * the id is added. Stores that need no change are not saved.
 *
 * All saves are awaited, so the returned promise settles after every update is
 * persisted and any failure is reported through the rejection.
 *
 * @param chatId - Chatflow id to track.
 * @param storeId - Store now used by the chatflow, or undefined when it uses none.
 * @throws InternalFlowiseError (INTERNAL_SERVER_ERROR) wrapping any failure.
 */
const updateDocumentStoreUsage = async (chatId: string, storeId: string | undefined) => {
    try {
        const appServer = getRunningExpressApp()
        const storeRepository = appServer.AppDataSource.getRepository(DocumentStore)
        const entities = await storeRepository.find()
        await Promise.all(
            entities.map(async (entity: DocumentStore) => {
                // an empty or null `whereUsed` is treated as an empty list
                const parsed = entity.whereUsed ? JSON.parse(entity.whereUsed) : []
                const whereUsed: string[] = Array.isArray(parsed) ? parsed : []
                const index = whereUsed.indexOf(chatId)
                const isTargetStore = entity.id === storeId

                if (index > -1 && !isTargetStore) {
                    // the chatflow no longer uses this store
                    whereUsed.splice(index, 1)
                } else if (index === -1 && isTargetStore) {
                    // the chatflow starts using this store
                    whereUsed.push(chatId)
                } else {
                    // already in the desired state
                    return
                }
                entity.whereUsed = JSON.stringify(whereUsed)
                await storeRepository.save(entity)
            })
        )
    } catch (error) {
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: documentStoreServices.updateDocumentStoreUsage - ${getErrorMessage(error)}`
        )
    }
}
695

696
// Public surface of the document store service; helpers prefixed with `_` stay module-private
export default {
    // store lifecycle
    createDocumentStore,
    getAllDocumentStores,
    getDocumentStoreById,
    updateDocumentStore,
    deleteDocumentStore,
    // usage tracking
    updateDocumentStoreUsage,
    getUsedChatflowNames,
    // loaders
    getDocumentLoaders,
    deleteLoaderFromDocumentStore,
    // chunks
    previewChunks,
    processAndSaveChunks,
    getDocumentStoreFileChunks,
    deleteDocumentStoreFileChunk,
    editDocumentStoreFileChunk
}
712

Использование cookies

Мы используем файлы cookie в соответствии с Политикой конфиденциальности и Политикой использования cookies.

Нажимая кнопку «Принимаю», Вы даете АО «СберТех» согласие на обработку Ваших персональных данных в целях совершенствования нашего веб-сайта и Сервиса GitVerse, а также повышения удобства их использования.

Запретить использование cookies Вы можете самостоятельно в настройках Вашего браузера.