agsamantha/node_modules/langchain/dist/document_loaders/fs/multi_file.js

95 lines
3.4 KiB
JavaScript
Raw Normal View History

2024-10-02 15:15:21 -05:00
import { extname, resolve } from "node:path";
import { stat } from "node:fs/promises";
import { BaseDocumentLoader } from "../base.js";
import { UnknownHandling } from "./directory.js";
/**
 * Document loader that works through an explicit list of file paths and
 * hands each file to a loader selected by the file's extension. It derives
 * from `BaseDocumentLoader` and supplies the `load()` implementation.
 * @example
 * ```typescript
 *
 * const multiFileLoader = new MultiFileLoader(
 *   ["path/to/file1.pdf", "path/to/file2.txt"],
 *   {
 *     ".pdf": (path: string) => new PDFLoader(path),
 *   },
 * );
 *
 * const docs = await multiFileLoader.load();
 * console.log({ docs });
 *
 * ```
 */
export class MultiFileLoader extends BaseDocumentLoader {
    /**
     * @param filePaths Iterable of file paths to load; each is passed
     * through `resolve()` before use.
     * @param loaders Object mapping a file extension (including the leading
     * dot, e.g. ".pdf") to a factory that receives the resolved path and
     * returns an object exposing an async `load()` method.
     * @param unknown How to react to a file whose extension has no entry in
     * `loaders`. Defaults to `UnknownHandling.Warn`.
     * @throws Error if `loaders` has no keys, or if any key does not begin
     * with a dot.
     */
    constructor(filePaths, loaders, unknown = UnknownHandling.Warn) {
        super();
        // Define the instance fields explicitly, in declaration order, as
        // plain enumerable / configurable / writable data properties.
        const fields = { filePaths, loaders, unknown };
        for (const [name, value] of Object.entries(fields)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
        // Validation runs after the fields are set: at least one loader must
        // be registered, and every key must look like an extension.
        const extensions = Object.keys(loaders);
        if (extensions.length === 0) {
            throw new Error("Must provide at least one loader");
        }
        const malformed = extensions.find((candidate) => candidate[0] !== ".");
        if (malformed !== undefined) {
            throw new Error(`Extension must start with a dot: ${malformed}`);
        }
    }
    /**
     * Loads every configured file, one after another, in the order given by
     * `filePaths`. Directories are skipped with a console warning. For a
     * regular entry, the loader factory registered for its extension is
     * invoked with the resolved path and the resulting documents are
     * appended to the output. When no factory is registered, the `unknown`
     * setting decides the outcome: `Ignore` skips silently, `Warn` logs a
     * console warning, and `Error` throws.
     * @returns A promise that resolves to the array of all loaded documents.
     * @throws Error if an extension has no loader and `unknown` is `Error`,
     * or if `unknown` holds a value other than `Ignore`, `Warn` or `Error`.
     * Rejections from `stat()` or from an individual loader propagate as-is.
     */
    async load() {
        const collected = [];
        for (const entry of this.filePaths) {
            const absolutePath = resolve(entry);
            const info = await stat(absolutePath);
            if (info.isDirectory()) {
                console.warn(`Ignoring directory: ${absolutePath}`);
                continue;
            }
            const createLoader = this.loaders[extname(absolutePath)];
            if (!createLoader) {
                // No factory for this extension: apply the configured policy.
                const policy = this.unknown;
                if (policy === UnknownHandling.Ignore) {
                    continue;
                }
                if (policy === UnknownHandling.Warn) {
                    console.warn(`Unknown file type: ${absolutePath}`);
                    continue;
                }
                if (policy === UnknownHandling.Error) {
                    throw new Error(`Unknown file type: ${absolutePath}`);
                }
                throw new Error(`Unknown unknown handling: ${policy}`);
            }
            const fileDocuments = await createLoader(absolutePath).load();
            collected.push(...fileDocuments);
        }
        return collected;
    }
}