52 lines
1.9 KiB
JavaScript
52 lines
1.9 KiB
JavaScript
|
"use strict";
|
||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
exports.DocxLoader = void 0;
|
||
|
const documents_1 = require("@langchain/core/documents");
|
||
|
const buffer_1 = require("langchain/document_loaders/fs/buffer");
|
||
|
/**
|
||
|
* A class that extends the `BufferLoader` class. It represents a document
|
||
|
* loader that loads documents from DOCX files.
|
||
|
*/
|
||
|
class DocxLoader extends buffer_1.BufferLoader {
|
||
|
constructor(filePathOrBlob) {
|
||
|
super(filePathOrBlob);
|
||
|
}
|
||
|
/**
|
||
|
* A method that takes a `raw` buffer and `metadata` as parameters and
|
||
|
* returns a promise that resolves to an array of `Document` instances. It
|
||
|
* uses the `extractRawText` function from the `mammoth` module to extract
|
||
|
* the raw text content from the buffer. If the extracted text content is
|
||
|
* empty, it returns an empty array. Otherwise, it creates a new
|
||
|
* `Document` instance with the extracted text content and the provided
|
||
|
* metadata, and returns it as an array.
|
||
|
* @param raw The raw buffer from which to extract text content.
|
||
|
* @param metadata The metadata to be associated with the created `Document` instance.
|
||
|
* @returns A promise that resolves to an array of `Document` instances.
|
||
|
*/
|
||
|
async parse(raw, metadata) {
|
||
|
const { extractRawText } = await DocxLoaderImports();
|
||
|
const docx = await extractRawText({
|
||
|
buffer: raw,
|
||
|
});
|
||
|
if (!docx.value)
|
||
|
return [];
|
||
|
return [
|
||
|
new documents_1.Document({
|
||
|
pageContent: docx.value,
|
||
|
metadata,
|
||
|
}),
|
||
|
];
|
||
|
}
|
||
|
}
|
||
|
exports.DocxLoader = DocxLoader;
|
||
|
async function DocxLoaderImports() {
|
||
|
try {
|
||
|
const { extractRawText } = await import("mammoth");
|
||
|
return { extractRawText };
|
||
|
}
|
||
|
catch (e) {
|
||
|
console.error(e);
|
||
|
throw new Error("Failed to load mammoth. Please install it with eg. `npm install mammoth`.");
|
||
|
}
|
||
|
}
|