agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/docx.js

import { Document } from "@langchain/core/documents";
import { BufferLoader } from "langchain/document_loaders/fs/buffer";
/**
 * A class that extends the `BufferLoader` class. It represents a document
 * loader that loads documents from DOCX files.
 */
export class DocxLoader extends BufferLoader {
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * A method that takes a `raw` buffer and `metadata` as parameters and
     * returns a promise that resolves to an array of `Document` instances. It
     * uses the `extractRawText` function from the `mammoth` module to extract
     * the raw text content from the buffer. If the extracted text content is
     * empty, it returns an empty array. Otherwise, it creates a new
     * `Document` instance with the extracted text content and the provided
     * metadata, and returns it as an array.
     * @param raw The raw buffer from which to extract text content.
     * @param metadata The metadata to be associated with the created `Document` instance.
     * @returns A promise that resolves to an array of `Document` instances.
     */
    async parse(raw, metadata) {
        const { extractRawText } = await DocxLoaderImports();
        const docx = await extractRawText({
            buffer: raw,
        });
        if (!docx.value)
            return [];
        return [
            new Document({
                pageContent: docx.value,
                metadata,
            }),
        ];
    }
}
async function DocxLoaderImports() {
    try {
        const { extractRawText } = await import("mammoth");
        return { extractRawText };
    }
    catch (e) {
        console.error(e);
        throw new Error("Failed to load mammoth. Please install it with eg. `npm install mammoth`.");
    }
}
update readme+reqs 2024-10-02 15:15:21 -05:00			`import { Document } from "@langchain/core/documents";`
			`import { BufferLoader } from "langchain/document_loaders/fs/buffer";`
			`/**`
			* A class that extends the `BufferLoader` class. It represents a document
			`* loader that loads documents from DOCX files.`
			`*/`
			`export class DocxLoader extends BufferLoader {`
			`constructor(filePathOrBlob) {`
			`super(filePathOrBlob);`
			`}`
			`/**`
			* A method that takes a `raw` buffer and `metadata` as parameters and
			* returns a promise that resolves to an array of `Document` instances. It
			* uses the `extractRawText` function from the `mammoth` module to extract
			`* the raw text content from the buffer. If the extracted text content is`
			`* empty, it returns an empty array. Otherwise, it creates a new`
			* `Document` instance with the extracted text content and the provided
			`* metadata, and returns it as an array.`
			`* @param raw The raw buffer from which to extract text content.`
			* @param metadata The metadata to be associated with the created `Document` instance.
			* @returns A promise that resolves to an array of `Document` instances.
			`*/`
			`async parse(raw, metadata) {`
			`const { extractRawText } = await DocxLoaderImports();`
			`const docx = await extractRawText({`
			`buffer: raw,`
			`});`
			`if (!docx.value)`
			`return [];`
			`return [`
			`new Document({`
			`pageContent: docx.value,`
			`metadata,`
			`}),`
			`];`
			`}`
			`}`
			`async function DocxLoaderImports() {`
			`try {`
			`const { extractRawText } = await import("mammoth");`
			`return { extractRawText };`
			`}`
			`catch (e) {`
			`console.error(e);`
			throw new Error("Failed to load mammoth. Please install it with eg. `npm install mammoth`.");
			`}`
			`}`