agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/pptx.js

import { parseOfficeAsync } from "officeparser";
import { Document } from "@langchain/core/documents";
import { BufferLoader } from "langchain/document_loaders/fs/buffer";
/**
 * A class that extends the `BufferLoader` class. It represents a document
 * loader that loads documents from PDF files.
 */
export class PPTXLoader extends BufferLoader {
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * A method that takes a `raw` buffer and `metadata` as parameters and
     * returns a promise that resolves to an array of `Document` instances. It
     * uses the `parseOfficeAsync` function from the `officeparser` module to extract
     * the raw text content from the buffer. If the extracted powerpoint content is
     * empty, it returns an empty array. Otherwise, it creates a new
     * `Document` instance with the extracted powerpoint content and the provided
     * metadata, and returns it as an array.
     * @param raw The buffer to be parsed.
     * @param metadata The metadata of the document.
     * @returns A promise that resolves to an array of `Document` instances.
     */
    async parse(raw, metadata) {
        const pptx = await parseOfficeAsync(raw, { outputErrorToConsole: true });
        if (!pptx)
            return [];
        return [
            new Document({
                pageContent: pptx,
                metadata,
            }),
        ];
    }
}