agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/pptx.cjs

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PPTXLoader = void 0;
const officeparser_1 = require("officeparser");
const documents_1 = require("@langchain/core/documents");
const buffer_1 = require("langchain/document_loaders/fs/buffer");
/**
 * A class that extends the `BufferLoader` class. It represents a document
 * loader that loads documents from PowerPoint (PPTX) files.
 */
class PPTXLoader extends buffer_1.BufferLoader {
    /**
     * Creates a loader for a single source.
     * @param filePathOrBlob The source to load; passed through unchanged to
     * the `BufferLoader` constructor.
     */
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * Extracts the text held in `raw` via `parseOfficeAsync` from the
     * `officeparser` module and wraps it in a single `Document`.
     *
     * When the parser yields a falsy result (for example an empty string),
     * no document is produced and an empty array is returned instead.
     * @param raw The buffer to be parsed.
     * @param metadata The metadata to attach to the resulting document.
     * @returns A promise that resolves to an array holding zero or one
     * `Document` instances.
     */
    async parse(raw, metadata) {
        const { parseOfficeAsync } = officeparser_1;
        const parserConfig = { outputErrorToConsole: true };
        const extractedText = await parseOfficeAsync(raw, parserConfig);
        if (extractedText) {
            const doc = new documents_1.Document({
                pageContent: extractedText,
                metadata,
            });
            return [doc];
        }
        // Nothing usable was extracted, so there is no document to emit.
        return [];
    }
}
exports.PPTXLoader = PPTXLoader;