agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/pptx.cjs
2024-10-02 15:15:21 -05:00

39 lines
1.6 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PPTXLoader = void 0;
const officeparser_1 = require("officeparser");
const documents_1 = require("@langchain/core/documents");
const buffer_1 = require("langchain/document_loaders/fs/buffer");
/**
 * Document loader for PowerPoint (`.pptx`) files. Extends `BufferLoader`
 * and supplies the `parse` step that turns the raw file contents into
 * `Document` instances.
 */
class PPTXLoader extends buffer_1.BufferLoader {
    /**
     * @param filePathOrBlob Forwarded unchanged to the `BufferLoader` constructor.
     */
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * Extracts the text of a presentation with `parseOfficeAsync` from the
     * `officeparser` module and wraps it in a single `Document`.
     * @param raw The raw file contents handed to `parseOfficeAsync`.
     * @param metadata The metadata attached to the resulting document.
     * @returns A promise that resolves to a one-element array holding the
     * `Document`, or to an empty array when the extracted text is falsy
     * (for example an empty string).
     */
    async parse(raw, metadata) {
        // Pull the function off the module first so it is invoked without a
        // `this` binding.
        const { parseOfficeAsync } = officeparser_1;
        // Going by the option name, this asks officeparser to log parsing
        // errors to the console - confirm against the officeparser docs.
        const parserOptions = { outputErrorToConsole: true };
        const extractedText = await parseOfficeAsync(raw, parserOptions);
        if (extractedText) {
            const document = new documents_1.Document({
                pageContent: extractedText,
                metadata,
            });
            return [document];
        }
        // Nothing was extracted, so there is no document to emit.
        return [];
    }
}
// Publish the loader class as the `PPTXLoader` property of this CommonJS module's exports.
exports.PPTXLoader = PPTXLoader;