agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/srt.cjs

51 lines
2 KiB
JavaScript
Raw Normal View History

2024-10-02 20:15:21 +00:00
"use strict";
// CommonJS/ES-module interop helper: reuse an already-installed helper found
// on `this` when there is one, otherwise fall back to the local definition.
var __importDefault = (this && this.__importDefault) || function (loaded) {
    // A module flagged with `__esModule` is handed back untouched.
    if (loaded && loaded.__esModule) {
        return loaded;
    }
    // Anything else is wrapped so that callers can always read `.default`.
    return { "default": loaded };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.SRTLoader = void 0;
const srt_parser_2_1 = __importDefault(require("srt-parser-2"));
const text_1 = require("langchain/document_loaders/fs/text");
/**
 * Document loader for SRT (SubRip) subtitle files, built on top of
 * `TextLoader`. The constructor forwards `filePathOrBlob` (the path to the
 * SRT file, or a `Blob` holding its contents) to `TextLoader`; this class
 * only overrides `parse()` so that the subtitle text is extracted from the
 * raw SRT markup.
 * @example
 * ```typescript
 * const loader = new SRTLoader("path/to/file.srt");
 * const docs = await loader.load();
 * console.log({ docs });
 * ```
 */
class SRTLoader extends text_1.TextLoader {
    /**
     * @param filePathOrBlob Path to the SRT file, or a `Blob` with its contents.
     */
    constructor(filePathOrBlob) {
        super(filePathOrBlob);
    }
    /**
     * Parses a raw SRT string with the `srt-parser-2` parser and collapses
     * the subtitles into one piece of text: the `text` of every parsed
     * subtitle entry is collected, empty values are dropped, and the rest
     * are joined with a single space.
     * @param raw The raw SRT string to be parsed.
     * @returns A promise that resolves to a one-element array whose only entry is the space-joined subtitle text (an empty string when no subtitle has text).
     */
    async parse(raw) {
        // eslint-disable-next-line new-cap
        const subtitleParser = new srt_parser_2_1.default();
        const spokenLines = [];
        for (const cue of subtitleParser.fromSrt(raw)) {
            const { text } = cue;
            // Skip entries that carry no text so the join adds no stray spaces.
            if (text) {
                spokenLines.push(text);
            }
        }
        return [spokenLines.join(" ")];
    }
}
exports.SRTLoader = SRTLoader;