agsamantha/node_modules/@langchain/community/dist/document_loaders/web/assemblyai.cjs
2024-10-02 15:15:21 -05:00

187 lines
7.2 KiB
JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.AudioSubtitleLoader = exports.AudioTranscriptSentencesLoader = exports.AudioTranscriptParagraphsLoader = exports.AudioTranscriptLoader = void 0;
const assemblyai_1 = require("assemblyai");
const documents_1 = require("@langchain/core/documents");
const env_1 = require("@langchain/core/utils/env");
const base_1 = require("@langchain/core/document_loaders/base");
/**
 * Descriptor identifying this integration; nested under `userAgent.integration`
 * in the default client options below.
 */
const langChainIntegration = { name: "LangChainJS", version: "1.0.1" };
/**
 * Baseline options spread into every AssemblyAI client created in this module.
 * Caller-supplied options are spread after these, so they win on key clashes.
 */
const defaultOptions = {
    userAgent: { integration: langChainIntegration },
};
/**
 * Base class for AssemblyAI loaders.
 *
 * Resolves the API key and constructs the AssemblyAI client that subclasses
 * use through `this.client`.
 */
class AssemblyAILoader extends base_1.BaseDocumentLoader {
    /**
     * Create a new AssemblyAI loader.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     * The object passed in is never modified.
     * @throws {Error} If no API key is found in the options or in the environment.
     */
    constructor(assemblyAIOptions) {
        super();
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Work on a shallow copy: the environment fallback below writes
        // `apiKey`, and that write must not leak into the caller's object.
        // Spreading a missing (undefined/null) argument yields `{}`.
        const options = { ...assemblyAIOptions };
        if (!options.apiKey) {
            options.apiKey = (0, env_1.getEnvironmentVariable)("ASSEMBLYAI_API_KEY");
        }
        if (!options.apiKey) {
            throw new Error("No AssemblyAI API key provided");
        }
        // Caller options are spread last so they override the defaults.
        this.client = new assemblyai_1.AssemblyAI({
            ...defaultOptions,
            ...options,
        });
    }
}
/**
 * Intermediate loader that holds either a transcript ID or a set of
 * transcription parameters, and knows how to turn either into a transcript.
 */
class CreateTranscriptLoader extends AssemblyAILoader {
    /**
     * Transcribe audio or retrieve an existing transcript by its ID.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * A string is stored as the transcript ID; anything else is stored as transcription parameters.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, assemblyAIOptions) {
        super(assemblyAIOptions);
        const givenId = typeof params === "string";
        // Exactly one of the two fields receives `params`; the other stays undefined.
        Object.defineProperty(this, "transcribeParams", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: givenId ? undefined : params
        });
        Object.defineProperty(this, "transcriptId", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: givenId ? params : undefined
        });
    }
    /**
     * Fetch the transcript for the stored ID, or transcribe using the stored parameters.
     * A stored transcript ID takes precedence over transcription parameters.
     * @returns A promise that resolves to whatever the client's `transcripts.get` or `transcripts.transcribe` call returns.
     * @throws {Error} If neither a transcript ID nor transcription parameters are set.
     */
    async transcribeOrGetTranscript() {
        if (this.transcriptId) {
            return await this.client.transcripts.get(this.transcriptId);
        }
        if (!this.transcribeParams) {
            throw new Error("No transcript ID or transcribe parameters provided");
        }
        // When `audio_url` is present, its value is also passed under the
        // `audio` key; all original keys are kept.
        const request = "audio_url" in this.transcribeParams
            ? { ...this.transcribeParams, audio: this.transcribeParams.audio_url }
            : this.transcribeParams;
        return await this.client.transcripts.transcribe(request);
    }
}
/**
 * Loader that produces one document holding the full transcript text.
 */
class AudioTranscriptLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript and wrap it in a single document.
     * @returns A promise that resolves to a one-element array; the document's page content
     * is the transcript's `text` and its metadata is the transcript object itself.
     */
    async load() {
        const result = await this.transcribeOrGetTranscript();
        const wholeTranscript = new documents_1.Document({
            pageContent: result.text,
            metadata: result,
        });
        return [wholeTranscript];
    }
}
exports.AudioTranscriptLoader = AudioTranscriptLoader;
/**
 * Loader that produces one document per paragraph of the transcript.
 */
class AudioTranscriptParagraphsLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript, request its paragraphs by transcript ID, and map each one to a document.
     * @returns A promise that resolves to an array of documents; each document's page content
     * is the paragraph's `text` and its metadata is the paragraph object.
     */
    async load() {
        const { id } = await this.transcribeOrGetTranscript();
        const { paragraphs } = await this.client.transcripts.paragraphs(id);
        return paragraphs.map((paragraph) => new documents_1.Document({
            pageContent: paragraph.text,
            metadata: paragraph,
        }));
    }
}
exports.AudioTranscriptParagraphsLoader = AudioTranscriptParagraphsLoader;
/**
 * Loader that produces one document per sentence of the transcript.
 */
class AudioTranscriptSentencesLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript, request its sentences by transcript ID, and map each one to a document.
     * @returns A promise that resolves to an array of documents; each document's page content
     * is the sentence's `text` and its metadata is the sentence object.
     */
    async load() {
        const { id } = await this.transcribeOrGetTranscript();
        const { sentences } = await this.client.transcripts.sentences(id);
        return sentences.map((sentence) => new documents_1.Document({
            pageContent: sentence.text,
            metadata: sentence,
        }));
    }
}
exports.AudioTranscriptSentencesLoader = AudioTranscriptSentencesLoader;
/**
 * Loader that produces a single document containing subtitles (`srt` or `vtt`) for the transcript.
 */
class AudioSubtitleLoader extends CreateTranscriptLoader {
    /**
     * Create a new AudioSubtitleLoader.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * @param subtitleFormat The format of the subtitles, either `srt` or `vtt`. Defaults to `srt`.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, subtitleFormat = "srt", assemblyAIOptions) {
        super(params, assemblyAIOptions);
        // Defined directly with its final value; no follow-up assignment is needed.
        Object.defineProperty(this, "subtitleFormat", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: subtitleFormat
        });
    }
    /**
     * Obtain the transcript, then request its subtitles in the configured format.
     * @returns A promise that resolves to a one-element array whose document has the
     * subtitles as page content.
     */
    async load() {
        const { id } = await this.transcribeOrGetTranscript();
        const subtitleText = await this.client.transcripts.subtitles(id, this.subtitleFormat);
        const subtitleDocument = new documents_1.Document({
            pageContent: subtitleText,
        });
        return [subtitleDocument];
    }
}
exports.AudioSubtitleLoader = AudioSubtitleLoader;