// File viewer metadata (extraction residue): 187 lines, 7.2 KiB, JavaScript.
"use strict";
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.AudioSubtitleLoader = exports.AudioTranscriptSentencesLoader = exports.AudioTranscriptParagraphsLoader = exports.AudioTranscriptLoader = void 0;
|
|
const assemblyai_1 = require("assemblyai");
|
|
const documents_1 = require("@langchain/core/documents");
|
|
const env_1 = require("@langchain/core/utils/env");
|
|
const base_1 = require("@langchain/core/document_loaders/base");
|
|
/**
 * Default options spread into every AssemblyAI client configuration before
 * the caller-supplied options. Declares this integration's name and version
 * under the `userAgent` option.
 */
const defaultOptions = {
    userAgent: {
        integration: { name: "LangChainJS", version: "1.0.1" },
    },
};
|
|
/**
 * Base class for AssemblyAI loaders.
 *
 * Resolves the API key and builds the AssemblyAI client that subclasses reach
 * through `this.client`.
 */
class AssemblyAILoader extends base_1.BaseDocumentLoader {
    /**
     * Create a new AssemblyAI loader.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     * @throws {Error} If no API key is found in the options or in the environment.
     */
    constructor(assemblyAIOptions) {
        super();
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Work on a shallow copy so the environment fallback below never writes
        // the API key into the object the caller passed in. Spreading a missing
        // (undefined/null) argument yields an empty object.
        const options = { ...assemblyAIOptions };
        if (!options.apiKey) {
            options.apiKey = (0, env_1.getEnvironmentVariable)("ASSEMBLYAI_API_KEY");
        }
        if (!options.apiKey) {
            throw new Error("No AssemblyAI API key provided");
        }
        // Caller-supplied options are spread last, so they override the defaults.
        this.client = new assemblyai_1.AssemblyAI({
            ...defaultOptions,
            ...options,
        });
    }
}
|
|
/**
 * Intermediate base class for loaders that need a transcript. It is built
 * either from a transcript ID (string) or from transcription parameters.
 */
class CreateTranscriptLoader extends AssemblyAILoader {
    /**
     * Transcribe audio or retrieve an existing transcript by its ID.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, assemblyAIOptions) {
        super(assemblyAIOptions);
        Object.defineProperty(this, "transcribeParams", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "transcriptId", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // A string is treated as an existing transcript ID; anything else is
        // stored as the parameters for a new transcription.
        if (typeof params === "string") {
            this.transcriptId = params;
        }
        else {
            this.transcribeParams = params;
        }
    }
    /**
     * Fetch the transcript by ID when one was given; otherwise transcribe
     * using the stored parameters.
     * @returns A promise that resolves to whatever the client's `transcripts.get` or `transcripts.transcribe` call resolves to.
     * @throws {Error} If neither a transcript ID nor transcribe parameters were provided.
     */
    async transcribeOrGetTranscript() {
        if (this.transcriptId) {
            return await this.client.transcripts.get(this.transcriptId);
        }
        if (!this.transcribeParams) {
            throw new Error("No transcript ID or transcribe parameters provided");
        }
        // Parameters given with `audio_url` are forwarded with that value also
        // set as `audio`; all other parameters are forwarded unchanged.
        const transcribeParams = "audio_url" in this.transcribeParams
            ? { ...this.transcribeParams, audio: this.transcribeParams.audio_url }
            : this.transcribeParams;
        return await this.client.transcripts.transcribe(transcribeParams);
    }
}
|
|
/**
 * Transcribe audio and load the transcript as a document using AssemblyAI.
 */
class AudioTranscriptLoader extends CreateTranscriptLoader {
    /**
     * Transcribe audio and load the transcript as a document using AssemblyAI.
     * @returns A promise that resolves to a single document containing the transcript text
     * as the page content, and the transcript object as the metadata.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        // NOTE(review): `transcript.text` is passed through unchecked; confirm it
        // is always a string for the transcripts this loader can receive.
        const document = new documents_1.Document({
            pageContent: transcript.text,
            metadata: transcript,
        });
        return [document];
    }
}
exports.AudioTranscriptLoader = AudioTranscriptLoader;
|
|
/**
 * Transcribe audio and load the paragraphs of the transcript, creating a document for each paragraph.
 */
class AudioTranscriptParagraphsLoader extends CreateTranscriptLoader {
    /**
     * Transcribe audio and load the paragraphs of the transcript, creating a document for each paragraph.
     * @returns A promise that resolves to an array of documents, each containing a paragraph of the transcript.
     * Each document uses the paragraph's `text` as page content and the paragraph object as metadata.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        // Paragraphs are requested separately, keyed by the transcript's ID.
        const { paragraphs } = await this.client.transcripts.paragraphs(transcript.id);
        return paragraphs.map((paragraph) => new documents_1.Document({
            pageContent: paragraph.text,
            metadata: paragraph,
        }));
    }
}
exports.AudioTranscriptParagraphsLoader = AudioTranscriptParagraphsLoader;
|
|
/**
 * Transcribe audio and load the sentences of the transcript, creating a document for each sentence.
 */
class AudioTranscriptSentencesLoader extends CreateTranscriptLoader {
    /**
     * Transcribe audio and load the sentences of the transcript, creating a document for each sentence.
     * @returns A promise that resolves to an array of documents, each containing a sentence of the transcript.
     * Each document uses the sentence's `text` as page content and the sentence object as metadata.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        // Sentences are requested separately, keyed by the transcript's ID.
        const { sentences } = await this.client.transcripts.sentences(transcript.id);
        return sentences.map((sentence) => new documents_1.Document({
            pageContent: sentence.text,
            metadata: sentence,
        }));
    }
}
exports.AudioTranscriptSentencesLoader = AudioTranscriptSentencesLoader;
|
|
/**
 * Transcribe audio and load subtitles for the transcript as `srt` or `vtt` format.
 */
class AudioSubtitleLoader extends CreateTranscriptLoader {
    /**
     * Create a new AudioSubtitleLoader.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * @param subtitleFormat The format of the subtitles, either `srt` or `vtt`. Defaults to `srt`.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, subtitleFormat = "srt", assemblyAIOptions) {
        super(params, assemblyAIOptions);
        // The property is defined with its final value here, so no follow-up
        // assignment is needed.
        Object.defineProperty(this, "subtitleFormat", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: subtitleFormat
        });
    }
    /**
     * Transcribe audio and load subtitles for the transcript as `srt` or `vtt` format.
     * @returns A promise that resolves a document containing the subtitles as the page content.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        // Subtitles are requested separately, keyed by the transcript's ID and
        // the configured format.
        const subtitles = await this.client.transcripts.subtitles(transcript.id, this.subtitleFormat);
        return [
            new documents_1.Document({
                pageContent: subtitles,
            }),
        ];
    }
}
exports.AudioSubtitleLoader = AudioSubtitleLoader;
|