agsamantha/node_modules/@langchain/community/dist/document_loaders/web/assemblyai.js

181 lines
6.7 KiB
JavaScript
Raw Normal View History

2024-10-02 15:15:21 -05:00
import { AssemblyAI, } from "assemblyai";
import { Document } from "@langchain/core/documents";
import { getEnvironmentVariable } from "@langchain/core/utils/env";
import { BaseDocumentLoader } from "@langchain/core/document_loaders/base";
/**
 * Baseline options for the AssemblyAI client.
 *
 * These are spread into the client constructor arguments ahead of the
 * caller-supplied options, so any key the caller provides replaces the
 * corresponding top-level key here. The `userAgent.integration` entry labels
 * this integration by name and version.
 */
const defaultOptions = {
    userAgent: {
        integration: {
            name: "LangChainJS",
            version: "1.0.1",
        },
    },
};
/**
 * Base class for AssemblyAI loaders.
 *
 * Resolves the API key, constructs the AssemblyAI client and stores it on
 * `this.client` for subclasses to use.
 */
class AssemblyAILoader extends BaseDocumentLoader {
    /**
     * Create a new AssemblyAI loader.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     * The object passed in is read but never modified.
     * @throws {Error} If no API key is present in the options or in the environment.
     */
    constructor(assemblyAIOptions) {
        super();
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Work on a shallow copy: the environment fallback below must not be
        // written back into the caller's object (that would leak the key into
        // caller-owned state). Spreading `undefined`/`null` yields `{}`.
        const options = { ...assemblyAIOptions };
        if (!options.apiKey) {
            options.apiKey = getEnvironmentVariable("ASSEMBLYAI_API_KEY");
        }
        if (!options.apiKey) {
            throw new Error("No AssemblyAI API key provided");
        }
        // Caller options are spread last so they take precedence over the defaults.
        this.client = new AssemblyAI({
            ...defaultOptions,
            ...options,
        });
    }
}
/**
 * Intermediate loader that obtains a transcript, either by transcribing audio
 * or by fetching an existing transcript by its ID.
 */
class CreateTranscriptLoader extends AssemblyAILoader {
    /**
     * Transcribe audio or retrieve an existing transcript by its ID.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * A string is stored as the transcript ID; any other value is stored as the transcribe parameters.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, assemblyAIOptions) {
        super(assemblyAIOptions);
        // Declare both instance fields up front (initially undefined); exactly
        // one of them receives `params` below.
        for (const fieldName of ["transcribeParams", "transcriptId"]) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        if (typeof params !== "string") {
            this.transcribeParams = params;
        }
        else {
            this.transcriptId = params;
        }
    }
    /**
     * Fetch the transcript for the stored ID, or submit the stored parameters
     * for transcription when no ID was given.
     * @returns A promise resolving to whatever `client.transcripts.get` or
     * `client.transcripts.transcribe` resolves to.
     * @throws {Error} If neither a transcript ID nor transcribe parameters are set.
     */
    async transcribeOrGetTranscript() {
        if (this.transcriptId) {
            const existing = await this.client.transcripts.get(this.transcriptId);
            return existing;
        }
        if (!this.transcribeParams) {
            throw new Error("No transcript ID or transcribe parameters provided");
        }
        // Parameters that carry `audio_url` are forwarded with the same value
        // also set under `audio` (presumably the key the SDK's `transcribe`
        // reads; confirm against the AssemblyAI SDK).
        const request = "audio_url" in this.transcribeParams
            ? { ...this.transcribeParams, audio: this.transcribeParams.audio_url }
            : this.transcribeParams;
        const created = await this.client.transcripts.transcribe(request);
        return created;
    }
}
/**
 * Loader that turns one AssemblyAI transcript into one document.
 */
export class AudioTranscriptLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript (by transcribing or by ID) and wrap it in a document.
     * @returns A promise that resolves to a one-element array; the document's
     * page content is the transcript's `text` and its metadata is the
     * transcript object itself.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        const document = new Document({
            pageContent: transcript.text,
            metadata: transcript,
        });
        return [document];
    }
}
/**
 * Loader that produces one document per paragraph of an AssemblyAI transcript.
 */
export class AudioTranscriptParagraphsLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript, request its paragraphs by transcript ID, and
     * convert each paragraph into a document.
     * @returns A promise that resolves to an array of documents; each document's
     * page content is the paragraph's `text` and its metadata is the paragraph object.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        const response = await this.client.transcripts.paragraphs(transcript.id);
        const toDocument = (paragraph) => new Document({
            pageContent: paragraph.text,
            metadata: paragraph,
        });
        return response.paragraphs.map((paragraph) => toDocument(paragraph));
    }
}
/**
 * Loader that produces one document per sentence of an AssemblyAI transcript.
 */
export class AudioTranscriptSentencesLoader extends CreateTranscriptLoader {
    /**
     * Obtain the transcript, request its sentences by transcript ID, and
     * convert each sentence into a document.
     * @returns A promise that resolves to an array of documents; each document's
     * page content is the sentence's `text` and its metadata is the sentence object.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        const response = await this.client.transcripts.sentences(transcript.id);
        const toDocument = (sentence) => new Document({
            pageContent: sentence.text,
            metadata: sentence,
        });
        return response.sentences.map((sentence) => toDocument(sentence));
    }
}
/**
 * Transcribe audio and load subtitles for the transcript as `srt` or `vtt` format.
 */
export class AudioSubtitleLoader extends CreateTranscriptLoader {
    /**
     * Create a new AudioSubtitleLoader.
     * @param params The parameters to transcribe audio, or the ID of the transcript to retrieve.
     * @param subtitleFormat The format of the subtitles, either `srt` or `vtt`. Defaults to `srt` when omitted.
     * @param assemblyAIOptions The options to configure the AssemblyAI loader.
     * Configure the `assemblyAIOptions.apiKey` with your AssemblyAI API key, or configure it as the `ASSEMBLYAI_API_KEY` environment variable.
     */
    constructor(params, subtitleFormat = "srt", assemblyAIOptions) {
        super(params, assemblyAIOptions);
        // The descriptor stores the value directly, so no follow-up
        // `this.subtitleFormat = ...` assignment is needed.
        Object.defineProperty(this, "subtitleFormat", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: subtitleFormat
        });
    }
    /**
     * Transcribe audio and load subtitles for the transcript as `srt` or `vtt` format.
     * @returns A promise that resolves to a one-element array holding a document
     * whose page content is the subtitles returned by the client; no metadata is set.
     */
    async load() {
        const transcript = await this.transcribeOrGetTranscript();
        const subtitles = await this.client.transcripts.subtitles(transcript.id, this.subtitleFormat);
        return [
            new Document({
                pageContent: subtitles,
            }),
        ];
    }
}