import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
import { SimpleChatModel, type BaseChatModelParams } from "@langchain/core/language_models/chat_models";
import type { BaseLanguageModelCallOptions } from "@langchain/core/language_models/base";
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
import { BaseMessage } from "@langchain/core/messages";
import { ChatGenerationChunk } from "@langchain/core/outputs";
import { LlamaBaseCppInputs } from "../utils/llama_cpp.js";
/**
 * Note that the modelPath is the only required parameter. For testing you
 * can set this in the environment variable `LLAMA_PATH`.
 */
export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseChatModelParams {
}
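/*
 * A minimal construction sketch (an illustrative assumption, not part of this
 * declaration file): the only required input is `modelPath`, which here is read
 * from the `LLAMA_PATH` environment variable mentioned above.
 *
 * ```typescript
 * import { ChatLlamaCpp } from "@langchain/community/chat_models/llama_cpp";
 *
 * // Assumes LLAMA_PATH points at a local GGUF model file.
 * const model = new ChatLlamaCpp({ modelPath: process.env.LLAMA_PATH! });
 * ```
 */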
export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
    /** The maximum number of tokens the response should contain. */
    maxTokens?: number;
    /** A function called when matching the provided token array */
    onToken?: (tokens: number[]) => void;
}
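/*
 * A hedged usage sketch for these call options: both fields are optional and are
 * passed as the second argument to `invoke`. The `onToken` callback receives the
 * raw token ids produced by node-llama-cpp; the logging shown is illustrative only.
 *
 * ```typescript
 * const response = await model.invoke("Briefly introduce yourself.", {
 *   maxTokens: 64,
 *   onToken: (tokens) => console.log("token ids:", tokens),
 * });
 * ```
 */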
/**
 * To use this model you need to have the `node-llama-cpp` module installed.
 * This can be installed using `npm install -S node-llama-cpp`; the minimum
 * supported version is 2.0.0.
 * This also requires that you have a locally built version of Llama2 installed.
 * @example
 * ```typescript
 * // Initialize the ChatLlamaCpp model with the path to the model binary file.
 * const model = new ChatLlamaCpp({
 *   modelPath: "/Replace/with/path/to/your/model/gguf-llama2-q4_0.bin",
 *   temperature: 0.5,
 * });
 *
 * // Call the model with a message and await the response.
 * const response = await model.invoke([
 *   new HumanMessage({ content: "My name is John." }),
 * ]);
 *
 * // Log the response to the console.
 * console.log({ response });
 * ```
 */
export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
    static inputs: LlamaCppInputs;
    maxTokens?: number;
    temperature?: number;
    topK?: number;
    topP?: number;
    trimWhitespaceSuffix?: boolean;
    _model: LlamaModel;
    _context: LlamaContext;
    _session: LlamaChatSession | null;
    lc_serializable: boolean;
    static lc_name(): string;
    constructor(inputs: LlamaCppInputs);
    _llmType(): string;
    /** @ignore */
    _combineLLMOutput(): {};
    invocationParams(): {
        maxTokens: number | undefined;
        temperature: number | undefined;
        topK: number | undefined;
        topP: number | undefined;
        trimWhitespaceSuffix: boolean | undefined;
    };
    /** @ignore */
    _call(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
    _streamResponseChunks(input: BaseMessage[], _options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
    protected _buildSession(messages: BaseMessage[]): string;
    protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
    protected _buildPrompt(input: BaseMessage[]): string;
}
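/*
 * Streaming sketch (assumes model construction as in the class example above):
 * `_streamResponseChunks` backs the `.stream()` method inherited from the base
 * chat model, which yields message chunks as they are generated.
 *
 * ```typescript
 * import { HumanMessage } from "@langchain/core/messages";
 *
 * const stream = await model.stream([
 *   new HumanMessage({ content: "Tell me a short story about a happy llama." }),
 * ]);
 * for await (const chunk of stream) {
 *   process.stdout.write(chunk.content as string);
 * }
 * ```
 */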