import { LlamaModel, LlamaContext, LlamaChatSession, LlamaJsonSchemaGrammar, LlamaGrammar, GbnfJsonSchema } from "node-llama-cpp";
import { LLM, type BaseLLMCallOptions, type BaseLLMParams } from "@langchain/core/language_models/llms";
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
import { GenerationChunk } from "@langchain/core/outputs";
import { LlamaBaseCppInputs } from "../utils/llama_cpp.js";
/**
 * Note that `modelPath` is the only required parameter. For testing you
 * can set this in the environment variable `LLAMA_PATH`.
 */
export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {
}
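/*
 * Illustrative sketch (not part of the declaration): constructing the model
 * with the single required input, `modelPath`. The import path assumes this
 * file ships as `@langchain/community/llms/llama_cpp`; the model path is read
 * from the `LLAMA_PATH` environment variable mentioned above.
 *
 *   import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
 *
 *   const model = new LlamaCpp({ modelPath: process.env.LLAMA_PATH! });
 */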
export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  /** The maximum number of tokens the response should contain. */
  maxTokens?: number;
  /** A callback invoked with each batch of generated token IDs. */
  onToken?: (tokens: number[]) => void;
}
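/*
 * Illustrative sketch: passing call options on a single invocation, using the
 * `model` instance from the sketch above. `maxTokens` caps the length of the
 * completion and `onToken` receives the generated token IDs as they are
 * produced; the prompt text is a placeholder.
 *
 *   const text = await model.invoke("Tell me a short story about a llama.", {
 *     maxTokens: 64,
 *     onToken: (tokens) => console.log("generated token IDs:", tokens),
 *   });
 */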
/**
 * To use this model you need to have the `node-llama-cpp` module installed.
 * It can be installed with `npm install -S node-llama-cpp`; the minimum
 * supported version is 2.0.0.
 * This also requires that you have a locally built version of Llama 2 installed.
 */
export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
  lc_serializable: boolean;
  static inputs: LlamaCppInputs;
  maxTokens?: number;
  temperature?: number;
  topK?: number;
  topP?: number;
  trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession;
  _jsonSchema: LlamaJsonSchemaGrammar<GbnfJsonSchema> | undefined;
  _gbnf: LlamaGrammar | undefined;
  static lc_name(): string;
  constructor(inputs: LlamaCppInputs);
  _llmType(): string;
  /** @ignore */
  _call(prompt: string, options?: this["ParsedCallOptions"]): Promise<string>;
  _streamResponseChunks(prompt: string, _options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<GenerationChunk>;
}
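/*
 * Illustrative end-to-end sketch, assuming `node-llama-cpp` (>= 2.0.0) is
 * installed and `LLAMA_PATH` points at a local Llama 2 model file. `invoke()`
 * returns the full completion, while `stream()` yields chunks produced by
 * `_streamResponseChunks` as they are generated.
 *
 *   import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
 *
 *   const model = new LlamaCpp({
 *     modelPath: process.env.LLAMA_PATH!,
 *     temperature: 0.7,
 *     maxTokens: 128,
 *   });
 *
 *   const answer = await model.invoke("Where do llamas come from?");
 *
 *   const stream = await model.stream("Tell me a short story about a llama.");
 *   for await (const chunk of stream) {
 *     process.stdout.write(chunk);
 *   }
 */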