import {
  LlamaModel,
  LlamaContext,
  LlamaChatSession,
  LlamaJsonSchemaGrammar,
  LlamaGrammar,
  GbnfJsonSchema,
} from "node-llama-cpp";
import {
  LLM,
  type BaseLLMCallOptions,
  type BaseLLMParams,
} from "@langchain/core/language_models/llms";
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
import { GenerationChunk } from "@langchain/core/outputs";
import { LlamaBaseCppInputs } from "../utils/llama_cpp.js";

/**
 * Note that the modelPath is the only required parameter. For testing you
 * can set this in the environment variable `LLAMA_PATH`.
 */
export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {}

export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  /** The maximum number of tokens the response should contain. */
  maxTokens?: number;
  /** A function called when matching the provided token array. */
  onToken?: (tokens: number[]) => void;
}

/**
 * To use this model you need to have the `node-llama-cpp` module installed.
 * It can be installed with `npm install -S node-llama-cpp`; the minimum
 * supported version is 2.0.0.
 * This also requires that you have a locally built version of Llama2 installed.
 */
export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
  lc_serializable: boolean;
  static inputs: LlamaCppInputs;
  maxTokens?: number;
  temperature?: number;
  topK?: number;
  topP?: number;
  trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession;
  _jsonSchema: LlamaJsonSchemaGrammar<GbnfJsonSchema> | undefined;
  _gbnf: LlamaGrammar | undefined;
  static lc_name(): string;
  constructor(inputs: LlamaCppInputs);
  _llmType(): string;
  /** @ignore */
  _call(prompt: string, options?: this["ParsedCallOptions"]): Promise<string>;
  _streamResponseChunks(
    prompt: string,
    _options: this["ParsedCallOptions"],
    runManager?: CallbackManagerForLLMRun
  ): AsyncGenerator<GenerationChunk>;
}
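
// Usage sketch (not part of the declaration above): a minimal example of invoking
// and streaming from the LlamaCpp wrapper. The import path and the local model
// path are assumptions — adjust them to your package layout and GGUF model file.

import { LlamaCpp } from "@langchain/community/llms/llama_cpp"; // assumed entry point

const llamaPath = process.env.LLAMA_PATH!; // path to a locally stored model

// modelPath is the only required input; the sampling fields mirror the
// optional class properties declared above.
const model = new LlamaCpp({
  modelPath: llamaPath,
  temperature: 0.7,
  maxTokens: 256,
});

// One-shot completion via the base Runnable interface (returns a string).
const answer = await model.invoke("Tell me a short story about a happy llama.");
console.log(answer);

// Token-by-token streaming; each chunk is a string piece of the completion.
const stream = await model.stream("List three facts about llamas.");
for await (const chunk of stream) {
  process.stdout.write(chunk);
}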