import { LLM } from "@langchain/core/language_models/llms";
import { GenerationChunk } from "@langchain/core/outputs";
import {
    createLlamaModel,
    createLlamaContext,
    createLlamaSession,
    createLlamaJsonSchemaGrammar,
    createCustomGrammar,
} from "../utils/llama_cpp.js";
/**
 * To use this model you need to have the `node-llama-cpp` module installed.
 * It can be installed using `npm install -S node-llama-cpp`; the minimum
 * supported version is 2.0.0.
 * This also requires that you have a locally built version of Llama2 installed.
 */
export class LlamaCpp extends LLM {
    static lc_name() {
        return "LlamaCpp";
    }
    constructor(inputs) {
        super(inputs);
        Object.defineProperty(this, "lc_serializable", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: true
        });
        Object.defineProperty(this, "maxTokens", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "temperature", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "topK", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "topP", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "trimWhitespaceSuffix", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_context", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_session", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_jsonSchema", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_gbnf", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.maxTokens = inputs?.maxTokens;
        this.temperature = inputs?.temperature;
        this.topK = inputs?.topK;
        this.topP = inputs?.topP;
        this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
        // Set up the underlying llama.cpp model, context, and chat session,
        // plus any optional grammar used to constrain generation.
        this._model = createLlamaModel(inputs);
        this._context = createLlamaContext(this._model, inputs);
        this._session = createLlamaSession(this._context);
        this._jsonSchema = createLlamaJsonSchemaGrammar(inputs?.jsonSchema);
        this._gbnf = createCustomGrammar(inputs?.gbnf);
    }
    _llmType() {
        return "llama2_cpp";
    }
    /** @ignore */
    async _call(prompt, options) {
        try {
            // Prefer the JSON schema grammar if provided, then a custom GBNF
            // grammar; otherwise generate unconstrained.
            let promptGrammar;
            if (this._jsonSchema !== undefined) {
                promptGrammar = this._jsonSchema;
            }
            else if (this._gbnf !== undefined) {
                promptGrammar = this._gbnf;
            }
            else {
                promptGrammar = undefined;
            }
            const promptOptions = {
                grammar: promptGrammar,
                onToken: options?.onToken,
                maxTokens: this?.maxTokens,
                temperature: this?.temperature,
                topK: this?.topK,
                topP: this?.topP,
                trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
            };
            const completion = await this._session.prompt(prompt, promptOptions);
            return completion;
        }
        catch (e) {
            throw new Error("Error getting prompt completion.");
        }
    }
    async *_streamResponseChunks(prompt, _options, runManager) {
        const promptOptions = {
            temperature: this?.temperature,
            maxTokens: this?.maxTokens,
            topK: this?.topK,
            topP: this?.topP,
        };
        // Encode the prompt and evaluate it through the context, which yields
        // generated tokens one at a time.
        const stream = await this.caller.call(async () => this._context.evaluate(this._context.encode(prompt), promptOptions));
        for await (const chunk of stream) {
            const decoded = this._context.decode([chunk]);
            yield new GenerationChunk({
                text: decoded,
                generationInfo: {},
            });
            await runManager?.handleLLMNewToken(decoded ?? "");
        }
    }
}
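/*
 * Usage sketch (not part of this module): a minimal example of constructing
 * and invoking the model. The model path below is a placeholder, and the
 * import path assumes the standard community package layout; adjust both to
 * your setup.
 *
 *   import { LlamaCpp } from "@langchain/community/llms/llama_cpp";
 *
 *   const model = new LlamaCpp({
 *     modelPath: "/path/to/your/llama-model.gguf",
 *     temperature: 0.7,
 *     maxTokens: 256,
 *   });
 *
 *   const response = await model.invoke("Tell me a short story about a happy llama.");
 *   console.log(response);
 */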