agsamantha/node_modules/@langchain/community/dist/llms/llama_cpp.js
2024-10-02 15:15:21 -05:00

141 lines
4.8 KiB
JavaScript

import { LLM, } from "@langchain/core/language_models/llms";
import { GenerationChunk } from "@langchain/core/outputs";
import { createLlamaModel, createLlamaContext, createLlamaSession, createLlamaJsonSchemaGrammar, createCustomGrammar, } from "../utils/llama_cpp.js";
/**
* To use this model you need to have the `node-llama-cpp` module installed.
* This can be installed using `npm install -S node-llama-cpp`; the minimum
* supported version is 2.0.0.
* This also requires that you have a locally built version of Llama2 installed.
*/
export class LlamaCpp extends LLM {
    /** Returns the class identifier string `"LlamaCpp"`. */
    static lc_name() {
        return "LlamaCpp";
    }
    /**
     * Stores the sampling settings found on `inputs` and builds the model,
     * context, session and optional grammars through the helpers imported
     * from `../utils/llama_cpp.js`.
     *
     * @param inputs Options object. The fields read here are `maxTokens`,
     *   `temperature`, `topK`, `topP`, `trimWhitespaceSuffix`, `jsonSchema`
     *   and `gbnf`; the whole object is also forwarded to the base class and
     *   to the model/context factory helpers.
     */
    constructor(inputs) {
        super(inputs);
        Object.defineProperty(this, "lc_serializable", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: true,
        });
        // Declare every instance field up front, in a fixed order, as plain
        // enumerable/configurable/writable data properties.
        const fieldNames = [
            "maxTokens",
            "temperature",
            "topK",
            "topP",
            "trimWhitespaceSuffix",
            "_model",
            "_context",
            "_session",
            "_jsonSchema",
            "_gbnf",
        ];
        for (const fieldName of fieldNames) {
            Object.defineProperty(this, fieldName, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: undefined,
            });
        }
        this.maxTokens = inputs?.maxTokens;
        this.temperature = inputs?.temperature;
        this.topK = inputs?.topK;
        this.topP = inputs?.topP;
        this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
        // Each step depends on the previous one: model -> context -> session.
        this._model = createLlamaModel(inputs);
        this._context = createLlamaContext(this._model, inputs);
        this._session = createLlamaSession(this._context);
        this._jsonSchema = createLlamaJsonSchemaGrammar(inputs?.jsonSchema);
        this._gbnf = createCustomGrammar(inputs?.gbnf);
    }
    /** Returns the model type identifier string `"llama2_cpp"`. */
    _llmType() {
        return "llama2_cpp";
    }
    /**
     * Sends `prompt` to the session and resolves with its completion.
     *
     * @param prompt Prompt passed to `this._session.prompt`.
     * @param options Optional call options; only `onToken` is read here.
     * @returns The value resolved by `this._session.prompt`.
     * @throws {Error} "Error getting prompt completion." on any failure; the
     *   original error is attached as `cause`.
     * @ignore
     */
    async _call(prompt, options) {
        try {
            // A JSON-schema grammar takes precedence over a custom GBNF
            // grammar; when neither is set the grammar stays undefined.
            const promptGrammar = this._jsonSchema !== undefined
                ? this._jsonSchema
                : this._gbnf;
            const promptOptions = {
                grammar: promptGrammar,
                onToken: options?.onToken,
                maxTokens: this.maxTokens,
                temperature: this.temperature,
                topK: this.topK,
                topP: this.topP,
                trimWhitespaceSuffix: this.trimWhitespaceSuffix,
            };
            return await this._session.prompt(prompt, promptOptions);
        }
        catch (e) {
            // Keep the public message stable, but preserve the root cause so
            // callers can see why the completion failed.
            throw new Error("Error getting prompt completion.", { cause: e });
        }
    }
    /**
     * Streams a completion for `prompt`, yielding one `GenerationChunk` per
     * value produced by `this._context.evaluate`.
     *
     * @param prompt Prompt; encoded with `this._context.encode` before evaluation.
     * @param _options Unused.
     * @param runManager Optional; its `handleLLMNewToken` is awaited with the
     *   decoded text after each chunk has been yielded.
     */
    async *_streamResponseChunks(prompt, _options, runManager) {
        const promptOptions = {
            temperature: this.temperature,
            maxTokens: this.maxTokens,
            topK: this.topK,
            topP: this.topP,
        };
        const stream = await this.caller.call(async () => this._context.evaluate(this._context.encode(prompt), promptOptions));
        for await (const chunk of stream) {
            // Decode once and reuse the text for both the chunk and the callback.
            const text = this._context.decode([chunk]);
            yield new GenerationChunk({
                text,
                generationInfo: {},
            });
            await runManager?.handleLLMNewToken(text ?? "");
        }
    }
}