// agsamantha/node_modules/langchain/dist/smith/runner_utils.js
import { mapStoredMessagesToChatMessages } from "@langchain/core/messages";
import { Runnable, RunnableLambda, getCallbackManagerForConfig, } from "@langchain/core/runnables";
import { LangChainTracer } from "@langchain/core/tracers/tracer_langchain";
import { BaseTracer } from "@langchain/core/tracers/base";
import { AsyncCaller } from "@langchain/core/utils/async_caller";
import { Client, RunTree, } from "langsmith";
import { loadEvaluator } from "../evaluation/loader.js";
import { isOffTheShelfEvaluator, isCustomEvaluator, } from "./config.js";
import { randomName } from "./name_generation.js";
import { ProgressBar } from "./progress.js";
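/**
* Callback handler that captures the run ID of the first chain run it observes
* and exposes it as a promise via `extract()`.
*/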
class SingleRunIdExtractor {
constructor() {
Object.defineProperty(this, "runIdPromiseResolver", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "runIdPromise", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "handleChainStart", {
enumerable: true,
configurable: true,
writable: true,
value: (_chain, _inputs, runId) => {
this.runIdPromiseResolver(runId);
}
});
this.runIdPromise = new Promise((extract) => {
this.runIdPromiseResolver = extract;
});
}
async extract() {
return this.runIdPromise;
}
}
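/**
* Tracer that resolves with the first run it is asked to persist, letting the
* caller retrieve the traced run object via `extract()`.
*/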
class SingleRunExtractor extends BaseTracer {
constructor() {
super();
Object.defineProperty(this, "runPromiseResolver", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "runPromise", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
/** The name of the callback handler. */
Object.defineProperty(this, "name", {
enumerable: true,
configurable: true,
writable: true,
value: "single_run_extractor"
});
this.runPromise = new Promise((extract) => {
this.runPromiseResolver = extract;
});
}
async persistRun(run) {
this.runPromiseResolver(run);
}
async extract() {
return this.runPromise;
}
}
/**
* Wraps an evaluator function and implements the RunEvaluator interface.
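* @example
* A minimal sketch of the shape such an evaluator function receives and returns;
* the function name, feedback key, and the single `output` field on the run and
* reference outputs are hypothetical:
* ```typescript
* const exactMatch = async ({ prediction, reference }) => ({
*   key: "exact_match",
*   score: prediction?.output === reference?.output ? 1 : 0,
* });
* const evaluator = new DynamicRunEvaluator(exactMatch);
* ```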
*/
class DynamicRunEvaluator {
constructor(evaluator) {
Object.defineProperty(this, "evaluator", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.evaluator = new RunnableLambda({ func: evaluator });
}
/**
* Evaluates a run with an optional example and returns the evaluation result.
* @param run The run to evaluate.
* @param example The optional example to use for evaluation.
* @returns A promise that resolves to the evaluation result.
*/
async evaluateRun(run, example) {
const extractor = new SingleRunIdExtractor();
const tracer = new LangChainTracer({ projectName: "evaluators" });
const result = await this.evaluator.invoke({
run,
example,
input: run.inputs,
prediction: run.outputs,
reference: example?.outputs,
}, {
callbacks: [extractor, tracer],
});
const runId = await extractor.extract();
return {
sourceRunId: runId,
...result,
};
}
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function isLLMStringEvaluator(evaluator) {
return evaluator && typeof evaluator.evaluateStrings === "function";
}
/**
* Internal implementation of RunTree, which uses the
* provided callback manager instead of the internal LangSmith client.
*
* The goal of this class is to ensure seamless interop when integrated
* with other Runnables.
*/
class CallbackManagerRunTree extends RunTree {
constructor(config, callbackManager) {
super(config);
Object.defineProperty(this, "callbackManager", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "activeCallbackManager", {
enumerable: true,
configurable: true,
writable: true,
value: undefined
});
this.callbackManager = callbackManager;
}
createChild(config) {
const child = new CallbackManagerRunTree({
...config,
parent_run: this,
project_name: this.project_name,
client: this.client,
}, this.activeCallbackManager?.getChild() ?? this.callbackManager);
this.child_runs.push(child);
return child;
}
async postRun() {
// Unlike the base RunTree, report the run start through the provided callback manager rather than posting directly to the LangSmith client.
this.activeCallbackManager = await this.callbackManager.handleChainStart(typeof this.serialized === "object" &&
this.serialized != null &&
"lc" in this.serialized
? this.serialized
: {
id: ["langchain", "smith", "CallbackManagerRunTree"],
lc: 1,
type: "not_implemented",
}, this.inputs, this.id, this.run_type, undefined, undefined, this.name);
}
async patchRun() {
if (this.error) {
await this.activeCallbackManager?.handleChainError(this.error, this.id, this.parent_run?.id, undefined, undefined);
}
else {
await this.activeCallbackManager?.handleChainEnd(this.outputs ?? {}, this.id, this.parent_run?.id, undefined, undefined);
}
}
}
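/**
* Runnable wrapper around a langsmith `traceable`-wrapped function, so it can be
* invoked and batched like any other Runnable while reporting its runs through
* the active callback manager via a CallbackManagerRunTree.
*/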
class RunnableTraceable extends Runnable {
constructor(fields) {
super(fields);
Object.defineProperty(this, "lc_serializable", {
enumerable: true,
configurable: true,
writable: true,
value: false
});
Object.defineProperty(this, "lc_namespace", {
enumerable: true,
configurable: true,
writable: true,
value: ["langchain_core", "runnables"]
});
Object.defineProperty(this, "func", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
if (!isLangsmithTraceableFunction(fields.func)) {
throw new Error("RunnableTraceable requires a function that is wrapped in traceable higher-order function");
}
this.func = fields.func;
}
async invoke(input, options) {
const [config] = this._getOptionsList(options ?? {}, 1);
const callbackManager = await getCallbackManagerForConfig(config);
const partialConfig = "langsmith:traceable" in this.func
? this.func["langsmith:traceable"]
: { name: "<lambda>" };
if (!callbackManager)
throw new Error("CallbackManager not found");
const runTree = new CallbackManagerRunTree({
...partialConfig,
parent_run: callbackManager?._parentRunId
? new RunTree({ name: "<parent>", id: callbackManager?._parentRunId })
: undefined,
}, callbackManager);
if (typeof input === "object" &&
input != null &&
Object.keys(input).length === 1) {
if ("args" in input && Array.isArray(input)) {
return (await this.func(runTree, ...input));
}
if ("input" in input &&
!(typeof input === "object" &&
input != null &&
!Array.isArray(input) &&
// eslint-disable-next-line no-instanceof/no-instanceof
!(input instanceof Date))) {
try {
return (await this.func(runTree, input.input));
}
catch (err) {
return (await this.func(runTree, input));
}
}
}
return (await this.func(runTree, input));
}
}
/**
* Wraps an off-the-shelf evaluator (of EvaluatorType[T], loaded via loadEvaluator)
* and composes with a prepareData function so the user can prepare the trace and
* dataset data for the evaluator.
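* @example
* A rough sketch of a `formatEvaluatorInputs` function; the `question`, `output`,
* and `answer` field names are assumptions about the dataset, not part of this API:
* ```typescript
* const formatEvaluatorInputs = ({ rawInput, rawPrediction, rawReferenceOutput }) => ({
*   input: rawInput.question,
*   prediction: rawPrediction?.output,
*   reference: rawReferenceOutput?.answer,
* });
* ```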
*/
class PreparedRunEvaluator {
constructor(evaluator, evaluationName, formatEvaluatorInputs) {
Object.defineProperty(this, "evaluator", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "formatEvaluatorInputs", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "isStringEvaluator", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "evaluationName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.evaluator = evaluator;
this.isStringEvaluator = typeof evaluator?.evaluateStrings === "function";
this.evaluationName = evaluationName;
this.formatEvaluatorInputs = formatEvaluatorInputs;
}
static async fromEvalConfig(config) {
const evaluatorType = typeof config === "string" ? config : config.evaluatorType;
const evalConfig = typeof config === "string" ? {} : config;
const evaluator = await loadEvaluator(evaluatorType, evalConfig);
const feedbackKey = evalConfig?.feedbackKey ?? evaluator?.evaluationName;
if (!isLLMStringEvaluator(evaluator)) {
throw new Error(`Evaluator of type ${evaluatorType} not yet supported. ` +
"Please use a string evaluator, or implement your " +
"evaluation logic as a custom evaluator.");
}
if (!feedbackKey) {
throw new Error(`Evaluator of type ${evaluatorType} must have an evaluationName` +
` or feedbackKey. Please manually provide a feedbackKey in the EvalConfig.`);
}
return new PreparedRunEvaluator(evaluator, feedbackKey, evalConfig?.formatEvaluatorInputs);
}
/**
* Evaluates a run with an optional example and returns the evaluation result.
* @param run The run to evaluate.
* @param example The optional example to use for evaluation.
* @returns A promise that resolves to the evaluation result.
*/
async evaluateRun(run, example) {
const { prediction, input, reference } = this.formatEvaluatorInputs({
rawInput: run.inputs,
rawPrediction: run.outputs,
rawReferenceOutput: example?.outputs,
run,
});
const extractor = new SingleRunIdExtractor();
const tracer = new LangChainTracer({ projectName: "evaluators" });
if (this.isStringEvaluator) {
const evalResult = await this.evaluator.evaluateStrings({
prediction: prediction,
reference: reference,
input: input,
}, {
callbacks: [extractor, tracer],
});
const runId = await extractor.extract();
return {
key: this.evaluationName,
comment: evalResult?.reasoning,
sourceRunId: runId,
...evalResult,
};
}
throw new Error("Evaluator not yet supported. " +
"Please use a string evaluator, or implement your " +
"evaluation logic as a custom evaluator.");
}
}
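/**
* Container for the resolved set of run evaluators (custom and off-the-shelf)
* built from a RunEvalConfig.
*/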
class LoadedEvalConfig {
constructor(evaluators) {
Object.defineProperty(this, "evaluators", {
enumerable: true,
configurable: true,
writable: true,
value: evaluators
});
}
static async fromRunEvalConfig(config) {
// Custom evaluators are applied "as-is"
const customEvaluators = (config?.customEvaluators ?? config.evaluators?.filter(isCustomEvaluator))?.map((evaluator) => {
if (typeof evaluator === "function") {
return new DynamicRunEvaluator(evaluator);
}
else {
return evaluator;
}
});
const offTheShelfEvaluators = await Promise.all(config?.evaluators
?.filter(isOffTheShelfEvaluator)
?.map(async (evaluator) => await PreparedRunEvaluator.fromEvalConfig(evaluator)) ?? []);
return new LoadedEvalConfig((customEvaluators ?? []).concat(offTheShelfEvaluators ?? []));
}
}
/**
* Internals expect a constructor () -> Runnable. This function wraps/coerces
* the provided LangChain object, custom function, or factory function into
* a constructor of a runnable.
* @param modelOrFactory The model or factory to create a wrapped model from.
* @returns A function that returns the wrapped model.
* @throws Error if the modelOrFactory is invalid.
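* @example
* An illustrative sketch of the accepted shapes, assuming `myChain`, `buildChain`,
* and `myFunction` are defined elsewhere and `traceable` comes from `langsmith/traceable`:
* ```typescript
* await createWrappedModel(myChain);               // an existing Runnable
* await createWrappedModel(() => buildChain());    // a factory that constructs the chain
* await createWrappedModel(traceable(myFunction)); // a traceable-wrapped function
* ```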
*/
const createWrappedModel = async (modelOrFactory) => {
if (Runnable.isRunnable(modelOrFactory)) {
return () => modelOrFactory;
}
if (typeof modelOrFactory === "function") {
if (isLangsmithTraceableFunction(modelOrFactory)) {
const wrappedModel = new RunnableTraceable({ func: modelOrFactory });
return () => wrappedModel;
}
try {
// If it works with no arguments, assume it's a factory
let res = modelOrFactory();
if (res &&
typeof res.then === "function") {
res = await res;
}
return modelOrFactory;
}
catch (err) {
// Otherwise, it's a custom UDF, and we'll wrap
// the function in a lambda
const wrappedModel = new RunnableLambda({ func: modelOrFactory });
return () => wrappedModel;
}
}
throw new Error("Invalid modelOrFactory");
};
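/**
* Loads all examples from the named dataset and builds a per-example callback
* config (a LangChainTracer bound to the example plus a SingleRunExtractor).
*/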
const loadExamples = async ({ datasetName, client, projectName, }) => {
const exampleIterator = client.listExamples({ datasetName });
const configs = [];
const runExtractors = [];
const examples = [];
for await (const example of exampleIterator) {
const runExtractor = new SingleRunExtractor();
configs.push({
callbacks: [
new LangChainTracer({ exampleId: example.id, projectName }),
runExtractor,
],
});
examples.push(example);
runExtractors.push(runExtractor);
}
return {
configs,
examples,
runExtractors,
};
};
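/**
* Runs every evaluator against every traced run (capped at `maxConcurrency`)
* and collects the resulting feedback keyed by example ID.
*/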
const applyEvaluators = async ({ evaluation, runs, examples, client, maxConcurrency, }) => {
// TODO: Parallelize and/or put in callbacks to speed up evals.
const { evaluators } = evaluation;
const progress = new ProgressBar({
total: examples.length,
format: "Running Evaluators: {bar} {percentage}% | {value}/{total}\n",
});
const caller = new AsyncCaller({
maxConcurrency,
});
const requests = runs.map(async (run, i) => caller.call(async () => {
const evaluatorResults = await Promise.allSettled(evaluators.map((evaluator) => client.evaluateRun(run, evaluator, {
referenceExample: examples[i],
loadChildRuns: false,
})));
progress.increment();
return {
execution_time: run?.end_time && run.start_time
? run.end_time - run.start_time
: undefined,
feedback: evaluatorResults.map((evalResult) => evalResult.status === "fulfilled"
? evalResult.value
: evalResult.reason),
run_id: run.id,
};
}));
const results = await Promise.all(requests);
return results.reduce((acc, result, i) => ({
...acc,
[examples[i].id]: result,
}), {});
};
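/**
* Maps dataset examples to the inputs passed to the chain or model under test,
* unpacking chat-message datasets and flattening single-value inputs for LLMs.
*/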
const getExamplesInputs = (examples, chainOrFactory, dataType) => {
if (dataType === "chat") {
// For some batty reason, we store the chat dataset differently.
// { type: "system", data: { content: inputs.input } },
// But we need to create AIMessage, SystemMessage, etc.
return examples.map(({ inputs }) => mapStoredMessagesToChatMessages(inputs.input));
}
// If it's a language model and ALL example inputs have a single value,
// then we can be friendly and flatten the inputs to a list of strings.
const isLanguageModel = typeof chainOrFactory === "object" &&
typeof chainOrFactory._llmType === "function";
if (isLanguageModel &&
examples.every(({ inputs }) => Object.keys(inputs).length === 1)) {
return examples.map(({ inputs }) => Object.values(inputs)[0]);
}
return examples.map(({ inputs }) => inputs);
};
/**
* Evaluates a given model or chain against a specified LangSmith dataset.
*
* This function fetches example records from the specified dataset,
* runs the model or chain against each example, and returns the evaluation
* results.
*
* @param chainOrFactory - A model or factory/constructor function to be evaluated. It can be a
* Runnable instance, a factory function that returns a Runnable, or a user-defined
* function or factory.
*
* @param datasetName - The name of the dataset against which the evaluation will be
* performed. This dataset should already be defined and contain the relevant data
* for evaluation.
*
* @param options - (Optional) Additional parameters for the evaluation process:
* - `evaluators` (RunEvalType[]): Evaluators to apply to a dataset run.
* - `formatEvaluatorInputs` (EvaluatorInputFormatter): Convert the evaluation data into formats that can be used by the evaluator.
* - `projectName` (string): Name of the project for logging and tracking.
* - `projectMetadata` (Record<string, unknown>): Additional metadata for the project.
* - `client` (Client): Client instance for LangSmith service interaction.
* - `maxConcurrency` (number): Maximum concurrency level for dataset processing.
*
* @returns A promise that resolves to an `EvalResults` object. This object includes
* detailed results of the evaluation, such as execution time, run IDs, and feedback
* for each entry in the dataset.
*
* @example
* ```typescript
* // Example usage for evaluating a model on a dataset
* async function evaluateModel() {
*   const chain = /* ...create your model or chain... *\/;
*   const datasetName = 'example-dataset';
*   const client = new Client(/* ...config... *\/);
*
*   const results = await runOnDataset(chain, datasetName, {
*     evaluators: [/* ...evaluators... *\/],
*     client,
*   });
*
*   console.log('Evaluation Results:', results);
* }
*
* evaluateModel();
* ```
* In this example, `runOnDataset` is used to evaluate a language model (or a chain of models) against
* a dataset named 'example-dataset'. The evaluation process is configured using `RunOnDatasetParams["evaluators"]`, which can
* include both standard and custom evaluators. The `Client` instance is used to interact with the LangSmith service.
* The function returns the evaluation results, which can be logged or further processed as needed.
*/
export async function runOnDataset(chainOrFactory, datasetName, options) {
const { projectName, projectMetadata, client, maxConcurrency, } = options ?? {};
const evaluationConfig = options?.evaluationConfig ??
(options?.evaluators != null
? {
evaluators: options.evaluators,
formatEvaluatorInputs: options.formatEvaluatorInputs,
}
: undefined);
const wrappedModel = await createWrappedModel(chainOrFactory);
const testClient = client ?? new Client();
const testProjectName = projectName ?? randomName();
const dataset = await testClient.readDataset({ datasetName });
const datasetId = dataset.id;
const testConcurrency = maxConcurrency ?? 5;
const { configs, examples, runExtractors } = await loadExamples({
datasetName,
client: testClient,
projectName: testProjectName,
maxConcurrency: testConcurrency,
});
await testClient.createProject({
projectName: testProjectName,
referenceDatasetId: datasetId,
projectExtra: { metadata: { ...projectMetadata } },
});
const wrappedRunnable = new RunnableLambda({
func: wrappedModel,
}).withConfig({ runName: "evaluationRun" });
const runInputs = getExamplesInputs(examples, chainOrFactory, dataset.data_type);
const progress = new ProgressBar({
total: runInputs.length,
format: "Predicting: {bar} {percentage}% | {value}/{total}",
});
// TODO: Collect the runs as well.
await wrappedRunnable
.withListeners({
onEnd: () => progress.increment(),
})
// TODO: Insert evaluation inline for immediate feedback.
.batch(runInputs, configs, {
maxConcurrency,
returnExceptions: true,
});
progress.complete();
const runs = [];
for (let i = 0; i < examples.length; i += 1) {
runs.push(await runExtractors[i].extract());
}
let evalResults = {};
if (evaluationConfig) {
const loadedEvalConfig = await LoadedEvalConfig.fromRunEvalConfig(evaluationConfig);
evalResults = await applyEvaluators({
evaluation: loadedEvalConfig,
runs,
examples,
client: testClient,
maxConcurrency: testConcurrency,
});
}
const results = {
projectName: testProjectName,
results: evalResults ?? {},
};
return results;
}
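/** Checks whether a function was wrapped by langsmith's `traceable` helper. */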
function isLangsmithTraceableFunction(x) {
return typeof x === "function" && "langsmith:traceable" in x;
}