// Source: agsamantha/node_modules/langsmith/dist/evaluation/_runner.d.ts
// Snapshot: 2024-10-02 15:15:21 -05:00 — 171 lines, 6.5 KiB, TypeScript declaration file.

import { Client } from "../index.js";
import { Example, KVMap, Run, TracerSession } from "../schemas.js";
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
/**
 * The system under evaluation. Accepts either:
 *  - a plain function of `(input, config?)`, sync or async, or
 *  - an object exposing an `invoke(input, config?)` method, sync or async
 *    (presumably a LangChain Runnable — confirm against callers).
 *
 * NOTE(review): the four branches are kept distinct rather than merged into a
 * single `TOutput | Promise<TOutput>` return — merging would widen the type.
 */
type TargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: KVMap) => Promise<TOutput>) | ((input: TInput, config?: KVMap) => TOutput) | {
invoke: (input: TInput, config?: KVMap) => TOutput;
} | {
invoke: (input: TInput, config?: KVMap) => Promise<TOutput>;
};
type DataT = string | AsyncIterable<Example> | Example[];
type SummaryEvaluatorT = ((runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResults>) | ((runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResults);
type EvaluatorT = RunEvaluator | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults) | ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>);
/** One prediction-step result: the traced run paired with its source example. */
interface _ForwardResults {
run: Run;
example: Example;
}
/** Constructor arguments for {@link _ExperimentManager}. */
interface _ExperimentManagerArgs {
/** Data source: dataset name, example array, or async example stream. */
data?: DataT;
/** An existing experiment session, or the name of one, to attach to. */
experiment?: TracerSession | string;
/** Free-form metadata recorded on the experiment. */
metadata?: KVMap;
/** LangSmith client; presumably a default client is constructed when omitted — confirm in implementation. */
client?: Client;
/** A previously produced stream of runs (e.g. handed off from a prior pipeline stage). */
runs?: AsyncGenerator<Run>;
/** A previously produced stream of per-run evaluation results. */
evaluationResults?: AsyncGenerator<EvaluationResults>;
/** Deferred summary evaluations: each yielded function takes the full run array and yields result batches. */
summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
/** Pre-resolved examples; presumably used instead of resolving `data` — confirm against callers. */
examples?: Example[];
/** Number of times each example is run. */
numRepetitions?: number;
/** Runs already materialized into an array, if available. */
_runsArray?: Run[];
}
/** Configuration accepted by {@link evaluate}. */
export interface EvaluateOptions {
/**
 * The dataset to evaluate on. Can be a dataset name, a list of
 * examples, or a generator of examples.
 */
data: DataT;
/**
 * Evaluators run against each individual (run, example) pair.
 * @default undefined
 */
evaluators?: Array<EvaluatorT>;
/**
 * Evaluators run once over all runs and examples of the experiment
 * (e.g. aggregate metrics).
 * @default undefined
 */
summaryEvaluators?: Array<SummaryEvaluatorT>;
/**
 * Free-form metadata to attach to the experiment.
 * @default undefined
 */
metadata?: KVMap;
/**
 * A prefix to provide for your experiment name; presumably a unique
 * suffix is appended — confirm in implementation.
 * @default undefined
 */
experimentPrefix?: string;
/**
 * A free-form description of the experiment.
 */
description?: string;
/**
 * The maximum number of concurrent target/evaluator invocations.
 * NOTE(review): behavior when undefined (unbounded vs. serial) is not
 * visible from this declaration — confirm in implementation.
 * @default undefined
 */
maxConcurrency?: number;
/**
 * The LangSmith client to use.
 * @default undefined
 */
client?: Client;
/**
 * The number of repetitions to perform. Each example
 * will be run this many times.
 * @default 1
 */
numRepetitions?: number;
}
/**
 * Run an evaluation of `target` over a dataset.
 *
 * @param target - The system under test: a function or `invoke`-bearing
 *   object applied to each example's input.
 * @param options - Dataset, evaluators, and experiment configuration.
 * @returns Streaming experiment results (async-iterable row by row).
 */
export declare function evaluate(target: TargetT, options: EvaluateOptions): Promise<ExperimentResults>;
/** A single streamed result: one run, its source example, and its evaluations. */
interface ExperimentResultRow {
run: Run;
example: Example;
evaluationResults: EvaluationResults;
}
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 */
export declare class _ExperimentManager {
/** Raw data source (dataset name, example array, or async stream). */
_data?: DataT;
/** Lazily produced run stream from the prediction step. */
_runs?: AsyncGenerator<Run>;
/** Lazily produced per-run evaluation results. */
_evaluationResults?: AsyncGenerator<EvaluationResults>;
/** Deferred summary evaluations; each yielded function consumes the full run array. */
_summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults, any, unknown>, any, unknown>;
/** Cached examples once resolved from `_data`. */
_examples?: Example[];
/** Times each example is run (see EvaluateOptions.numRepetitions). */
_numRepetitions?: number;
/** Runs materialized into an array, when collected. */
_runsArray?: Run[];
/** LangSmith client used for all API calls. */
client: Client;
/** The tracer session backing this experiment, once created/resolved. */
_experiment?: TracerSession;
_experimentName: string;
_metadata: KVMap;
_description?: string;
get experimentName(): string;
/** Resolve and return the examples for this experiment (cached in `_examples` — confirm in implementation). */
getExamples(): Promise<Array<Example>>;
setExamples(examples: Example[]): void;
/** The id of the dataset the examples belong to. */
get datasetId(): Promise<string>;
get evaluationResults(): AsyncGenerator<EvaluationResults>;
get runs(): AsyncGenerator<Run>;
constructor(args: _ExperimentManagerArgs);
/** Return the backing tracer session; presumably throws if not started — confirm in implementation. */
_getExperiment(): TracerSession;
_getExperimentMetadata(): Promise<KVMap>;
/** Create the tracing project (experiment) on the LangSmith backend. */
_createProject(firstExample: Example, projectMetadata: KVMap): Promise<TracerSession>;
/** Get-or-create the tracing project for this experiment. */
_getProject(firstExample: Example): Promise<TracerSession>;
/** Print a console banner when the experiment starts. */
protected _printExperimentStart(): Promise<void>;
/** Initialize the experiment and return the (started) manager for chaining. */
start(): Promise<_ExperimentManager>;
/** Return a new manager whose run stream is `target` applied to each example. */
withPredictions(target: TargetT, options?: {
maxConcurrency?: number;
}): Promise<_ExperimentManager>;
/** Return a new manager with per-run evaluators applied to the prediction stream. */
withEvaluators(evaluators: Array<EvaluatorT | RunEvaluator>, options?: {
maxConcurrency?: number;
}): Promise<_ExperimentManager>;
/** Return a new manager with summary evaluators scheduled over all runs. */
withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
/** Stream the experiment rows (run + example + evaluations) as they complete. */
getResults(): AsyncGenerator<ExperimentResultRow>;
/** Collect the aggregated summary-evaluator scores. */
getSummaryScores(): Promise<EvaluationResults>;
/**
 * Run the target function or runnable on the examples.
 * @param {TargetT} target The target function or runnable to evaluate.
 * @param options
 * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
 */
_predict(target: TargetT, options?: {
maxConcurrency?: number;
}): AsyncGenerator<_ForwardResults>;
/** Apply `evaluators` to one result row, returning the row with its evaluations attached. */
_runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
client: Client;
}): Promise<ExperimentResultRow>;
/**
 * Run the evaluators on the prediction stream.
 * Expects runs to be available in the manager.
 * (e.g. from a previous prediction step)
 * @param {Array<RunEvaluator>} evaluators
 * @param {number} maxConcurrency
 */
_score(evaluators: Array<RunEvaluator>, options?: {
maxConcurrency?: number;
}): AsyncGenerator<ExperimentResultRow>;
/** Schedule summary evaluators; each yielded function consumes the full run array. */
_applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
/** The dataset version the examples were drawn from, if any. */
_getDatasetVersion(): Promise<string | undefined>;
/** The dataset splits covered by the examples, if any. */
_getDatasetSplits(): Promise<string[] | undefined>;
/** Finalize the experiment; presumably closes out the tracer session — confirm in implementation. */
_end(): Promise<void>;
}
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
private manager;
/** Rows accumulated so far. */
results: ExperimentResultRow[];
/** How many rows have been consumed via the iterator. */
processedCount: number;
/** Aggregated summary-evaluator scores. */
summaryResults: EvaluationResults;
constructor(experimentManager: _ExperimentManager);
get experimentName(): string;
[Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
/** Yield the next result row as it becomes available. */
next(): Promise<IteratorResult<ExperimentResultRow>>;
/** Drain the manager, populating `results` and `summaryResults`. */
processData(manager: _ExperimentManager): Promise<void>;
/** Number of rows accumulated so far. */
get length(): number;
}
export {};