import { Client } from "../index.js";
import { Example, KVMap, Run, TracerSession } from "../schemas.js";
import { EvaluationResult, EvaluationResults, RunEvaluator } from "./evaluator.js";
type TargetT<TInput = any, TOutput = KVMap> = ((input: TInput, config?: KVMap) => Promise<TOutput>) | ((input: TInput, config?: KVMap) => TOutput) | {
    invoke: (input: TInput, config?: KVMap) => TOutput;
} | {
    invoke: (input: TInput, config?: KVMap) => Promise<TOutput>;
};
type DataT = string | AsyncIterable<Example> | Example[];
type SummaryEvaluatorT = ((runs: Array<Run>, examples: Array<Example>) => Promise<EvaluationResult | EvaluationResults>) | ((runs: Array<Run>, examples: Array<Example>) => EvaluationResult | EvaluationResults);
type EvaluatorT = RunEvaluator | ((run: Run, example?: Example) => EvaluationResult | EvaluationResults) | ((run: Run, example?: Example) => Promise<EvaluationResult | EvaluationResults>);
interface _ForwardResults {
    run: Run;
    example: Example;
}
interface _ExperimentManagerArgs {
    data?: DataT;
    experiment?: TracerSession | string;
    metadata?: KVMap;
    client?: Client;
    runs?: AsyncGenerator<Run>;
    evaluationResults?: AsyncGenerator<EvaluationResults>;
    summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>, any, unknown>;
    examples?: Example[];
    numRepetitions?: number;
    _runsArray?: Run[];
}
export interface EvaluateOptions {
    /**
     * The dataset to evaluate on. Can be a dataset name, a list of
     * examples, or a generator of examples.
     */
    data: DataT;
    /**
     * A list of evaluators to run on each example.
     * @default undefined
     */
    evaluators?: Array<EvaluatorT>;
    /**
     * A list of summary evaluators to run on the entire dataset.
     * @default undefined
     */
    summaryEvaluators?: Array<SummaryEvaluatorT>;
    /**
     * Metadata to attach to the experiment.
     * @default undefined
     */
    metadata?: KVMap;
    /**
     * A prefix to provide for your experiment name.
     * @default undefined
     */
    experimentPrefix?: string;
    /**
     * A free-form description of the experiment.
     */
    description?: string;
    /**
     * The maximum number of concurrent evaluations to run.
     * @default undefined
     */
    maxConcurrency?: number;
    /**
     * The LangSmith client to use.
     * @default undefined
     */
    client?: Client;
    /**
     * The number of repetitions to perform. Each example
     * will be run this many times.
     * @default 1
     */
    numRepetitions?: number;
}
export declare function evaluate(
/**
 * The target system or function to evaluate.
 */
target: TargetT, options: EvaluateOptions): Promise<ExperimentResults>;
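/*
 * Usage sketch for evaluate(). Illustrative only: the "langsmith/evaluation"
 * import path, the "my-dataset" dataset name, and the "exact_match" evaluator
 * below are assumptions for the example, not part of this module's declarations.
 *
 *   import { evaluate } from "langsmith/evaluation";
 *
 *   const results = await evaluate(
 *     // Target: the system under test; receives an example's inputs.
 *     async (input: { question: string }) => ({ answer: input.question.trim() }),
 *     {
 *       data: "my-dataset", // dataset name, Example[], or AsyncIterable<Example>
 *       evaluators: [
 *         // Per-run evaluator comparing the run's outputs to the reference example.
 *         (run, example) => ({
 *           key: "exact_match",
 *           score: run.outputs?.answer === example?.outputs?.answer ? 1 : 0,
 *         }),
 *       ],
 *       experimentPrefix: "baseline",
 *       maxConcurrency: 4,
 *       numRepetitions: 1,
 *     }
 *   );
 */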
interface ExperimentResultRow {
    run: Run;
    example: Example;
    evaluationResults: EvaluationResults;
}
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 */
export declare class _ExperimentManager {
    _data?: DataT;
    _runs?: AsyncGenerator<Run>;
    _evaluationResults?: AsyncGenerator<EvaluationResults>;
    _summaryResults?: AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>, any, unknown>;
    _examples?: Example[];
    _numRepetitions?: number;
    _runsArray?: Run[];
    client: Client;
    _experiment?: TracerSession;
    _experimentName: string;
    _metadata: KVMap;
    _description?: string;
    get experimentName(): string;
    getExamples(): Promise<Array<Example>>;
    setExamples(examples: Example[]): void;
    get datasetId(): Promise<string>;
    get evaluationResults(): AsyncGenerator<EvaluationResults>;
    get runs(): AsyncGenerator<Run>;
    constructor(args: _ExperimentManagerArgs);
    _getExperiment(): TracerSession;
    _getExperimentMetadata(): Promise<KVMap>;
    _createProject(firstExample: Example, projectMetadata: KVMap): Promise<TracerSession>;
    _getProject(firstExample: Example): Promise<TracerSession>;
    protected _printExperimentStart(): Promise<void>;
    start(): Promise<_ExperimentManager>;
    withPredictions(target: TargetT, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withEvaluators(evaluators: Array<EvaluatorT>, options?: {
        maxConcurrency?: number;
    }): Promise<_ExperimentManager>;
    withSummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): Promise<_ExperimentManager>;
    getResults(): AsyncGenerator<ExperimentResultRow>;
    getSummaryScores(): Promise<EvaluationResults>;
    /**
     * Run the target function or runnable on the examples.
     * @param {TargetT} target The target function or runnable to evaluate.
     * @param options
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    _predict(target: TargetT, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<_ForwardResults>;
    _runEvaluators(evaluators: Array<RunEvaluator>, currentResults: ExperimentResultRow, fields: {
        client: Client;
    }): Promise<ExperimentResultRow>;
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager.
     * (e.g. from a previous prediction step)
     * @param {Array<RunEvaluator>} evaluators
     * @param {number} maxConcurrency
     */
    _score(evaluators: Array<RunEvaluator>, options?: {
        maxConcurrency?: number;
    }): AsyncGenerator<ExperimentResultRow>;
    _applySummaryEvaluators(summaryEvaluators: Array<SummaryEvaluatorT>): AsyncGenerator<(runsArray: Run[]) => AsyncGenerator<EvaluationResults>>;
    _getDatasetVersion(): Promise<string | undefined>;
    _getDatasetSplits(): Promise<string[] | undefined>;
    _end(): Promise<void>;
}
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 */
declare class ExperimentResults implements AsyncIterableIterator<ExperimentResultRow> {
    private manager;
    results: ExperimentResultRow[];
    processedCount: number;
    summaryResults: EvaluationResults;
    constructor(experimentManager: _ExperimentManager);
    get experimentName(): string;
    [Symbol.asyncIterator](): AsyncIterableIterator<ExperimentResultRow>;
    next(): Promise<IteratorResult<ExperimentResultRow>>;
    processData(manager: _ExperimentManager): Promise<void>;
    get length(): number;
}
export {};
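/*
 * Consumption sketch for ExperimentResults (illustrative; assumes `results`
 * came from the evaluate() call sketched above, and that EvaluationResults
 * exposes a `results` array, which is declared in ./evaluator.js rather than
 * in this file):
 *
 *   // Rows stream in as predictions and evaluations complete.
 *   for await (const row of results) {
 *     // Each row pairs the run, its source example, and the evaluator feedback.
 *     console.log(row.run.id, row.evaluationResults.results);
 *   }
 *   console.log(`${results.experimentName}: processed ${results.length} rows`);
 */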