"use strict";
|
||
|
Object.defineProperty(exports, "__esModule", { value: true });
|
||
|
exports._ExperimentManager = exports.evaluate = void 0;
|
||
|
const index_js_1 = require("../index.cjs");
|
||
|
const traceable_js_1 = require("../traceable.cjs");
|
||
|
const _git_js_1 = require("../utils/_git.cjs");
|
||
|
const _uuid_js_1 = require("../utils/_uuid.cjs");
|
||
|
const async_caller_js_1 = require("../utils/async_caller.cjs");
|
||
|
const atee_js_1 = require("../utils/atee.cjs");
|
||
|
const env_js_1 = require("../utils/env.cjs");
|
||
|
const error_js_1 = require("../utils/error.cjs");
|
||
|
const _random_name_js_1 = require("./_random_name.cjs");
|
||
|
const evaluator_js_1 = require("./evaluator.cjs");
|
||
|
const uuid_1 = require("uuid");
|
||
|
function evaluate(
|
||
|
/**
|
||
|
* The target system or function to evaluate.
|
||
|
*/
|
||
|
target, options) {
|
||
|
return _evaluate(target, options);
|
||
|
}
|
||
|
exports.evaluate = evaluate;
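// Usage sketch (illustrative only, not part of this module): evaluate() takes a
// target function plus an options object whose fields are consumed by _evaluate
// below (`data`, `evaluators`, `experimentPrefix`, `maxConcurrency`). The
// dataset name and evaluator identifier here are hypothetical.
//
//   const { evaluate } = require("langsmith/evaluation");
//   const results = await evaluate((inputs) => ({ answer: inputs.question }), {
//       data: "my-dataset",                   // dataset name, dataset id, or Example[]
//       evaluators: [myCorrectnessEvaluator], // run-level evaluators
//       experimentPrefix: "baseline",
//       maxConcurrency: 4,
//   });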
/**
 * Manage the execution of experiments.
 *
 * Supports lazily running predictions and evaluations in parallel to facilitate
 * result streaming and early debugging.
 */
class _ExperimentManager {
    get experimentName() {
        if (this._experimentName) {
            return this._experimentName;
        }
        else {
            throw new Error("Experiment name not provided, and experiment not yet started.");
        }
    }
    async getExamples() {
        if (!this._examples) {
            if (!this._data) {
                throw new Error("Data not provided in this experiment.");
            }
            const unresolvedData = _resolveData(this._data, { client: this.client });
            if (!this._examples) {
                this._examples = [];
            }
            const exs = [];
            for await (const example of unresolvedData) {
                exs.push(example);
            }
            if (this._numRepetitions && this._numRepetitions > 0) {
                const repeatedExamples = [];
                for (let i = 0; i < this._numRepetitions; i++) {
                    repeatedExamples.push(...exs);
                }
                this.setExamples(repeatedExamples);
            }
            else {
                this.setExamples(exs);
            }
        }
        return this._examples;
    }
    setExamples(examples) {
        this._examples = examples;
    }
    get datasetId() {
        return this.getExamples().then((examples) => {
            if (examples.length === 0) {
                throw new Error("No examples found in the dataset.");
            }
            if (this._experiment && this._experiment.reference_dataset_id) {
                return this._experiment.reference_dataset_id;
            }
            return examples[0].dataset_id;
        });
    }
    get evaluationResults() {
        if (this._evaluationResults === undefined) {
            return async function* () {
                for (const _ of await this.getExamples()) {
                    yield { results: [] };
                }
            }.call(this);
        }
        else {
            return this._evaluationResults;
        }
    }
    get runs() {
        if (this._runsArray && this._runsArray.length > 0) {
            throw new Error("Runs already provided as an array.");
        }
        if (this._runs === undefined) {
            throw new Error("Runs not provided in this experiment. Please predict first.");
        }
        else {
            return this._runs;
        }
    }
    constructor(args) {
        Object.defineProperty(this, "_data", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_runs", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_evaluationResults", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_summaryResults", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_examples", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_numRepetitions", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_runsArray", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "client", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_experiment", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_experimentName", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_metadata", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "_description", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.client = args.client ?? new index_js_1.Client();
        if (!args.experiment) {
            this._experimentName = (0, _random_name_js_1.randomName)();
        }
        else if (typeof args.experiment === "string") {
            this._experimentName = `${args.experiment}-${(0, uuid_1.v4)().slice(0, 8)}`;
        }
        else {
            if (!args.experiment.name) {
                throw new Error("Experiment must have a name");
            }
            this._experimentName = args.experiment.name;
            this._experiment = args.experiment;
        }
        let metadata = args.metadata || {};
        if (!("revision_id" in metadata)) {
            metadata = {
                revision_id: (0, env_js_1.getLangChainEnvVarsMetadata)().revision_id,
                ...metadata,
            };
        }
        this._metadata = metadata;
        if (args.examples && args.examples.length) {
            this.setExamples(args.examples);
        }
        this._data = args.data;
        if (args._runsArray && args._runsArray.length) {
            this._runsArray = args._runsArray;
        }
        this._runs = args.runs;
        this._evaluationResults = args.evaluationResults;
        this._summaryResults = args.summaryResults;
        this._numRepetitions = args.numRepetitions;
    }
    _getExperiment() {
        if (!this._experiment) {
            throw new Error("Experiment not yet started.");
        }
        return this._experiment;
    }
    async _getExperimentMetadata() {
        let projectMetadata = this._metadata ?? {};
        const gitInfo = await (0, _git_js_1.getGitInfo)();
        if (gitInfo) {
            projectMetadata = {
                ...projectMetadata,
                git: gitInfo,
            };
        }
        if (this._experiment) {
            const experimentMetadata = this._experiment.extra && "metadata" in this._experiment.extra
                ? this._experiment.extra.metadata
                : {};
            projectMetadata = {
                ...experimentMetadata,
                ...projectMetadata,
            };
        }
        return projectMetadata;
    }
    async _createProject(firstExample, projectMetadata) {
        // Create the project, updating the experimentName until we find a unique one.
        let project;
        const originalExperimentName = this._experimentName;
        for (let i = 0; i < 10; i++) {
            try {
                project = await this.client.createProject({
                    projectName: this._experimentName,
                    referenceDatasetId: firstExample.dataset_id,
                    metadata: projectMetadata,
                    description: this._description,
                });
                return project;
            }
            catch (e) {
                // Naming collision
                if (e?.name === "LangSmithConflictError") {
                    const ent = (0, uuid_1.v4)().slice(0, 6);
                    this._experimentName = `${originalExperimentName}-${ent}`;
                }
                else {
                    throw e;
                }
            }
        }
        throw new Error("Could not generate a unique experiment name within 10 attempts." +
            " Please try again with a different name.");
    }
    async _getProject(firstExample) {
        let project;
        if (!this._experiment) {
            const projectMetadata = await this._getExperimentMetadata();
            project = await this._createProject(firstExample, projectMetadata);
            this._experiment = project;
        }
        return this._experiment;
    }
    async _printExperimentStart() {
        console.log(`Starting evaluation of experiment: ${this.experimentName}`);
        const firstExample = this._examples?.[0];
        const datasetId = firstExample?.dataset_id;
        if (!datasetId || !this._experiment)
            return;
        const datasetUrl = await this.client.getDatasetUrl({ datasetId });
        const compareUrl = `${datasetUrl}/compare?selectedSessions=${this._experiment.id}`;
        console.log(`View results at ${compareUrl}`);
    }
    async start() {
        const examples = await this.getExamples();
        const firstExample = examples[0];
        const project = await this._getProject(firstExample);
        await this._printExperimentStart();
        this._metadata["num_repetitions"] = this._numRepetitions;
        return new _ExperimentManager({
            examples,
            experiment: project,
            metadata: this._metadata,
            client: this.client,
            evaluationResults: this._evaluationResults,
            summaryResults: this._summaryResults,
        });
    }
    async withPredictions(target, options) {
        const experimentResults = this._predict(target, options);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: (async function* () {
                for await (const pred of experimentResults) {
                    yield pred.run;
                }
            })(),
        });
    }
    async withEvaluators(evaluators, options) {
        const resolvedEvaluators = _resolveEvaluators(evaluators);
        const experimentResults = this._score(resolvedEvaluators, options);
        const [r1, r2] = (0, atee_js_1.atee)(experimentResults);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: (async function* () {
                for await (const result of r1) {
                    yield result.run;
                }
            })(),
            evaluationResults: (async function* () {
                for await (const result of r2) {
                    yield result.evaluationResults;
                }
            })(),
            summaryResults: this._summaryResults,
        });
    }
    async withSummaryEvaluators(summaryEvaluators) {
        const aggregateFeedbackGen = this._applySummaryEvaluators(summaryEvaluators);
        return new _ExperimentManager({
            examples: await this.getExamples(),
            experiment: this._experiment,
            metadata: this._metadata,
            client: this.client,
            runs: this.runs,
            _runsArray: this._runsArray,
            evaluationResults: this._evaluationResults,
            summaryResults: aggregateFeedbackGen,
        });
    }
    async *getResults() {
        const examples = await this.getExamples();
        const evaluationResults = [];
        if (!this._runsArray) {
            this._runsArray = [];
            for await (const run of this.runs) {
                this._runsArray.push(run);
            }
        }
        for await (const evaluationResult of this.evaluationResults) {
            evaluationResults.push(evaluationResult);
        }
        for (let i = 0; i < this._runsArray.length; i++) {
            yield {
                run: this._runsArray[i],
                example: examples[i],
                evaluationResults: evaluationResults[i],
            };
        }
    }
    async getSummaryScores() {
        if (!this._summaryResults) {
            return { results: [] };
        }
        const results = [];
        for await (const evaluationResultsGenerator of this._summaryResults) {
            if (typeof evaluationResultsGenerator === "function") {
                // The runs array is not populated until after the summary generator
                // is created, so the generator takes it as an argument here.
                for await (const evaluationResults of evaluationResultsGenerator(this._runsArray ?? [])) {
                    results.push(...evaluationResults.results);
                }
            }
        }
        return { results };
    }
    // Private methods
    /**
     * Run the target function or runnable on the examples.
     * @param {TargetT} target The target function or runnable to evaluate.
     * @param options
     * @returns {AsyncGenerator<_ForwardResults>} An async generator of the results.
     */
    async *_predict(target, options) {
        const maxConcurrency = options?.maxConcurrency ?? 0;
        const examples = await this.getExamples();
        if (maxConcurrency === 0) {
            for (const example of examples) {
                yield await _forward(target, example, this.experimentName, this._metadata, this.client);
            }
        }
        else {
            const caller = new async_caller_js_1.AsyncCaller({
                maxConcurrency,
            });
            const futures = [];
            for await (const example of examples) {
                futures.push(caller.call(_forward, target, example, this.experimentName, this._metadata, this.client));
            }
            for await (const future of futures) {
                yield future;
            }
        }
        // Close out the project.
        await this._end();
    }
    async _runEvaluators(evaluators, currentResults, fields) {
        const { run, example, evaluationResults } = currentResults;
        for (const evaluator of evaluators) {
            try {
                const options = {
                    reference_example_id: example.id,
                    project_name: "evaluators",
                    metadata: {
                        example_version: example.modified_at
                            ? new Date(example.modified_at).toISOString()
                            : new Date(example.created_at).toISOString(),
                    },
                    client: fields.client,
                    tracingEnabled: true,
                };
                const evaluatorResponse = await evaluator.evaluateRun(run, example, options);
                evaluationResults.results.push(...(await fields.client.logEvaluationFeedback(evaluatorResponse, run)));
            }
            catch (e) {
                console.error(`Error running evaluator ${evaluator.evaluateRun.name} on run ${run.id}: ${e}`);
                (0, error_js_1.printErrorStackTrace)(e);
            }
        }
        return {
            run,
            example,
            evaluationResults,
        };
    }
    /**
     * Run the evaluators on the prediction stream.
     * Expects runs to be available in the manager
     * (e.g. from a previous prediction step).
     * @param {Array<RunEvaluator>} evaluators
     * @param options Options object; `maxConcurrency` bounds parallel evaluator calls.
     */
    async *_score(evaluators, options) {
        const { maxConcurrency = 0 } = options || {};
        if (maxConcurrency === 0) {
            for await (const currentResults of this.getResults()) {
                yield this._runEvaluators(evaluators, currentResults, {
                    client: this.client,
                });
            }
        }
        else {
            const caller = new async_caller_js_1.AsyncCaller({
                maxConcurrency,
            });
            const futures = [];
            for await (const currentResults of this.getResults()) {
                futures.push(caller.call(this._runEvaluators, evaluators, currentResults, {
                    client: this.client,
                }));
            }
            for (const result of futures) {
                yield result;
            }
        }
    }
    async *_applySummaryEvaluators(summaryEvaluators) {
        const projectId = this._getExperiment().id;
        const examples = await this.getExamples();
        const options = Array.from({ length: summaryEvaluators.length }).map(() => ({
            project_name: "evaluators",
            experiment: this.experimentName,
            projectId: projectId,
        }));
        const wrappedEvaluators = await wrapSummaryEvaluators(summaryEvaluators, options);
        // Yield a generator factory rather than results: the consumer
        // (getSummaryScores) invokes it with the runs array once all runs
        // have been collected.
        yield async function* (runsArray) {
            const aggregateFeedback = [];
            for (const evaluator of wrappedEvaluators) {
                try {
                    const summaryEvalResult = await evaluator(runsArray, examples);
                    const flattenedResults = this.client._selectEvalResults(summaryEvalResult);
                    aggregateFeedback.push(...flattenedResults);
                    for (const result of flattenedResults) {
                        // eslint-disable-next-line @typescript-eslint/no-unused-vars
                        const { targetRunId, key, ...feedback } = result;
                        const evaluatorInfo = feedback.evaluatorInfo;
                        delete feedback.evaluatorInfo;
                        await this.client.createFeedback(null, key, {
                            ...feedback,
                            projectId: projectId,
                            sourceInfo: evaluatorInfo,
                        });
                    }
                }
                catch (e) {
                    console.error(`Error running summary evaluator ${evaluator.name}: ${JSON.stringify(e, null, 2)}`);
                    (0, error_js_1.printErrorStackTrace)(e);
                }
            }
            yield {
                results: aggregateFeedback,
            };
        }.bind(this);
    }
    async _getDatasetVersion() {
        const examples = await this.getExamples();
        const modifiedAt = examples.map((ex) => ex.modified_at);
        // Python might return microseconds, which we need
        // to account for when comparing dates.
        const modifiedAtTime = modifiedAt.map((date) => {
            function getMilliseconds(isoString) {
                const time = isoString.split("T").at(1);
                if (!time)
                    return "";
                const regex = /[0-9]{2}:[0-9]{2}:[0-9]{2}\.([0-9]+)/;
                const strMilliseconds = time.match(regex)?.[1];
                return strMilliseconds ?? "";
            }
            const jsDate = new Date(date);
            let source = getMilliseconds(date);
            let parsed = getMilliseconds(jsDate.toISOString());
            const length = Math.max(source.length, parsed.length);
            source = source.padEnd(length, "0");
            parsed = parsed.padEnd(length, "0");
            const microseconds = (Number.parseInt(source, 10) - Number.parseInt(parsed, 10)) / 1000;
            const time = jsDate.getTime() + microseconds;
            return { date, time };
        });
        if (modifiedAtTime.length === 0)
            return undefined;
        return modifiedAtTime.reduce((max, current) => (current.time > max.time ? current : max), modifiedAtTime[0]).date;
    }
    async _getDatasetSplits() {
        const examples = await this.getExamples();
        const allSplits = examples.reduce((acc, ex) => {
            if (ex.metadata && ex.metadata.dataset_split) {
                if (Array.isArray(ex.metadata.dataset_split)) {
                    ex.metadata.dataset_split.forEach((split) => acc.add(split));
                }
                else if (typeof ex.metadata.dataset_split === "string") {
                    acc.add(ex.metadata.dataset_split);
                }
            }
            return acc;
        }, new Set());
        return allSplits.size ? Array.from(allSplits) : undefined;
    }
    async _end() {
        const experiment = this._experiment;
        if (!experiment) {
            throw new Error("Experiment not yet started.");
        }
        const projectMetadata = await this._getExperimentMetadata();
        projectMetadata["dataset_version"] = await this._getDatasetVersion();
        projectMetadata["dataset_splits"] = await this._getDatasetSplits();
        // Update revision_id if not already set
        if (!projectMetadata["revision_id"]) {
            projectMetadata["revision_id"] = await (0, _git_js_1.getDefaultRevisionId)();
        }
        await this.client.updateProject(experiment.id, {
            endTime: new Date().toISOString(),
            metadata: projectMetadata,
        });
    }
}
exports._ExperimentManager = _ExperimentManager;
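// Illustrative sketch (not part of this module) of the lazy pipeline the
// manager builds: each with* call returns a new manager whose runs and
// evaluation results are async generators, so predictions and evaluations
// stream in parallel, as the class docblock describes. The target function
// and evaluator below are hypothetical.
//
//   let manager = await new _ExperimentManager({ data: "my-dataset" }).start();
//   manager = await manager.withPredictions((inputs) => ({ output: inputs }));
//   manager = await manager.withEvaluators([myRunEvaluator]);
//   for await (const { run, example, evaluationResults } of manager.getResults()) {
//       // Each row streams as soon as its prediction and scores are ready.
//   }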
/**
 * Represents the results of an evaluate() call.
 * This class provides an iterator interface to iterate over the experiment results
 * as they become available. It also provides methods to access the experiment name,
 * the number of results, and to wait for the results to be processed.
 */
class ExperimentResults {
    constructor(experimentManager) {
        Object.defineProperty(this, "manager", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "results", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: []
        });
        Object.defineProperty(this, "processedCount", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0
        });
        Object.defineProperty(this, "summaryResults", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.manager = experimentManager;
    }
    get experimentName() {
        return this.manager.experimentName;
    }
    [Symbol.asyncIterator]() {
        return this;
    }
    async next() {
        if (this.processedCount < this.results.length) {
            const result = this.results[this.processedCount];
            this.processedCount++;
            return Promise.resolve({ value: result, done: false });
        }
        else {
            return Promise.resolve({ value: undefined, done: true });
        }
    }
    async processData(manager) {
        for await (const item of manager.getResults()) {
            this.results.push(item);
            this.processedCount++;
        }
        this.summaryResults = await manager.getSummaryScores();
    }
    get length() {
        return this.results.length;
    }
}
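// Illustrative sketch (not part of this module): the object returned by
// evaluate() exposes the experiment name, the number of result rows, and an
// async-iterator interface. Note that processedCount doubles as the iterator
// cursor and is advanced by processData(), so after evaluate() resolves,
// next() already reports done; read the results field for post-hoc access.
//
//   const results = await evaluate(target, { data: "my-dataset" });
//   console.log(results.experimentName, results.length);
//   for (const row of results.results) {
//       console.log(row.run.id, row.evaluationResults);
//   }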
async function _evaluate(target, fields) {
    const client = fields.client ?? new index_js_1.Client();
    const runs = _isCallable(target) ? null : target;
    const [experiment_, newRuns] = await _resolveExperiment(fields.experiment ?? null, runs, client);
    let manager = await new _ExperimentManager({
        data: Array.isArray(fields.data) ? undefined : fields.data,
        examples: Array.isArray(fields.data) ? fields.data : undefined,
        client,
        metadata: fields.metadata,
        experiment: experiment_ ?? fields.experimentPrefix,
        runs: newRuns ?? undefined,
        numRepetitions: fields.numRepetitions ?? 1,
    }).start();
    if (_isCallable(target)) {
        manager = await manager.withPredictions(target, {
            maxConcurrency: fields.maxConcurrency,
        });
    }
    if (fields.evaluators) {
        manager = await manager.withEvaluators(fields.evaluators, {
            maxConcurrency: fields.maxConcurrency,
        });
    }
    if (fields.summaryEvaluators) {
        manager = await manager.withSummaryEvaluators(fields.summaryEvaluators);
    }
    // Start consuming the results.
    const results = new ExperimentResults(manager);
    await results.processData(manager);
    return results;
}
async function _forward(fn, example, experimentName, metadata, client) {
    let run = null;
    const _getRun = (r) => {
        run = r;
    };
    const options = {
        reference_example_id: example.id,
        on_end: _getRun,
        project_name: experimentName,
        metadata: {
            ...metadata,
            example_version: example.modified_at
                ? new Date(example.modified_at).toISOString()
                : new Date(example.created_at).toISOString(),
        },
        client,
        tracingEnabled: true,
    };
    const wrappedFn = "invoke" in fn
        ? (0, traceable_js_1.traceable)(async (inputs) => {
            let langChainCallbacks;
            try {
                // TODO: Deprecate this and rely on interop on 0.2 minor bump.
                const { getLangchainCallbacks } = await import("../langchain.js");
                langChainCallbacks = await getLangchainCallbacks();
            }
            catch {
                // no-op
            }
            // Issue with retrieving LangChain callbacks, rely on interop
            if (langChainCallbacks === undefined) {
                return await fn.invoke(inputs);
            }
            else {
                return await fn.invoke(inputs, { callbacks: langChainCallbacks });
            }
        }, options)
        : (0, traceable_js_1.traceable)(fn, options);
    try {
        await wrappedFn(example.inputs);
    }
    catch (e) {
        console.error(`Error running target function: ${e}`);
        (0, error_js_1.printErrorStackTrace)(e);
    }
    if (!run) {
        throw new Error(`Run not created by target function.
This is most likely due to tracing not being enabled.\n
Try setting "LANGSMITH_TRACING=true" in your environment.`);
    }
    return {
        run,
        example,
    };
}
function _resolveData(data, options) {
    let isUUID = false;
    try {
        if (typeof data === "string") {
            (0, _uuid_js_1.assertUuid)(data);
            isUUID = true;
        }
    }
    catch (_) {
        isUUID = false;
    }
    if (typeof data === "string" && isUUID) {
        return options.client.listExamples({
            datasetId: data,
        });
    }
    if (typeof data === "string") {
        return options.client.listExamples({
            datasetName: data,
        });
    }
    return data;
}
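// Illustrative sketch (not part of this module) of the three data shapes
// _resolveData accepts; the identifiers below are hypothetical.
//
//   _resolveData("123e4567-e89b-12d3-a456-426614174000", { client }); // dataset id -> listExamples({ datasetId })
//   _resolveData("my-dataset", { client });                           // dataset name -> listExamples({ datasetName })
//   _resolveData(exampleIterable, { client });                        // AsyncIterable<Example>, passed through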
async function wrapSummaryEvaluators(evaluators, optionsArray) {
    async function _wrap(evaluator) {
        const evalName = evaluator.name || "BatchEvaluator";
        const wrapperInner = (runs, examples) => {
            const wrapperSuperInner = (0, traceable_js_1.traceable)((_runs_, _examples_) => {
                return Promise.resolve(evaluator(runs, examples));
            }, { ...optionsArray, name: evalName });
            return Promise.resolve(wrapperSuperInner(`Runs[] (Length=${runs.length})`, `Examples[] (Length=${examples.length})`));
        };
        return wrapperInner;
    }
    const results = [];
    for (let i = 0; i < evaluators.length; i++) {
        results.push(await _wrap(evaluators[i]));
    }
    return results;
}
function _resolveEvaluators(evaluators) {
    const results = [];
    for (const evaluator of evaluators) {
        if ("evaluateRun" in evaluator) {
            results.push(evaluator);
        }
        // TODO: Fix this by porting LangChainStringEvaluator to the langsmith SDK.
        else if (evaluator.name === "LangChainStringEvaluator") {
            throw new Error("Not yet implemented");
        }
        else {
            results.push((0, evaluator_js_1.runEvaluator)(evaluator));
        }
    }
    return results;
}
async function _resolveExperiment(experiment, runs, client) {
    // TODO: Remove this, handle outside the manager
    if (experiment !== null) {
        if (!experiment.name) {
            throw new Error("Experiment name must be defined if provided.");
        }
        return [experiment, undefined];
    }
    // If we have runs, that means the experiment was already started.
    if (runs !== null) {
        const results = [];
        for await (const item of (0, atee_js_1.atee)(runs)) {
            results.push(item);
        }
        const [runsClone, runsOriginal] = results;
        const runsCloneIterator = runsClone[Symbol.asyncIterator]();
        // todo: this is `any`. does it work properly?
        const firstRun = await runsCloneIterator
            .next()
            .then((result) => result.value);
        const retrievedExperiment = await client.readProject(firstRun.sessionId);
        if (!retrievedExperiment.name) {
            throw new Error("Experiment name not found for provided runs.");
        }
        return [retrievedExperiment, runsOriginal];
    }
    return [undefined, undefined];
}
function _isCallable(target) {
    return Boolean(typeof target === "function" ||
        ("invoke" in target && typeof target.invoke === "function"));
}