// 261 lines · 11 KiB · JavaScript
"use strict";
|
|
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
}
|
|
Object.defineProperty(o, k2, desc);
|
|
}) : (function(o, m, k, k2) {
|
|
if (k2 === undefined) k2 = k;
|
|
o[k2] = m[k];
|
|
}));
|
|
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
}) : function(o, v) {
|
|
o["default"] = v;
|
|
});
|
|
var __importStar = (this && this.__importStar) || function (mod) {
|
|
if (mod && mod.__esModule) return mod;
|
|
var result = {};
|
|
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
__setModuleDefault(result, mod);
|
|
return result;
|
|
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
exports.UpstashVectorStore = void 0;
|
|
const uuid = __importStar(require("uuid"));
|
|
const vectorstores_1 = require("@langchain/core/vectorstores");
|
|
const documents_1 = require("@langchain/core/documents");
|
|
const chunk_array_1 = require("@langchain/core/utils/chunk_array");
|
|
const testing_1 = require("@langchain/core/utils/testing");
|
|
const async_caller_1 = require("@langchain/core/utils/async_caller");
|
|
const CONCURRENT_UPSERT_LIMIT = 1000;
|
|
/**
|
|
* The main class that extends the 'VectorStore' class. It provides
|
|
* methods for interacting with Upstash index, such as adding documents,
|
|
* deleting documents, performing similarity search and more.
|
|
*/
|
|
class UpstashVectorStore extends vectorstores_1.VectorStore {
|
|
_vectorstoreType() {
|
|
return "upstash";
|
|
}
|
|
constructor(embeddings, args) {
|
|
super(embeddings, args);
|
|
Object.defineProperty(this, "index", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "caller", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "useUpstashEmbeddings", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "filter", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
Object.defineProperty(this, "namespace", {
|
|
enumerable: true,
|
|
configurable: true,
|
|
writable: true,
|
|
value: void 0
|
|
});
|
|
// Special case where the embeddings instance is a FakeEmbeddings instance. In this case, we need to disable "instanceof" rule.
|
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
if (embeddings instanceof testing_1.FakeEmbeddings) {
|
|
this.useUpstashEmbeddings = true;
|
|
}
|
|
const { index, namespace, ...asyncCallerArgs } = args;
|
|
this.index = index;
|
|
this.caller = new async_caller_1.AsyncCaller(asyncCallerArgs);
|
|
this.filter = args.filter;
|
|
this.namespace = namespace;
|
|
}
|
|
/**
|
|
* This method adds documents to Upstash database. Documents are first converted to vectors
|
|
* using the provided embeddings instance, and then upserted to the database.
|
|
* @param documents Array of Document objects to be added to the database.
|
|
* @param options Optional object containing array of ids for the documents.
|
|
* @returns Promise that resolves with the ids of the provided documents when the upsert operation is done.
|
|
*/
|
|
async addDocuments(documents, options) {
|
|
const texts = documents.map(({ pageContent }) => pageContent);
|
|
if (this.useUpstashEmbeddings || options?.useUpstashEmbeddings) {
|
|
return this._addData(documents, options);
|
|
}
|
|
const embeddings = await this.embeddings.embedDocuments(texts);
|
|
return this.addVectors(embeddings, documents, options);
|
|
}
|
|
/**
|
|
* This method adds the provided vectors to Upstash database.
|
|
* @param vectors Array of vectors to be added to the Upstash database.
|
|
* @param documents Array of Document objects, each associated with a vector.
|
|
* @param options Optional object containing the array of ids foor the vectors.
|
|
* @returns Promise that resolves with the ids of the provided documents when the upsert operation is done.
|
|
*/
|
|
async addVectors(vectors, documents, options) {
|
|
const documentIds = options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v4());
|
|
const upstashVectors = vectors.map((vector, index) => {
|
|
const metadata = {
|
|
_pageContentLC: documents[index].pageContent,
|
|
...documents[index].metadata,
|
|
};
|
|
const id = documentIds[index];
|
|
return {
|
|
id,
|
|
vector,
|
|
metadata,
|
|
};
|
|
});
|
|
const namespace = this.index.namespace(this.namespace ?? "");
|
|
const vectorChunks = (0, chunk_array_1.chunkArray)(upstashVectors, CONCURRENT_UPSERT_LIMIT);
|
|
const batchRequests = vectorChunks.map((chunk) => this.caller.call(async () => namespace.upsert(chunk)));
|
|
await Promise.all(batchRequests);
|
|
return documentIds;
|
|
}
|
|
/**
|
|
* This method adds the provided documents to Upstash database. The pageContent of the documents will be embedded by Upstash Embeddings.
|
|
* @param documents Array of Document objects to be added to the Upstash database.
|
|
* @param options Optional object containing the array of ids for the documents.
|
|
* @returns Promise that resolves with the ids of the provided documents when the upsert operation is done.
|
|
*/
|
|
async _addData(documents, options) {
|
|
const documentIds = options?.ids ?? Array.from({ length: documents.length }, () => uuid.v4());
|
|
const upstashVectorsWithData = documents.map((document, index) => {
|
|
const metadata = {
|
|
_pageContentLC: documents[index].pageContent,
|
|
...documents[index].metadata,
|
|
};
|
|
const id = documentIds[index];
|
|
return {
|
|
id,
|
|
data: document.pageContent,
|
|
metadata,
|
|
};
|
|
});
|
|
const namespace = this.index.namespace(this.namespace ?? "");
|
|
const vectorChunks = (0, chunk_array_1.chunkArray)(upstashVectorsWithData, CONCURRENT_UPSERT_LIMIT);
|
|
const batchRequests = vectorChunks.map((chunk) => this.caller.call(async () => namespace.upsert(chunk)));
|
|
await Promise.all(batchRequests);
|
|
return documentIds;
|
|
}
|
|
/**
|
|
* This method deletes documents from the Upstash database. You can either
|
|
* provide the target ids, or delete all vectors in the database.
|
|
* @param params Object containing either array of ids of the documents or boolean deleteAll.
|
|
* @returns Promise that resolves when the specified documents have been deleted from the database.
|
|
*/
|
|
async delete(params) {
|
|
const namespace = this.index.namespace(this.namespace ?? "");
|
|
if (params.deleteAll) {
|
|
await namespace.reset();
|
|
}
|
|
else if (params.ids) {
|
|
await namespace.delete(params.ids);
|
|
}
|
|
}
|
|
async _runUpstashQuery(query, k, filter, options) {
|
|
let queryResult = [];
|
|
const namespace = this.index.namespace(this.namespace ?? "");
|
|
if (typeof query === "string") {
|
|
queryResult = await namespace.query({
|
|
data: query,
|
|
topK: k,
|
|
includeMetadata: true,
|
|
filter,
|
|
...options,
|
|
});
|
|
}
|
|
else {
|
|
queryResult = await namespace.query({
|
|
vector: query,
|
|
topK: k,
|
|
includeMetadata: true,
|
|
filter,
|
|
...options,
|
|
});
|
|
}
|
|
return queryResult;
|
|
}
|
|
/**
|
|
* This method performs a similarity search in the Upstash database
|
|
* over the existing vectors.
|
|
* @param query Query vector for the similarity search.
|
|
* @param k The number of similar vectors to return as result.
|
|
* @returns Promise that resolves with an array of tuples, each containing
|
|
* Document object and similarity score. The length of the result will be
|
|
* maximum of 'k' and vectors in the index.
|
|
*/
|
|
async similaritySearchVectorWithScore(query, k, filter) {
|
|
const results = await this._runUpstashQuery(query, k, filter);
|
|
const searchResult = results.map((res) => {
|
|
const { _pageContentLC, ...metadata } = (res.metadata ??
|
|
{});
|
|
return [
|
|
new documents_1.Document({
|
|
metadata,
|
|
pageContent: _pageContentLC,
|
|
}),
|
|
res.score,
|
|
];
|
|
});
|
|
return searchResult;
|
|
}
|
|
/**
|
|
* This method creates a new UpstashVector instance from an array of texts.
|
|
* The texts are initially converted to Document instances and added to Upstash
|
|
* database.
|
|
* @param texts The texts to create the documents from.
|
|
* @param metadatas The metadata values associated with the texts.
|
|
* @param embeddings Embedding interface of choice, to create the text embeddings.
|
|
* @param dbConfig Object containing the Upstash database configs.
|
|
* @returns Promise that resolves with a new UpstashVector instance.
|
|
*/
|
|
static async fromTexts(texts, metadatas, embeddings, dbConfig) {
|
|
const docs = [];
|
|
for (let i = 0; i < texts.length; i += 1) {
|
|
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
|
|
const newDocument = new documents_1.Document({
|
|
pageContent: texts[i],
|
|
metadata,
|
|
});
|
|
docs.push(newDocument);
|
|
}
|
|
return this.fromDocuments(docs, embeddings, dbConfig);
|
|
}
|
|
/**
|
|
* This method creates a new UpstashVector instance from an array of Document instances.
|
|
* @param docs The docs to be added to Upstash database.
|
|
* @param embeddings Embedding interface of choice, to create the embeddings.
|
|
* @param dbConfig Object containing the Upstash database configs.
|
|
* @returns Promise that resolves with a new UpstashVector instance
|
|
*/
|
|
static async fromDocuments(docs, embeddings, dbConfig) {
|
|
const instance = new this(embeddings, dbConfig);
|
|
await instance.addDocuments(docs);
|
|
return instance;
|
|
}
|
|
/**
|
|
* This method creates a new UpstashVector instance from an existing index.
|
|
* @param embeddings Embedding interface of the choice, to create the embeddings.
|
|
* @param dbConfig Object containing the Upstash database configs.
|
|
* @returns
|
|
*/
|
|
static async fromExistingIndex(embeddings, dbConfig) {
|
|
const instance = new this(embeddings, dbConfig);
|
|
return instance;
|
|
}
|
|
}
|
|
exports.UpstashVectorStore = UpstashVectorStore;
|