import { VectorStore } from "@langchain/core/vectorstores"; import { SearchRequest, VectorQuery, VectorSearch, } from "couchbase"; import { Document } from "@langchain/core/documents"; import { v4 as uuid } from "uuid"; /** * Class for interacting with the Couchbase database. It extends the * VectorStore class and provides methods for adding vectors and * documents, and searching for similar vectors. * Initiate the class using initialize() method. */ export class CouchbaseVectorStore extends VectorStore { /** * The private constructor used to provide embedding to parent class. * Initialize the class using static initialize() method * @param embedding - object to generate embedding * @param config - the fields required to initialize a vector store */ constructor(embedding, config) { super(embedding, config); Object.defineProperty(this, "metadataKey", { enumerable: true, configurable: true, writable: true, value: "metadata" }); Object.defineProperty(this, "defaultTextKey", { enumerable: true, configurable: true, writable: true, value: "text" }); Object.defineProperty(this, "defaultScopedIndex", { enumerable: true, configurable: true, writable: true, value: true }); Object.defineProperty(this, "defaultEmbeddingKey", { enumerable: true, configurable: true, writable: true, value: "embedding" }); Object.defineProperty(this, "cluster", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "_bucket", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "_scope", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "_collection", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "bucketName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "scopeName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "collectionName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "indexName", { enumerable: true, configurable: true, writable: true, value: void 0 }); Object.defineProperty(this, "textKey", { enumerable: true, configurable: true, writable: true, value: this.defaultTextKey }); Object.defineProperty(this, "embeddingKey", { enumerable: true, configurable: true, writable: true, value: this.defaultEmbeddingKey }); Object.defineProperty(this, "scopedIndex", { enumerable: true, configurable: true, writable: true, value: void 0 }); /** * Formats couchbase metadata by removing `metadata.` from initials * @param fields - all the fields of row * @returns - formatted metadata fields */ Object.defineProperty(this, "formatMetadata", { enumerable: true, configurable: true, writable: true, value: (fields) => { delete fields[this.textKey]; const metadataFields = {}; // eslint-disable-next-line guard-for-in for (const key in fields) { const newKey = key.replace(`${this.metadataKey}.`, ""); metadataFields[newKey] = fields[key]; } return metadataFields; } }); } /** * initialize class for interacting with the Couchbase database. * It extends the VectorStore class and provides methods * for adding vectors and documents, and searching for similar vectors. * This also verifies the params * * @param embeddings - object to generate embedding * @param config - the fields required to initialize a vector store */ static async initialize(embeddings, config) { const store = new CouchbaseVectorStore(embeddings, config); const { cluster, bucketName, scopeName, collectionName, indexName, textKey, embeddingKey, scopedIndex, } = config; store.cluster = cluster; store.bucketName = bucketName; store.scopeName = scopeName; store.collectionName = collectionName; store.indexName = indexName; if (textKey) { store.textKey = textKey; } else { store.textKey = store.defaultTextKey; } if (embeddingKey) { store.embeddingKey = embeddingKey; } else { store.embeddingKey = store.defaultEmbeddingKey; } if (scopedIndex !== undefined) { store.scopedIndex = scopedIndex; } else { store.scopedIndex = store.defaultScopedIndex; } try { store._bucket = store.cluster.bucket(store.bucketName); store._scope = store._bucket.scope(store.scopeName); store._collection = store._scope.collection(store.collectionName); } catch (err) { throw new Error("Error connecting to couchbase, Please check connection and credentials"); } try { if (!(await store.checkBucketExists()) || !(await store.checkIndexExists()) || !(await store.checkScopeAndCollectionExists())) { throw new Error("Error while initializing vector store"); } } catch (err) { throw new Error(`Error while initializing vector store: ${err}`); } return store; } /** * An asynchrononous method to verify the search indexes. * It retrieves all indexes and checks if specified index is present. * * @throws - If the specified index does not exist in the database. * * @returns - returns promise true if no error is found */ async checkIndexExists() { if (this.scopedIndex) { const allIndexes = await this._scope.searchIndexes().getAllIndexes(); const indexNames = allIndexes.map((index) => index.name); if (!indexNames.includes(this.indexName)) { throw new Error(`Index ${this.indexName} does not exist. Please create the index before searching.`); } } else { const allIndexes = await this.cluster.searchIndexes().getAllIndexes(); const indexNames = allIndexes.map((index) => index.name); if (!indexNames.includes(this.indexName)) { throw new Error(`Index ${this.indexName} does not exist. Please create the index before searching.`); } } return true; } /** * An asynchronous method to verify the existence of a bucket. * It retrieves the bucket using the bucket manager and checks if the specified bucket is present. * * @throws - If the specified bucket does not exist in the database. * * @returns - Returns a promise that resolves to true if no error is found, indicating the bucket exists. */ async checkBucketExists() { const bucketManager = this.cluster.buckets(); try { await bucketManager.getBucket(this.bucketName); return true; } catch (error) { throw new Error(`Bucket ${this.bucketName} does not exist. Please create the bucket before searching.`); } } /** * An asynchronous method to verify the existence of a scope and a collection within that scope. * It retrieves all scopes and collections in the bucket, and checks if the specified scope and collection are present. * * @throws - If the specified scope does not exist in the bucket, or if the specified collection does not exist in the scope. * * @returns - Returns a promise that resolves to true if no error is found, indicating the scope and collection exist. */ async checkScopeAndCollectionExists() { const scopeCollectionMap = {}; // Get a list of all scopes in the bucket const scopes = await this._bucket.collections().getAllScopes(); for (const scope of scopes) { scopeCollectionMap[scope.name] = []; // Get a list of all the collections in the scope for (const collection of scope.collections) { scopeCollectionMap[scope.name].push(collection.name); } } // Check if the scope exists if (!Object.keys(scopeCollectionMap).includes(this.scopeName)) { throw new Error(`Scope ${this.scopeName} not found in Couchbase bucket ${this.bucketName}`); } // Check if the collection exists in the scope if (!scopeCollectionMap[this.scopeName].includes(this.collectionName)) { throw new Error(`Collection ${this.collectionName} not found in scope ${this.scopeName} in Couchbase bucket ${this.bucketName}`); } return true; } _vectorstoreType() { return "couchbase"; } /** * Performs a similarity search on the vectors in the Couchbase database and returns the documents and their corresponding scores. * * @param queryEmbeddings - Embedding vector to look up documents similar to. * @param k - Number of documents to return. Defaults to 4. * @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object. * - `fields`: Optional list of fields to include in the * metadata of results. Note that these need to be stored in the index. * If nothing is specified, defaults to all the fields stored in the index. * - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object. * * @returns - Promise of list of [document, score] that are the most similar to the query vector. * * @throws If the search operation fails. */ async similaritySearchVectorWithScore(queryEmbeddings, k = 4, filter = {}) { let { fields } = filter; const { searchOptions } = filter; if (!fields) { fields = ["*"]; } if (!(fields.length === 1 && fields[0] === "*") && !fields.includes(this.textKey)) { fields.push(this.textKey); } const searchRequest = new SearchRequest(VectorSearch.fromVectorQuery(new VectorQuery(this.embeddingKey, queryEmbeddings).numCandidates(k))); let searchIterator; const docsWithScore = []; try { if (this.scopedIndex) { searchIterator = this._scope.search(this.indexName, searchRequest, { limit: k, fields, raw: searchOptions, }); } else { searchIterator = this.cluster.search(this.indexName, searchRequest, { limit: k, fields, raw: searchOptions, }); } const searchRows = (await searchIterator).rows; for (const row of searchRows) { const text = row.fields[this.textKey]; const metadataFields = this.formatMetadata(row.fields); const searchScore = row.score; const doc = new Document({ pageContent: text, metadata: metadataFields, }); docsWithScore.push([doc, searchScore]); } } catch (err) { console.log("error received"); throw new Error(`Search failed with error: ${err}`); } return docsWithScore; } /** * Return documents that are most similar to the vector embedding. * * @param queryEmbeddings - Embedding to look up documents similar to. * @param k - The number of similar documents to return. Defaults to 4. * @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object. * - `fields`: Optional list of fields to include in the * metadata of results. Note that these need to be stored in the index. * If nothing is specified, defaults to all the fields stored in the index. * - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object. * * @returns - A promise that resolves to an array of documents that match the similarity search. */ async similaritySearchByVector(queryEmbeddings, k = 4, filter = {}) { const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter); const docs = []; for (const doc of docsWithScore) { docs.push(doc[0]); } return docs; } /** * Return documents that are most similar to the query. * * @param query - Query to look up for similar documents * @param k - The number of similar documents to return. Defaults to 4. * @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object. * - `fields`: Optional list of fields to include in the * metadata of results. Note that these need to be stored in the index. * If nothing is specified, defaults to all the fields stored in the index. * - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object. * * @returns - Promise of list of documents that are most similar to the query. */ async similaritySearch(query, k = 4, filter = {}) { const queryEmbeddings = await this.embeddings.embedQuery(query); const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter); const docs = []; for (const doc of docsWithScore) { docs.push(doc[0]); } return docs; } /** * Return documents that are most similar to the query with their scores. * * @param query - Query to look up for similar documents * @param k - The number of similar documents to return. Defaults to 4. * @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object. * - `fields`: Optional list of fields to include in the * metadata of results. Note that these need to be stored in the index. * If nothing is specified, defaults to all the fields stored in the index. * - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object. * * @returns - Promise of list of documents that are most similar to the query. */ async similaritySearchWithScore(query, k = 4, filter = {}) { const queryEmbeddings = await this.embeddings.embedQuery(query); const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter); return docsWithScore; } /** * upsert documents asynchronously into a couchbase collection * @param documentsToInsert Documents to be inserted into couchbase collection with embeddings, original text and metadata * @returns DocIds of the inserted documents */ async upsertDocuments(documentsToInsert) { // Create promises for each document to be upserted const upsertDocumentsPromises = documentsToInsert.map((document) => { const currentDocumentKey = Object.keys(document)[0]; return this._collection .upsert(currentDocumentKey, document[currentDocumentKey]) .then(() => currentDocumentKey) .catch((e) => { console.error("error received while upserting document", e); throw new Error(`Upsert failed with error: ${e}`); }); }); try { // Upsert all documents asynchronously const docIds = await Promise.all(upsertDocumentsPromises); const successfulDocIds = []; for (const id of docIds) { if (id) { successfulDocIds.push(id); } } return successfulDocIds; } catch (e) { console.error("An error occurred with Promise.all at upserting all documents", e); throw e; } } /** * Add vectors and corresponding documents to a couchbase collection * If the document IDs are passed, the existing documents (if any) will be * overwritten with the new ones. * @param vectors - The vectors to be added to the collection. * @param documents - The corresponding documents to be added to the collection. * @param options - Optional parameters for adding vectors. * This may include the IDs and metadata of the documents to be added. Defaults to an empty object. * * @returns - A promise that resolves to an array of document IDs that were added to the collection. */ async addVectors(vectors, documents, options = {}) { // Get document ids. if ids are not available then use UUIDs for each document let ids = options ? options.ids : undefined; if (ids === undefined) { ids = Array.from({ length: documents.length }, () => uuid()); } // Get metadata for each document. if metadata is not available, use empty object for each document let metadata = options ? options.metadata : undefined; if (metadata === undefined) { metadata = Array.from({ length: documents.length }, () => ({})); } const documentsToInsert = ids.map((id, index) => ({ [id]: { [this.textKey]: documents[index].pageContent, [this.embeddingKey]: vectors[index], [this.metadataKey]: metadata[index], }, })); let docIds = []; try { docIds = await this.upsertDocuments(documentsToInsert); } catch (err) { console.error("Error while adding vectors", err); throw err; } return docIds; } /** * Run texts through the embeddings and persist in vectorstore. * If the document IDs are passed, the existing documents (if any) will be * overwritten with the new ones. * @param documents - The corresponding documents to be added to the collection. * @param options - Optional parameters for adding documents. * This may include the IDs and metadata of the documents to be added. Defaults to an empty object. * * @returns - A promise that resolves to an array of document IDs that were added to the collection. */ async addDocuments(documents, options = {}) { const texts = documents.map(({ pageContent }) => pageContent); const metadatas = documents.map((doc) => doc.metadata); if (!options.metadata) { options.metadata = metadatas; } return this.addVectors(await this.embeddings.embedDocuments(texts), documents, options); } /** * Create a new CouchbaseVectorStore from a set of documents. * This function will initialize a new store, add the documents to it, and then return the store. * @param documents - The documents to be added to the new store. * @param embeddings - The embeddings to be used for the documents. * @param config - The configuration for the new CouchbaseVectorStore. This includes the options for adding vectors. * * @returns - A promise that resolves to the new CouchbaseVectorStore that contains the added documents. */ static async fromDocuments(documents, embeddings, config) { const store = await this.initialize(embeddings, config); await store.addDocuments(documents, config.addVectorOptions); return store; } /** * Create a new CouchbaseVectorStore from a set of texts. * This function will convert each text and its corresponding metadata into a Document, * initialize a new store, add the documents to it, and then return the store. * @param texts - The texts to be converted into Documents and added to the new store. * @param metadatas - The metadata for each text. If an array is passed, each text will have its corresponding metadata. * If not, all texts will have the same metadata. * @param embeddings - The embeddings to be used for the documents. * @param config - The configuration for the new CouchbaseVectorStore. This includes the options for adding vectors. * * @returns - A promise that resolves to the new CouchbaseVectorStore that contains the added documents. */ static async fromTexts(texts, metadatas, embeddings, config) { const docs = []; for (let i = 0; i < texts.length; i += 1) { const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas; const newDoc = new Document({ pageContent: texts[i], metadata, }); docs.push(newDoc); } return await this.fromDocuments(docs, embeddings, config); } /** * Delete documents asynchronously from the collection. * This function will attempt to remove each document in the provided list of IDs from the collection. * If an error occurs during the deletion of a document, an error will be thrown with the ID of the document and the error message. * @param ids - An array of document IDs to be deleted from the collection. * * @returns - A promise that resolves when all documents have been attempted to be deleted. If a document could not be deleted, an error is thrown. */ async delete(ids) { const deleteDocumentsPromises = ids.map((id) => this._collection.remove(id).catch((err) => { throw new Error(`Error while deleting document - Document Id: ${id}, Error: ${err}`); })); try { await Promise.all(deleteDocumentsPromises); } catch (err) { throw new Error(`Error while deleting documents, Error: ${err}`); } } }