agsamantha/node_modules/@langchain/community/dist/vectorstores/couchbase.js
2024-10-02 15:15:21 -05:00

530 lines
23 KiB
JavaScript

import { VectorStore } from "@langchain/core/vectorstores";
import { SearchRequest, VectorQuery, VectorSearch, } from "couchbase";
import { Document } from "@langchain/core/documents";
import { v4 as uuid } from "uuid";
/**
* Class for interacting with the Couchbase database. It extends the
* VectorStore class and provides methods for adding vectors and
* documents, and searching for similar vectors.
* Create instances with the static initialize() method.
*/
export class CouchbaseVectorStore extends VectorStore {
/**
* The private constructor used to provide embedding to parent class.
* Initialize the class using static initialize() method
* @param embedding - object to generate embedding
* @param config - the fields required to initialize a vector store
*/
constructor(embedding, config) {
super(embedding, config);
Object.defineProperty(this, "metadataKey", {
enumerable: true,
configurable: true,
writable: true,
value: "metadata"
});
Object.defineProperty(this, "defaultTextKey", {
enumerable: true,
configurable: true,
writable: true,
value: "text"
});
Object.defineProperty(this, "defaultScopedIndex", {
enumerable: true,
configurable: true,
writable: true,
value: true
});
Object.defineProperty(this, "defaultEmbeddingKey", {
enumerable: true,
configurable: true,
writable: true,
value: "embedding"
});
Object.defineProperty(this, "cluster", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "_bucket", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "_scope", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "_collection", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "bucketName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "scopeName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "collectionName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "indexName", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "textKey", {
enumerable: true,
configurable: true,
writable: true,
value: this.defaultTextKey
});
Object.defineProperty(this, "embeddingKey", {
enumerable: true,
configurable: true,
writable: true,
value: this.defaultEmbeddingKey
});
Object.defineProperty(this, "scopedIndex", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
/**
* Formats couchbase metadata by removing `metadata.` from initials
* @param fields - all the fields of row
* @returns - formatted metadata fields
*/
Object.defineProperty(this, "formatMetadata", {
enumerable: true,
configurable: true,
writable: true,
value: (fields) => {
delete fields[this.textKey];
const metadataFields = {};
// eslint-disable-next-line guard-for-in
for (const key in fields) {
const newKey = key.replace(`${this.metadataKey}.`, "");
metadataFields[newKey] = fields[key];
}
return metadataFields;
}
});
}
/**
* initialize class for interacting with the Couchbase database.
* It extends the VectorStore class and provides methods
* for adding vectors and documents, and searching for similar vectors.
* This also verifies the params
*
* @param embeddings - object to generate embedding
* @param config - the fields required to initialize a vector store
*/
static async initialize(embeddings, config) {
const store = new CouchbaseVectorStore(embeddings, config);
const { cluster, bucketName, scopeName, collectionName, indexName, textKey, embeddingKey, scopedIndex, } = config;
store.cluster = cluster;
store.bucketName = bucketName;
store.scopeName = scopeName;
store.collectionName = collectionName;
store.indexName = indexName;
if (textKey) {
store.textKey = textKey;
}
else {
store.textKey = store.defaultTextKey;
}
if (embeddingKey) {
store.embeddingKey = embeddingKey;
}
else {
store.embeddingKey = store.defaultEmbeddingKey;
}
if (scopedIndex !== undefined) {
store.scopedIndex = scopedIndex;
}
else {
store.scopedIndex = store.defaultScopedIndex;
}
try {
store._bucket = store.cluster.bucket(store.bucketName);
store._scope = store._bucket.scope(store.scopeName);
store._collection = store._scope.collection(store.collectionName);
}
catch (err) {
throw new Error("Error connecting to couchbase, Please check connection and credentials");
}
try {
if (!(await store.checkBucketExists()) ||
!(await store.checkIndexExists()) ||
!(await store.checkScopeAndCollectionExists())) {
throw new Error("Error while initializing vector store");
}
}
catch (err) {
throw new Error(`Error while initializing vector store: ${err}`);
}
return store;
}
/**
* An asynchrononous method to verify the search indexes.
* It retrieves all indexes and checks if specified index is present.
*
* @throws - If the specified index does not exist in the database.
*
* @returns - returns promise true if no error is found
*/
async checkIndexExists() {
if (this.scopedIndex) {
const allIndexes = await this._scope.searchIndexes().getAllIndexes();
const indexNames = allIndexes.map((index) => index.name);
if (!indexNames.includes(this.indexName)) {
throw new Error(`Index ${this.indexName} does not exist. Please create the index before searching.`);
}
}
else {
const allIndexes = await this.cluster.searchIndexes().getAllIndexes();
const indexNames = allIndexes.map((index) => index.name);
if (!indexNames.includes(this.indexName)) {
throw new Error(`Index ${this.indexName} does not exist. Please create the index before searching.`);
}
}
return true;
}
/**
* An asynchronous method to verify the existence of a bucket.
* It retrieves the bucket using the bucket manager and checks if the specified bucket is present.
*
* @throws - If the specified bucket does not exist in the database.
*
* @returns - Returns a promise that resolves to true if no error is found, indicating the bucket exists.
*/
async checkBucketExists() {
const bucketManager = this.cluster.buckets();
try {
await bucketManager.getBucket(this.bucketName);
return true;
}
catch (error) {
throw new Error(`Bucket ${this.bucketName} does not exist. Please create the bucket before searching.`);
}
}
/**
* An asynchronous method to verify the existence of a scope and a collection within that scope.
* It retrieves all scopes and collections in the bucket, and checks if the specified scope and collection are present.
*
* @throws - If the specified scope does not exist in the bucket, or if the specified collection does not exist in the scope.
*
* @returns - Returns a promise that resolves to true if no error is found, indicating the scope and collection exist.
*/
async checkScopeAndCollectionExists() {
const scopeCollectionMap = {};
// Get a list of all scopes in the bucket
const scopes = await this._bucket.collections().getAllScopes();
for (const scope of scopes) {
scopeCollectionMap[scope.name] = [];
// Get a list of all the collections in the scope
for (const collection of scope.collections) {
scopeCollectionMap[scope.name].push(collection.name);
}
}
// Check if the scope exists
if (!Object.keys(scopeCollectionMap).includes(this.scopeName)) {
throw new Error(`Scope ${this.scopeName} not found in Couchbase bucket ${this.bucketName}`);
}
// Check if the collection exists in the scope
if (!scopeCollectionMap[this.scopeName].includes(this.collectionName)) {
throw new Error(`Collection ${this.collectionName} not found in scope ${this.scopeName} in Couchbase bucket ${this.bucketName}`);
}
return true;
}
_vectorstoreType() {
return "couchbase";
}
/**
* Performs a similarity search on the vectors in the Couchbase database and returns the documents and their corresponding scores.
*
* @param queryEmbeddings - Embedding vector to look up documents similar to.
* @param k - Number of documents to return. Defaults to 4.
* @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object.
* - `fields`: Optional list of fields to include in the
* metadata of results. Note that these need to be stored in the index.
* If nothing is specified, defaults to all the fields stored in the index.
* - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object.
*
* @returns - Promise of list of [document, score] that are the most similar to the query vector.
*
* @throws If the search operation fails.
*/
async similaritySearchVectorWithScore(queryEmbeddings, k = 4, filter = {}) {
let { fields } = filter;
const { searchOptions } = filter;
if (!fields) {
fields = ["*"];
}
if (!(fields.length === 1 && fields[0] === "*") &&
!fields.includes(this.textKey)) {
fields.push(this.textKey);
}
const searchRequest = new SearchRequest(VectorSearch.fromVectorQuery(new VectorQuery(this.embeddingKey, queryEmbeddings).numCandidates(k)));
let searchIterator;
const docsWithScore = [];
try {
if (this.scopedIndex) {
searchIterator = this._scope.search(this.indexName, searchRequest, {
limit: k,
fields,
raw: searchOptions,
});
}
else {
searchIterator = this.cluster.search(this.indexName, searchRequest, {
limit: k,
fields,
raw: searchOptions,
});
}
const searchRows = (await searchIterator).rows;
for (const row of searchRows) {
const text = row.fields[this.textKey];
const metadataFields = this.formatMetadata(row.fields);
const searchScore = row.score;
const doc = new Document({
pageContent: text,
metadata: metadataFields,
});
docsWithScore.push([doc, searchScore]);
}
}
catch (err) {
console.log("error received");
throw new Error(`Search failed with error: ${err}`);
}
return docsWithScore;
}
/**
* Return documents that are most similar to the vector embedding.
*
* @param queryEmbeddings - Embedding to look up documents similar to.
* @param k - The number of similar documents to return. Defaults to 4.
* @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object.
* - `fields`: Optional list of fields to include in the
* metadata of results. Note that these need to be stored in the index.
* If nothing is specified, defaults to all the fields stored in the index.
* - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object.
*
* @returns - A promise that resolves to an array of documents that match the similarity search.
*/
async similaritySearchByVector(queryEmbeddings, k = 4, filter = {}) {
const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter);
const docs = [];
for (const doc of docsWithScore) {
docs.push(doc[0]);
}
return docs;
}
/**
* Return documents that are most similar to the query.
*
* @param query - Query to look up for similar documents
* @param k - The number of similar documents to return. Defaults to 4.
* @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object.
* - `fields`: Optional list of fields to include in the
* metadata of results. Note that these need to be stored in the index.
* If nothing is specified, defaults to all the fields stored in the index.
* - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object.
*
* @returns - Promise of list of documents that are most similar to the query.
*/
async similaritySearch(query, k = 4, filter = {}) {
const queryEmbeddings = await this.embeddings.embedQuery(query);
const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter);
const docs = [];
for (const doc of docsWithScore) {
docs.push(doc[0]);
}
return docs;
}
/**
* Return documents that are most similar to the query with their scores.
*
* @param query - Query to look up for similar documents
* @param k - The number of similar documents to return. Defaults to 4.
* @param filter - Optional search filter that are passed to Couchbase search. Defaults to empty object.
* - `fields`: Optional list of fields to include in the
* metadata of results. Note that these need to be stored in the index.
* If nothing is specified, defaults to all the fields stored in the index.
* - `searchOptions`: Optional search options that are passed to Couchbase search. Defaults to empty object.
*
* @returns - Promise of list of documents that are most similar to the query.
*/
async similaritySearchWithScore(query, k = 4, filter = {}) {
const queryEmbeddings = await this.embeddings.embedQuery(query);
const docsWithScore = await this.similaritySearchVectorWithScore(queryEmbeddings, k, filter);
return docsWithScore;
}
/**
* upsert documents asynchronously into a couchbase collection
* @param documentsToInsert Documents to be inserted into couchbase collection with embeddings, original text and metadata
* @returns DocIds of the inserted documents
*/
async upsertDocuments(documentsToInsert) {
// Create promises for each document to be upserted
const upsertDocumentsPromises = documentsToInsert.map((document) => {
const currentDocumentKey = Object.keys(document)[0];
return this._collection
.upsert(currentDocumentKey, document[currentDocumentKey])
.then(() => currentDocumentKey)
.catch((e) => {
console.error("error received while upserting document", e);
throw new Error(`Upsert failed with error: ${e}`);
});
});
try {
// Upsert all documents asynchronously
const docIds = await Promise.all(upsertDocumentsPromises);
const successfulDocIds = [];
for (const id of docIds) {
if (id) {
successfulDocIds.push(id);
}
}
return successfulDocIds;
}
catch (e) {
console.error("An error occurred with Promise.all at upserting all documents", e);
throw e;
}
}
/**
* Add vectors and corresponding documents to a couchbase collection
* If the document IDs are passed, the existing documents (if any) will be
* overwritten with the new ones.
* @param vectors - The vectors to be added to the collection.
* @param documents - The corresponding documents to be added to the collection.
* @param options - Optional parameters for adding vectors.
* This may include the IDs and metadata of the documents to be added. Defaults to an empty object.
*
* @returns - A promise that resolves to an array of document IDs that were added to the collection.
*/
async addVectors(vectors, documents, options = {}) {
// Get document ids. if ids are not available then use UUIDs for each document
let ids = options ? options.ids : undefined;
if (ids === undefined) {
ids = Array.from({ length: documents.length }, () => uuid());
}
// Get metadata for each document. if metadata is not available, use empty object for each document
let metadata = options ? options.metadata : undefined;
if (metadata === undefined) {
metadata = Array.from({ length: documents.length }, () => ({}));
}
const documentsToInsert = ids.map((id, index) => ({
[id]: {
[this.textKey]: documents[index].pageContent,
[this.embeddingKey]: vectors[index],
[this.metadataKey]: metadata[index],
},
}));
let docIds = [];
try {
docIds = await this.upsertDocuments(documentsToInsert);
}
catch (err) {
console.error("Error while adding vectors", err);
throw err;
}
return docIds;
}
/**
* Run texts through the embeddings and persist in vectorstore.
* If the document IDs are passed, the existing documents (if any) will be
* overwritten with the new ones.
* @param documents - The corresponding documents to be added to the collection.
* @param options - Optional parameters for adding documents.
* This may include the IDs and metadata of the documents to be added. Defaults to an empty object.
*
* @returns - A promise that resolves to an array of document IDs that were added to the collection.
*/
async addDocuments(documents, options = {}) {
const texts = documents.map(({ pageContent }) => pageContent);
const metadatas = documents.map((doc) => doc.metadata);
if (!options.metadata) {
options.metadata = metadatas;
}
return this.addVectors(await this.embeddings.embedDocuments(texts), documents, options);
}
/**
* Create a new CouchbaseVectorStore from a set of documents.
* This function will initialize a new store, add the documents to it, and then return the store.
* @param documents - The documents to be added to the new store.
* @param embeddings - The embeddings to be used for the documents.
* @param config - The configuration for the new CouchbaseVectorStore. This includes the options for adding vectors.
*
* @returns - A promise that resolves to the new CouchbaseVectorStore that contains the added documents.
*/
static async fromDocuments(documents, embeddings, config) {
const store = await this.initialize(embeddings, config);
await store.addDocuments(documents, config.addVectorOptions);
return store;
}
/**
* Create a new CouchbaseVectorStore from a set of texts.
* This function will convert each text and its corresponding metadata into a Document,
* initialize a new store, add the documents to it, and then return the store.
* @param texts - The texts to be converted into Documents and added to the new store.
* @param metadatas - The metadata for each text. If an array is passed, each text will have its corresponding metadata.
* If not, all texts will have the same metadata.
* @param embeddings - The embeddings to be used for the documents.
* @param config - The configuration for the new CouchbaseVectorStore. This includes the options for adding vectors.
*
* @returns - A promise that resolves to the new CouchbaseVectorStore that contains the added documents.
*/
static async fromTexts(texts, metadatas, embeddings, config) {
const docs = [];
for (let i = 0; i < texts.length; i += 1) {
const metadata = Array.isArray(metadatas) ? metadatas[i] : metadatas;
const newDoc = new Document({
pageContent: texts[i],
metadata,
});
docs.push(newDoc);
}
return await this.fromDocuments(docs, embeddings, config);
}
/**
* Delete documents asynchronously from the collection.
* This function will attempt to remove each document in the provided list of IDs from the collection.
* If an error occurs during the deletion of a document, an error will be thrown with the ID of the document and the error message.
* @param ids - An array of document IDs to be deleted from the collection.
*
* @returns - A promise that resolves when all documents have been attempted to be deleted. If a document could not be deleted, an error is thrown.
*/
async delete(ids) {
const deleteDocumentsPromises = ids.map((id) => this._collection.remove(id).catch((err) => {
throw new Error(`Error while deleting document - Document Id: ${id}, Error: ${err}`);
}));
try {
await Promise.all(deleteDocumentsPromises);
}
catch (err) {
throw new Error(`Error while deleting documents, Error: ${err}`);
}
}
}