agsamantha/node_modules/@langchain/community/dist/vectorstores/cassandra.js

/* eslint-disable prefer-template */
import { v4 as uuidv4 } from "uuid";
import { VectorStore, } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";
import { maximalMarginalRelevance } from "@langchain/core/utils/math";
import { CassandraTable, } from "../utils/cassandra.js";
/**
 * Class for interacting with the Cassandra database. It extends the
 * VectorStore class and provides methods for adding vectors and
 * documents, searching for similar vectors, and creating instances from
 * texts or documents.
 */
export class CassandraStore extends VectorStore {
    /**
     * Identifier of this vector store implementation.
     * @returns The constant string "cassandra".
     */
    _vectorstoreType() {
        return "cassandra";
    }
    /**
     * Normalizes the user-supplied arguments into the complete table
     * definition that is handed to CassandraTable: resolves the primary key,
     * merges and de-duplicates the non-key/metadata columns, guarantees that
     * the text and vector columns exist and that the vector column is indexed.
     * The caller's `args` object and the arrays it holds are never modified.
     * @param args The arguments passed to the constructor.
     * @returns A copy of `args` with `vectorType`, `primaryKey`,
     * `nonKeyColumns`, `metadataColumns` and `indices` filled in.
     * @throws Error if `table` or `dimensions` is missing.
     */
    _cleanArgs(args) {
        const { table, dimensions, primaryKey, nonKeyColumns, indices, metadataColumns, vectorType = "cosine", } = args;
        if (!table || !dimensions) {
            throw new Error("Missing required arguments");
        }
        // Utility function to ensure the argument is treated as an array
        const toArray = (value) => (Array.isArray(value) ? value : [value]);
        // Work on a copy: default indices are appended below and must not
        // leak into the array owned by the caller.
        const indicesArg = [...toArray(indices || [])];
        // Use the primary key if provided, else default to a single auto-generated UUID column
        const primaryKeyArg = primaryKey
            ? toArray(primaryKey)
            : [{ name: this.idColumnAutoName, type: "uuid", partition: true }];
        // The combined nonKeyColumns and metadataColumns, de-duped by name (first occurrence wins)
        const combinedColumns = [
            ...toArray(nonKeyColumns || []),
            ...toArray(metadataColumns || []),
        ];
        const nonKeyColumnsArg = combinedColumns.filter((col, index, self) => self.findIndex((c) => c.name === col.name) === index);
        // If no metadata columns are specified, add a default metadata column
        // (with key and entry indices) consistent with Langchain Python
        if (nonKeyColumnsArg.length === 0) {
            nonKeyColumnsArg.push({
                name: this.metadataColumnDefaultName,
                type: "map<text, text>",
            });
            indicesArg.push({
                name: `idx_${this.metadataColumnDefaultName}_${table}_keys`,
                value: `(keys(${this.metadataColumnDefaultName}))`,
            }, {
                name: `idx_${this.metadataColumnDefaultName}_${table}_entries`,
                value: `(entries(${this.metadataColumnDefaultName}))`,
            });
        }
        // Append a column only when the user has not already declared one with that name
        const addDefaultNonKeyColumnIfNeeded = (defaultColumn) => {
            if (!nonKeyColumnsArg.some((col) => col.name === defaultColumn.name)) {
                nonKeyColumnsArg.push(defaultColumn);
            }
        };
        addDefaultNonKeyColumnIfNeeded({ name: this.textColumnName, type: "text" });
        addDefaultNonKeyColumnIfNeeded({
            name: this.vectorColumnName,
            type: `VECTOR<FLOAT,${dimensions}>`,
            alias: this.embeddingColumnAlias,
        });
        // If no index is specified for the vector column, add a default index
        const vectorIndexPattern = new RegExp(`\\(\\s*${this.vectorColumnName.toLowerCase()}\\s*\\)`);
        const hasVectorIndex = indicesArg.some((index) => vectorIndexPattern.test(index.value.toLowerCase()));
        if (!hasVectorIndex) {
            indicesArg.push({
                name: `idx_${this.vectorColumnName}_${table}`,
                value: `(${this.vectorColumnName})`,
                options: `{'similarity_function': '${vectorType.toLowerCase()}'}`,
            });
        }
        // Metadata the user will see excludes vector column and text column
        const metadataColumnsArg = [...primaryKeyArg, ...nonKeyColumnsArg].filter((column) => column.name !== this.vectorColumnName &&
            column.name !== this.textColumnName);
        return {
            ...args,
            vectorType,
            primaryKey: primaryKeyArg,
            nonKeyColumns: nonKeyColumnsArg,
            metadataColumns: metadataColumnsArg,
            indices: indicesArg,
        };
    }
    /**
     * Looks up a column definition by name.
     * @param columns A single column definition or an array of them.
     * @param columnName The name of the column to find.
     * @returns The first column whose `name` equals `columnName`.
     * @throws Error if no such column exists.
     */
    _getColumnByName(columns, columnName) {
        const columnsArray = Array.isArray(columns) ? columns : [columns];
        const column = columnsArray.find((col) => col.name === columnName);
        if (!column) {
            throw new Error(`Column ${columnName} not found`);
        }
        return column;
    }
    /**
     * Creates the store and the CassandraTable wrapper it delegates to.
     * @param embeddings The embeddings implementation used for documents and queries.
     * @param args The store arguments; `table` and `dimensions` are required.
     * @throws Error if required arguments are missing.
     */
    constructor(embeddings, args) {
        super(embeddings, args);
        // Instance fields and their initial values, defined in declaration
        // order as plain enumerable/configurable/writable data properties.
        const fieldDefaults = {
            table: undefined,
            idColumnAutoName: "id",
            idColumnAutoGenerated: undefined,
            vectorColumnName: "vector",
            vectorColumn: undefined,
            textColumnName: "text",
            textColumn: undefined,
            metadataColumnDefaultName: "metadata",
            metadataColumns: undefined,
            similarityColumn: undefined,
            embeddingColumnAlias: "embedding",
        };
        for (const [name, value] of Object.entries(fieldDefaults)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
        const cleanedArgs = this._cleanArgs(args);
        // _cleanArgs always adds the text and vector columns, so this is a
        // defensive check rather than an expected failure path.
        if (!cleanedArgs.nonKeyColumns || cleanedArgs.nonKeyColumns.length === 0) {
            throw new Error("No non-key columns provided");
        }
        this.vectorColumn = this._getColumnByName(cleanedArgs.nonKeyColumns, this.vectorColumnName);
        this.textColumn = this._getColumnByName(cleanedArgs.nonKeyColumns, this.textColumnName);
        // Pseudo-column selecting the similarity score; the "?" is bound to
        // the query vector at search time.
        this.similarityColumn = {
            name: `similarity_${cleanedArgs.vectorType}(${this.vectorColumnName},?)`,
            alias: "similarity_score",
            type: "",
        };
        this.idColumnAutoGenerated = !args.primaryKey;
        this.metadataColumns = cleanedArgs.metadataColumns;
        this.table = new CassandraTable(cleanedArgs);
    }
    /**
     * Method to save vectors to the Cassandra database.
     * When the id column is auto-generated and a document carries no id, a
     * UUID is generated and written onto that document's metadata.
     * @param vectors Vectors to save.
     * @param documents The documents associated with the vectors.
     * @returns Promise that resolves when the vectors have been added.
     */
    async addVectors(vectors, documents) {
        if (vectors.length === 0) {
            return;
        }
        // Prepare the values for upsert
        const values = vectors.map((vector, index) => {
            const document = documents[index];
            const docMetadata = document.metadata || {};
            // If idColumnAutoGenerated is true and ID is not provided, generate a UUID
            const currentId = docMetadata[this.idColumnAutoName];
            if (this.idColumnAutoGenerated &&
                (currentId === undefined || currentId === "")) {
                docMetadata[this.idColumnAutoName] = uuidv4();
            }
            // One value per metadata column; only absent values become null so
            // that legitimate falsy values (0, false, "") are stored as given.
            const row = this.metadataColumns.map((col) => docMetadata[col.name] ?? null);
            // Add the text content and vector
            row.push(document.pageContent, new Float32Array(vector));
            return row;
        });
        const columns = [
            ...this.metadataColumns,
            { name: this.textColumnName, type: "" },
            { name: this.vectorColumnName, type: "" },
        ];
        return this.table.upsert(values, columns);
    }
    /**
     * Accessor for the underlying table wrapper.
     * @returns The CassandraTable instance created by the constructor.
     */
    getCassandraTable() {
        return this.table;
    }
    /**
     * Method to add documents to the Cassandra database.
     * @param documents The documents to add.
     * @returns Promise that resolves when the documents have been added.
     */
    async addDocuments(documents) {
        const texts = documents.map((d) => d.pageContent);
        const vectors = await this.embeddings.embedDocuments(texts);
        return this.addVectors(vectors, documents);
    }
    /**
     * Helper method to search for vectors that are similar to a given query vector.
     * @param query The query vector.
     * @param k The number of similar Documents to return.
     * @param filter Optional filter to be applied as a WHERE clause.
     * @param includeEmbedding Whether to include the embedding vectors in the results.
     * @returns Promise that resolves with an array of tuples, each containing a
     * Document and a score; the array is empty when the query yields no result set.
     */
    async search(query, k, filter, includeEmbedding) {
        const vectorAsFloat32Array = new Float32Array(query);
        const similarityColumnWithBinds = {
            ...this.similarityColumn,
            binds: [vectorAsFloat32Array],
        };
        const queryCols = [
            ...this.metadataColumns,
            this.textColumn,
            similarityColumnWithBinds,
        ];
        if (includeEmbedding) {
            queryCols.push(this.vectorColumn);
        }
        const orderBy = {
            name: this.vectorColumnName,
            operator: "ANN OF",
            value: [vectorAsFloat32Array],
        };
        const queryResultSet = await this.table.select(queryCols, filter, [orderBy], k);
        // Callers iterate the result, so always hand back an array
        if (!queryResultSet?.rows) {
            return [];
        }
        return queryResultSet.rows.map((row) => {
            // Metadata is every returned column except the text, the score and null values
            const metadata = Object.fromEntries(Object.entries(row).filter(([key, value]) => key !== this.textColumnName &&
                key !== "similarity_score" &&
                value !== null));
            return [
                new Document({ pageContent: row[this.textColumnName], metadata }),
                row.similarity_score,
            ];
        });
    }
    /**
     * Method to search for vectors that are similar to a given query vector.
     * @param query The query vector.
     * @param k The number of similar Documents to return.
     * @param filter Optional filter to be applied as a WHERE clause.
     * @returns Promise that resolves with an array of tuples, each containing a Document and a score.
     */
    async similaritySearchVectorWithScore(query, k, filter) {
        return this.search(query, k, filter, false);
    }
    /**
     * Method to search for vectors that are similar to a given query vector, but with
     * the results selected using the maximal marginal relevance.
     * @param query The query string.
     * @param options.k The number of similar Documents to return.
     * @param options.fetchK=4*k The number of records to fetch before passing to the MMR algorithm.
     * @param options.lambda=0.5 The degree of diversity among the results between 0 (maximum diversity) and 1 (minimum diversity).
     * @param options.filter Optional filter to be applied as a WHERE clause.
     * @returns List of documents selected by maximal marginal relevance.
     */
    async maxMarginalRelevanceSearch(query, options) {
        const { k, fetchK = 4 * k, lambda = 0.5, filter } = options;
        const queryEmbedding = await this.embeddings.embedQuery(query);
        const queryResults = await this.search(queryEmbedding, fetchK, filter, true);
        if (queryResults.length === 0) {
            return [];
        }
        // The embedding travels in the metadata under its alias until MMR has run
        const embeddingList = queryResults.map(([doc]) => doc.metadata[this.embeddingColumnAlias]);
        const mmrIndexes = maximalMarginalRelevance(queryEmbedding, embeddingList, lambda, k);
        return mmrIndexes.map((idx) => {
            const [doc] = queryResults[idx];
            delete doc.metadata[this.embeddingColumnAlias];
            return doc;
        });
    }
    /**
     * Static method to create an instance of CassandraStore from texts.
     * @param texts The texts to use.
     * @param metadatas The metadata associated with the texts: either one
     * object per text, or a single object applied to every text.
     * @param embeddings The embeddings to use.
     * @param args The arguments for the CassandraStore.
     * @returns Promise that resolves with a new instance of CassandraStore.
     */
    static async fromTexts(texts, metadatas, embeddings, args) {
        const docs = texts.map((text, index) => new Document({
            pageContent: text,
            // A single metadata object is copied per document: addVectors
            // writes the generated id onto the metadata, so sharing one
            // object would give every row the same id.
            metadata: Array.isArray(metadatas)
                ? metadatas[index]
                : { ...metadatas },
        }));
        return CassandraStore.fromDocuments(docs, embeddings, args);
    }
    /**
     * Static method to create an instance of CassandraStore from documents.
     * @param docs The documents to use.
     * @param embeddings The embeddings to use.
     * @param args The arguments for the CassandraStore.
     * @returns Promise that resolves with a new instance of CassandraStore.
     */
    static async fromDocuments(docs, embeddings, args) {
        const instance = new this(embeddings, args);
        await instance.addDocuments(docs);
        return instance;
    }
    /**
     * Static method to create an instance of CassandraStore from an existing
     * index.
     * @param embeddings The embeddings to use.
     * @param args The arguments for the CassandraStore.
     * @returns Promise that resolves with a new instance of CassandraStore.
     */
    static async fromExistingIndex(embeddings, args) {
        return new this(embeddings, args);
    }
}