agsamantha/node_modules/langchain/dist/retrievers/matryoshka_retriever.d.ts

import { DocumentInterface } from "@langchain/core/documents";
import { Embeddings } from "@langchain/core/embeddings";
import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from "@langchain/core/vectorstores";
/**
 * Type for options when adding a document to the VectorStore.
 *
 * This is a free-form bag: keys are arbitrary strings and values are untyped
 * (`any`), so the alias itself imposes no schema. It is the type of the optional
 * `options` argument of `MatryoshkaRetriever.addDocuments`.
 *
 * NOTE(review): presumably the accepted keys depend on the concrete vector store
 * in use — confirm against the implementation.
 */
type AddDocumentOptions = Record<string, any>;
/**
 * Matryoshka-specific configuration accepted by the `MatryoshkaRetriever`
 * constructor (in addition to the regular `VectorStoreRetrieverInput`).
 *
 * Only `largeEmbeddingModel` is required; every other field is optional and
 * has the default noted on it.
 */
export interface MatryoshkaRetrieverFields {
    /**
     * The number of documents to retrieve from the small store, i.e. the size
     * of the candidate set that is later re-ranked with the large embeddings.
     * @default 50
     */
    smallK?: number;
    /**
     * The number of documents to retrieve from the large store, i.e. how many
     * of the re-ranked candidates are kept.
     * @default 8
     */
    largeK?: number;
    /**
     * The metadata key to store the larger embeddings. The large embedding of
     * each document is kept in the document's metadata under this key.
     * @default "lc_large_embedding"
     */
    largeEmbeddingKey?: string;
    /**
     * The embedding model to use when generating the large
     * embeddings. Required; there is no default.
     */
    largeEmbeddingModel: Embeddings;
    /**
     * The type of search to perform using the large embeddings, i.e. the
     * similarity measure used when re-ranking the candidates.
     * @default "cosine"
     */
    searchType?: "cosine" | "innerProduct" | "euclidean";
}
/**
 * A retriever that uses two sets of embeddings to perform adaptive retrieval. Based
 * on the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
 * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
 *
 * This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the
 * Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query
 * embedding in two steps:
 *
 * First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,
 * but less accurate search.
 *
 * Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional
 * embedding for higher accuracy.
 *
 * In short: a fast, lower-dimensional initial search combined with an accurate,
 * high-dimensional re-ranking step.
 *
 * @typeParam Store The concrete `VectorStore` type wrapped by this retriever
 * (defaults to `VectorStore`).
 */
export declare class MatryoshkaRetriever<Store extends VectorStore = VectorStore> extends VectorStoreRetriever<Store> {
    /** Number of documents to retrieve from the small store (see `MatryoshkaRetrieverFields.smallK`). */
    smallK: number;
    /** Number of documents to keep after ranking by the large embeddings (see `MatryoshkaRetrieverFields.largeK`). */
    largeK: number;
    /** Metadata key under which each document's large embedding is stored. */
    largeEmbeddingKey: string;
    /** Embedding model used to generate the large embeddings. */
    largeEmbeddingModel: Embeddings;
    /** Similarity measure used when searching with the large embeddings. */
    searchType: "cosine" | "innerProduct" | "euclidean";
    /**
     * Creates a Matryoshka retriever.
     *
     * @param fields The Matryoshka-specific settings (`MatryoshkaRetrieverFields`) combined with
     * the standard `VectorStoreRetrieverInput` for the wrapped store. Among the Matryoshka
     * settings only `largeEmbeddingModel` is required; the optional ones document their defaults
     * on `MatryoshkaRetrieverFields`.
     */
    constructor(fields: MatryoshkaRetrieverFields & VectorStoreRetrieverInput<Store>);
    /**
     * Ranks documents based on their similarity to a query embedding using larger embeddings.
     *
     * This method takes a query embedding and a list of documents (smallResults) as input. Each document
     * in the smallResults array has previously been associated with a large embedding stored in its metadata.
     * Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores
     * between the query embedding and each document's large embedding. It then ranks the documents based on
     * these similarity scores, from the most similar to the least similar.
     *
     * The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK`
     * is a class property defining the number of documents to return. This subset of documents is determined
     * by sorting the entire list of documents based on their similarity scores and then selecting the top
     * `largeK` documents.
     *
     * The signature is not visible in this declaration file because the member is private.
     *
     * @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.
     * @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.
     * @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding.
     */
    private _rankByLargeEmbeddings;
    /**
     * Retrieves the documents relevant to `query`.
     *
     * NOTE(review): the implementation is not part of this declaration file; presumably this
     * runs the two-pass search described on the class (small-embedding search followed by
     * re-ranking via `_rankByLargeEmbeddings`) — confirm against the source.
     *
     * @param {string} query The query text to search for.
     * @returns {Promise<DocumentInterface[]>} A promise that resolves to the retrieved documents.
     */
    _getRelevantDocuments(query: string): Promise<DocumentInterface[]>;
    /**
     * Override the default `addDocuments` method to embed the documents twice,
     * once using the larger embeddings model, and then again using the default
     * embedding model linked to the vector store.
     *
     * Declared as a property holding a function rather than as a method.
     *
     * @param {DocumentInterface[]} documents - An array of documents to add to the vector store.
     * @param {AddDocumentOptions} [options] - An optional object containing additional options for adding documents.
     * @returns {Promise<string[] | void>} A promise that resolves to an array of the document IDs that were
     * added to the vector store, or to nothing (`void`); the declared type allows both, so callers must
     * not assume IDs are always returned.
     */
    addDocuments: (documents: DocumentInterface[], options?: AddDocumentOptions) => Promise<string[] | void>;
}
export {};
update readme+reqs 2024-10-02 15:15:21 -05:00			`import { DocumentInterface } from "@langchain/core/documents";`
			`import { Embeddings } from "@langchain/core/embeddings";`
			`import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from "@langchain/core/vectorstores";`
			`/**`
			`* Type for options when adding a document to the VectorStore.`
			`*/`
			`type AddDocumentOptions = Record<string, any>;`
			`export interface MatryoshkaRetrieverFields {`
			`/**`
			`* The number of documents to retrieve from the small store.`
			`* @default 50`
			`*/`
			`smallK?: number;`
			`/**`
			`* The number of documents to retrieve from the large store.`
			`* @default 8`
			`*/`
			`largeK?: number;`
			`/**`
			`* The metadata key to store the larger embeddings.`
			`* @default "lc_large_embedding"`
			`*/`
			`largeEmbeddingKey?: string;`
			`/**`
			`* The embedding model to use when generating the large`
			`* embeddings.`
			`*/`
			`largeEmbeddingModel: Embeddings;`
			`/**`
			`* The type of search to perform using the large embeddings.`
			`* @default "cosine"`
			`*/`
			`searchType?: "cosine" \| "innerProduct" \| "euclidean";`
			`}`
			`/**`
			`* A retriever that uses two sets of embeddings to perform adaptive retrieval. Based`
			`* off of the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"`
			`* blog post {@link https://supabase.com/blog/matryoshka-embeddings}.`
			`*`
			`*`
			`* This class performs "Adaptive Retrieval" for searching text embeddings efficiently using the`
			`* Matryoshka Representation Learning (MRL) technique. It retrieves documents similar to a query`
			`* embedding in two steps:`
			`*`
			`* First-pass: Uses a lower dimensional sub-vector from the MRL embedding for an initial, fast,`
			`* but less accurate search.`
			`*`
			`* Second-pass: Re-ranks the top results from the first pass using the full, high-dimensional`
			`* embedding for higher accuracy.`
			`*`
			`*`
			`* This code implements MRL embeddings for efficient vector search by combining faster,`
			`* lower-dimensional initial search with accurate, high-dimensional re-ranking.`
			`*/`
			`export declare class MatryoshkaRetriever<Store extends VectorStore = VectorStore> extends VectorStoreRetriever<Store> {`
			`smallK: number;`
			`largeK: number;`
			`largeEmbeddingKey: string;`
			`largeEmbeddingModel: Embeddings;`
			`searchType: "cosine" \| "innerProduct" \| "euclidean";`
			`constructor(fields: MatryoshkaRetrieverFields & VectorStoreRetrieverInput<Store>);`
			`/**`
			`* Ranks documents based on their similarity to a query embedding using larger embeddings.`
			`*`
			`* This method takes a query embedding and a list of documents (smallResults) as input. Each document`
			`* in the smallResults array has previously been associated with a large embedding stored in its metadata.`
			`` * Depending on the `searchType` (cosine, innerProduct, or euclidean), it calculates the similarity scores ``
			`* between the query embedding and each document's large embedding. It then ranks the documents based on`
			`* these similarity scores, from the most similar to the least similar.`
			`*`
			`` * The method returns a promise that resolves to an array of the top `largeK` documents, where `largeK` ``
			`* is a class property defining the number of documents to return. This subset of documents is determined`
			`* by sorting the entire list of documents based on their similarity scores and then selecting the top`
			`` * `largeK` documents. ``
			`*`
			`* @param {number[]} embeddedQuery The embedding of the query, represented as an array of numbers.`
			`* @param {DocumentInterface[]} smallResults An array of documents, each with metadata that includes a large embedding for similarity comparison.`
			`` * @returns {Promise<DocumentInterface[]>} A promise that resolves to an array of the top `largeK` ranked documents based on their similarity to the query embedding. ``
			`*/`
			`private _rankByLargeEmbeddings;`
			`_getRelevantDocuments(query: string): Promise<DocumentInterface[]>;`
			`/**`
			`` * Override the default `addDocuments` method to embed the documents twice, ``
			`* once using the larger embeddings model, and then again using the default`
			`* embedding model linked to the vector store.`
			`*`
			`* @param {DocumentInterface[]} documents - An array of documents to add to the vector store.`
			`* @param {AddDocumentOptions} options - An optional object containing additional options for adding documents.`
			`* @returns {Promise<string[] \| void>} A promise that resolves to an array of the document IDs that were added to the vector store.`
			`*/`
			`addDocuments: (documents: DocumentInterface[], options?: AddDocumentOptions) => Promise<string[] \| void>;`
			`}`
			`export {};`