agsamantha/node_modules/langchain/dist/retrievers/matryoshka_retriever.d.ts

94 lines
4.5 KiB
TypeScript
Raw Normal View History

2024-10-02 15:15:21 -05:00
import { DocumentInterface } from "@langchain/core/documents";
import { Embeddings } from "@langchain/core/embeddings";
import { VectorStore, VectorStoreRetriever, VectorStoreRetrieverInput } from "@langchain/core/vectorstores";
/**
 * Free-form options bag accepted when adding a document to the VectorStore.
 * Keys are arbitrary strings and values are left untyped.
 */
type AddDocumentOptions = {
    [key: string]: any;
};
export interface MatryoshkaRetrieverFields {
    /**
     * How many documents to fetch from the small store.
     *
     * @default 50
     */
    smallK?: number;

    /**
     * How many documents to fetch from the large store.
     *
     * @default 8
     */
    largeK?: number;

    /**
     * Metadata key under which the larger embeddings are stored.
     *
     * @default "lc_large_embedding"
     */
    largeEmbeddingKey?: string;

    /**
     * Embedding model used to generate the large embeddings.
     * This is the only required field of this interface.
     */
    largeEmbeddingModel: Embeddings;

    /**
     * Kind of search carried out with the large embeddings.
     *
     * @default "cosine"
     */
    searchType?: "cosine" | "innerProduct" | "euclidean";
}
/**
 * Retriever that performs "Adaptive Retrieval" over two sets of embeddings, following
 * the "Matryoshka embeddings: faster OpenAI vector search using Adaptive Retrieval"
 * blog post {@link https://supabase.com/blog/matryoshka-embeddings}.
 *
 * It relies on the Matryoshka Representation Learning (MRL) technique to search text
 * embeddings efficiently. Documents similar to a query embedding are found in two steps:
 *
 * 1. First pass: an initial search over a lower dimensional sub-vector of the MRL
 *    embedding, which is fast but less accurate.
 * 2. Second pass: the top results of the first pass are re-ranked with the full,
 *    high-dimensional embedding for higher accuracy.
 *
 * In short: a fast low-dimensional initial search combined with an accurate
 * high-dimensional re-ranking.
 */
export declare class MatryoshkaRetriever<Store extends VectorStore = VectorStore> extends VectorStoreRetriever<Store> {
    /** Number of documents to retrieve from the small store. */
    smallK: number;

    /** Number of documents to retrieve from the large store. */
    largeK: number;

    /** Metadata key under which the larger embeddings are stored. */
    largeEmbeddingKey: string;

    /** Embedding model used to generate the large embeddings. */
    largeEmbeddingModel: Embeddings;

    /** Type of search performed with the large embeddings. */
    searchType: "cosine" | "innerProduct" | "euclidean";

    /**
     * @param fields Matryoshka-specific settings merged with the regular
     * vector store retriever input.
     */
    constructor(fields: MatryoshkaRetrieverFields & VectorStoreRetrieverInput<Store>);

    /**
     * Re-ranks documents by how similar their large embedding is to the query embedding.
     *
     * Every document in `smallResults` is expected to carry a previously stored large
     * embedding in its metadata. The similarity between the query embedding and each of
     * those large embeddings is computed according to `searchType` (cosine, innerProduct,
     * or euclidean). The documents are then ordered from most to least similar and only
     * the first `largeK` of them are kept, `largeK` being the class property that fixes
     * how many documents are returned.
     *
     * @param embeddedQuery The query embedding, as an array of numbers.
     * @param smallResults Array of documents whose metadata includes a large embedding
     * used for the similarity comparison.
     * @returns A promise resolving to an array of the top `largeK` documents, ranked by
     * similarity to the query embedding.
     */
    private _rankByLargeEmbeddings;

    /**
     * Resolves to the documents retrieved for `query`.
     *
     * NOTE(review): presumably runs the two-pass search described on the class; the
     * implementation is not visible from this declaration file.
     *
     * @param query The query text.
     */
    _getRelevantDocuments(query: string): Promise<DocumentInterface[]>;

    /**
     * Overrides the default `addDocuments` so that each document is embedded twice:
     * once with the larger embeddings model, and once more with the default embedding
     * model linked to the vector store.
     *
     * @param documents Array of documents to add to the vector store.
     * @param options Optional object holding additional options for adding documents.
     * @returns A promise resolving to an array of the IDs of the documents added to the
     * vector store; the declared type also allows it to resolve to `void`.
     */
    addDocuments: (documents: DocumentInterface[], options?: AddDocumentOptions) => Promise<string[] | void>;
}
// Empty export list. NOTE(review): presumably emitted by the compiler so that
// declarations without an `export` modifier (e.g. `AddDocumentOptions`) stay
// local to this declaration file — confirm against the TypeScript docs.
export {};