import * as uuid from "uuid";
import { VectorStore } from "@langchain/core/vectorstores";
import { Document } from "@langchain/core/documents";

/**
 * Chroma vector store integration.
 *
 * Setup:
 * Install `@langchain/community` and `chromadb`.
 *
 * ```bash
 * npm install @langchain/community chromadb
 * ```
 *
 * ## [Constructor args](https://api.js.langchain.com/classes/langchain_community_vectorstores_chroma.Chroma.html#constructor)
 *
 * ### Instantiate
 *
 * ```typescript
 * import { Chroma } from '@langchain/community/vectorstores/chroma';
 * // Or other embeddings
 * import { OpenAIEmbeddings } from '@langchain/openai';
 *
 * const embeddings = new OpenAIEmbeddings({
 *   model: "text-embedding-3-small",
 * })
 *
 * const vectorStore = new Chroma(
 *   embeddings,
 *   {
 *     collectionName: "foo",
 *     url: "http://localhost:8000", // URL of the Chroma server
 *   }
 * );
 * ```
 *
 * ### Add documents
 *
 * ```typescript
 * import type { Document } from '@langchain/core/documents';
 *
 * const document1 = { pageContent: "foo", metadata: { baz: "bar" } };
 * const document2 = { pageContent: "thud", metadata: { bar: "baz" } };
 * const document3 = { pageContent: "i will be deleted :(", metadata: {} };
 *
 * const documents: Document[] = [document1, document2, document3];
 * const ids = ["1", "2", "3"];
 * await vectorStore.addDocuments(documents, { ids });
 * ```
 *
 * ### Delete documents
 *
 * ```typescript
 * await vectorStore.delete({ ids: ["3"] });
 * ```
 *
 * ### Similarity search
 *
 * ```typescript
 * const results = await vectorStore.similaritySearch("thud", 1);
 * for (const doc of results) {
 *   console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);
 * }
 * // Output: * thud [{"bar":"baz"}]
 * ```
 *
 * ### Similarity search with filter
 *
 * ```typescript
 * const resultsWithFilter = await vectorStore.similaritySearch("thud", 1, { baz: "bar" });
 *
 * for (const doc of resultsWithFilter) {
 *   console.log(`* ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);
 * }
 * // Output: * foo [{"baz":"bar"}]
 * ```
 *
 * ### Similarity search with score
 *
 * ```typescript
 * const resultsWithScore = await vectorStore.similaritySearchWithScore("qux", 1);
 * for (const [doc, score] of resultsWithScore) {
 *   console.log(`* [SIM=${score.toFixed(6)}] ${doc.pageContent} [${JSON.stringify(doc.metadata, null)}]`);
 * }
 * // Output: * [SIM=0.000000] qux [{"bar":"baz","baz":"bar"}]
 * ```
 *
 * ### As a retriever
 *
 * ```typescript
 * const retriever = vectorStore.asRetriever({
 *   searchType: "mmr", // Leave blank for standard similarity search
 *   k: 1,
 * });
 * const resultAsRetriever = await retriever.invoke("thud");
 * console.log(resultAsRetriever);
 *
 * // Output: [Document({ metadata: { "bar":"baz" }, pageContent: "thud" })]
 * ```
 */
export class Chroma extends VectorStore {
  /** Identifier string for this vector store implementation. */
  _vectorstoreType() {
    return "chroma";
  }

  /**
   * @param embeddings Embeddings instance used to embed documents (see `addDocuments`).
   * @param args Configuration object. Fields read here: `numDimensions`,
   * `collectionName` (a random `langchain-<uuid>` name is generated when
   * falsy), `collectionMetadata`, `clientParams`, `filter`, and either
   * `index` (an existing client, used as-is) or `url` (server address,
   * defaulting to `http://localhost:8000` when present but falsy).
   */
  constructor(embeddings, args) {
    super(embeddings, args);
    // Declare every instance field up front (enumerable/configurable/writable,
    // initially undefined), in a fixed order, before any value is assigned.
    const fieldNames = [
      "index",
      "collection",
      "collectionName",
      "collectionMetadata",
      "numDimensions",
      "clientParams",
      "url",
      "filter",
    ];
    for (const fieldName of fieldNames) {
      Object.defineProperty(this, fieldName, {
        enumerable: true,
        configurable: true,
        writable: true,
        value: void 0,
      });
    }
    this.numDimensions = args.numDimensions;
    this.embeddings = embeddings;
    this.collectionName = ensureCollectionName(args.collectionName);
    this.collectionMetadata = args.collectionMetadata;
    this.clientParams = args.clientParams;
    // A caller-supplied client wins over a URL; `url` is only consulted when
    // no `index` key is present on `args`.
    if ("index" in args) {
      this.index = args.index;
    } else if ("url" in args) {
      this.url = args.url || "http://localhost:8000";
    }
    this.filter = args.filter;
  }

  /**
   * Adds documents to the Chroma database. The documents are first
   * converted to vectors using the `embeddings` instance, and then added to
   * the database.
   * @param documents An array of `Document` instances to be added to the database.
   * @param options Optional. An object containing an array of `ids` for the documents.
   * @returns A promise that resolves with the ids of the stored documents.
   */
  async addDocuments(documents, options) {
    const texts = documents.map(({ pageContent }) => pageContent);
    return this.addVectors(
      await this.embeddings.embedDocuments(texts),
      documents,
      options
    );
  }

  /**
   * Ensures that a collection exists in the Chroma database. If the
   * collection does not exist, it is created. The client and the collection
   * are created lazily on first use and cached on the instance.
   * @returns A promise that resolves with the `Collection` instance.
   * @throws Error if `getOrCreateCollection` fails; the underlying error is
   * attached as `cause`.
   */
  async ensureCollection() {
    if (!this.collection) {
      if (!this.index) {
        const { ChromaClient } = await Chroma.imports();
        this.index = new ChromaClient({
          path: this.url,
          ...(this.clientParams ?? {}),
        });
      }
      try {
        this.collection = await this.index.getOrCreateCollection({
          name: this.collectionName,
          ...(this.collectionMetadata && { metadata: this.collectionMetadata }),
        });
      } catch (err) {
        // Message is unchanged; the original error is kept as `cause` so its
        // stack is not lost (the extra argument is ignored on runtimes that
        // do not support it).
        throw new Error(`Chroma getOrCreateCollection error: ${err}`, {
          cause: err,
        });
      }
    }
    return this.collection;
  }

  /**
   * Adds vectors to the Chroma database. The vectors are associated with
   * the provided documents.
   *
   * `metadata.loc.lines.from` / `.to` are flattened to top-level `locFrom` /
   * `locTo` keys and the `loc` object is removed before the metadata is
   * stored; `similaritySearchVectorWithScore` reverses this on read.
   * @param vectors An array of vectors to be added to the database.
   * @param documents An array of `Document` instances associated with the vectors.
   * @param options Optional. An object containing an array of `ids` for the vectors.
   * When omitted, a v1 UUID is generated per vector.
   * @returns A promise that resolves with an array of document IDs when the vectors have been added to the database.
   * @throws Error if the number of vectors, documents and ids disagree, or if
   * any vector's length differs from `numDimensions`.
   */
  async addVectors(vectors, documents, options) {
    if (vectors.length === 0) {
      return [];
    }
    // The first batch fixes the dimensionality when it was not configured.
    if (this.numDimensions === undefined) {
      this.numDimensions = vectors[0].length;
    }
    if (vectors.length !== documents.length) {
      throw new Error(`Vectors and metadatas must have the same length`);
    }
    // Validate every vector, not just the first one.
    if (vectors.some((vector) => vector.length !== this.numDimensions)) {
      throw new Error(
        `Vectors must have the same length as the number of dimensions (${this.numDimensions})`
      );
    }
    const documentIds =
      options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v1());
    // Fail early with a clear message instead of a downstream client error.
    if (documentIds.length !== vectors.length) {
      throw new Error(`The number of ids must match the number of vectors`);
    }
    const collection = await this.ensureCollection();
    const mappedMetadatas = documents.map(({ metadata }) => {
      let locFrom;
      let locTo;
      if (metadata?.loc) {
        if (metadata.loc.lines?.from !== undefined) {
          locFrom = metadata.loc.lines.from;
        }
        if (metadata.loc.lines?.to !== undefined) {
          locTo = metadata.loc.lines.to;
        }
      }
      const newMetadata = {
        ...metadata,
        ...(locFrom !== undefined && { locFrom }),
        ...(locTo !== undefined && { locTo }),
      };
      if (newMetadata.loc) {
        delete newMetadata.loc;
      }
      return newMetadata;
    });
    await collection.upsert({
      ids: documentIds,
      embeddings: vectors,
      metadatas: mappedMetadatas,
      documents: documents.map(({ pageContent }) => pageContent),
    });
    return documentIds;
  }

  /**
   * Deletes documents from the Chroma database. The documents to be deleted
   * can be specified by providing an array of `ids` or a `filter` object.
   * When both are given, `ids` takes precedence.
   * @param params An object containing either an array of `ids` of the documents to be deleted or a `filter` object to specify the documents to be deleted.
   * @returns A promise that resolves when the specified documents have been deleted from the database.
   * @throws Error if neither `ids` (as an array) nor `filter` is provided.
   */
  async delete(params) {
    const collection = await this.ensureCollection();
    if (Array.isArray(params?.ids)) {
      await collection.delete({ ids: params.ids });
    } else if (params?.filter) {
      await collection.delete({
        where: { ...params.filter },
      });
    } else {
      throw new Error(`You must provide one of "ids" or "filter".`);
    }
  }

  /**
   * Searches for vectors in the Chroma database that are similar to the
   * provided query vector. The search can be filtered using the provided
   * `filter` object or the `filter` property of the `Chroma` instance.
   * @param query The query vector.
   * @param k The number of similar vectors to return.
   * @param filter Optional. A `filter` object to filter the search results.
   * @returns A promise that resolves with an array of tuples, each containing
   * a `Document` instance and the distance value reported by the collection
   * query for that result.
   * @throws Error if both `filter` and the instance-level `filter` are set.
   */
  async similaritySearchVectorWithScore(query, k, filter) {
    if (filter && this.filter) {
      throw new Error("cannot provide both `filter` and `this.filter`");
    }
    const _filter = filter ?? this.filter;
    // Only send `where` when there is something to filter on; an empty
    // object is not a meaningful filter, so the key is omitted instead.
    const hasFilter = _filter != null && Object.keys(_filter).length > 0;
    const collection = await this.ensureCollection();
    // similaritySearchVectorWithScore supports one query vector at a time
    // chroma supports multiple query vectors at a time
    const result = await collection.query({
      queryEmbeddings: query,
      nResults: k,
      ...(hasFilter && { where: { ..._filter } }),
    });
    const { ids, distances, documents, metadatas } = result;
    if (!ids || !distances || !documents || !metadatas) {
      return [];
    }
    // get the result data from the first and only query vector
    const firstIds = ids[0] ?? [];
    const firstDistances = distances[0] ?? [];
    const firstDocuments = documents[0];
    const firstMetadatas = metadatas[0];
    return firstIds.map((_id, i) => {
      let metadata = firstMetadatas?.[i] ?? {};
      // Rebuild the nested `loc.lines` object that addVectors flattened.
      // Compare against null/undefined rather than truthiness so that a
      // line number of 0 survives the round trip.
      if (metadata.locFrom != null && metadata.locTo != null) {
        const { locFrom, locTo, ...rest } = metadata;
        metadata = {
          ...rest,
          loc: { lines: { from: locFrom, to: locTo } },
        };
      }
      return [
        new Document({
          pageContent: firstDocuments?.[i] ?? "",
          metadata,
        }),
        firstDistances[i],
      ];
    });
  }

  /**
   * Creates a new `Chroma` instance from an array of text strings. The text
   * strings are converted to `Document` instances and added to the Chroma
   * database.
   * @param texts An array of text strings.
   * @param metadatas An array of metadata objects or a single metadata object. If an array is provided, it must have the same length as the `texts` array.
   * @param embeddings An `Embeddings` instance used to generate embeddings for the documents.
   * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database.
   * @returns A promise that resolves with a new `Chroma` instance.
   */
  static async fromTexts(texts, metadatas, embeddings, dbConfig) {
    const docs = texts.map(
      (text, i) =>
        new Document({
          pageContent: text,
          // A single metadata object is shared by every text.
          metadata: Array.isArray(metadatas) ? metadatas[i] : metadatas,
        })
    );
    return this.fromDocuments(docs, embeddings, dbConfig);
  }

  /**
   * Creates a new `Chroma` instance from an array of `Document` instances.
   * The documents are added to the Chroma database.
   * @param docs An array of `Document` instances.
   * @param embeddings An `Embeddings` instance used to generate embeddings for the documents.
   * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database.
   * @returns A promise that resolves with a new `Chroma` instance.
   */
  static async fromDocuments(docs, embeddings, dbConfig) {
    const instance = new this(embeddings, dbConfig);
    await instance.addDocuments(docs);
    return instance;
  }

  /**
   * Creates a new `Chroma` instance from an existing collection in the
   * Chroma database.
   * @param embeddings An `Embeddings` instance used to generate embeddings for the documents.
   * @param dbConfig A `ChromaLibArgs` object containing the configuration for the Chroma database.
   * @returns A promise that resolves with a new `Chroma` instance.
   */
  static async fromExistingCollection(embeddings, dbConfig) {
    const instance = new this(embeddings, dbConfig);
    await instance.ensureCollection();
    return instance;
  }

  /**
   * Lazily loads the optional `chromadb` dependency.
   * @ignore
   * @throws Error with installation instructions if `chromadb` cannot be imported.
   */
  static async imports() {
    try {
      const { ChromaClient } = await import("chromadb");
      return { ChromaClient };
    } catch (e) {
      // Keep the original failure as `cause` for debugging.
      throw new Error(
        "Please install chromadb as a dependency with, e.g. `npm install -S chromadb`",
        { cause: e }
      );
    }
  }
}

/**
 * Generates a unique collection name if none is provided.
 * @param collectionName The requested name; any falsy value triggers generation.
 * @returns `collectionName` unchanged, or `langchain-<uuid v4>` when it is falsy.
 */
function ensureCollectionName(collectionName) {
  if (!collectionName) {
    return `langchain-${uuid.v4()}`;
  }
  return collectionName;
}