agsamantha/node_modules/@langchain/community/dist/retrievers/amazon_kendra.js

274 lines
10 KiB
JavaScript
Raw Normal View History

2024-10-02 20:15:21 +00:00
import { KendraClient, QueryCommand, RetrieveCommand, } from "@aws-sdk/client-kendra";
import { BaseRetriever } from "@langchain/core/retrievers";
import { Document } from "@langchain/core/documents";
/**
* @deprecated The AmazonKendraRetriever integration has been moved to the `@langchain/aws` package. Import from `@langchain/aws` instead.
*
* Class for interacting with Amazon Kendra, an intelligent search service
* provided by AWS. Extends the BaseRetriever class.
* @example
* ```typescript
* const retriever = new AmazonKendraRetriever({
* topK: 10,
* indexId: "YOUR_INDEX_ID",
* region: "us-east-2",
* clientOptions: {
* credentials: {
* accessKeyId: "YOUR_ACCESS_KEY_ID",
* secretAccessKey: "YOUR_SECRET_ACCESS_KEY",
* },
* },
* });
*
* const docs = await retriever.getRelevantDocuments("How are clouds formed?");
* ```
*/
export class AmazonKendraRetriever extends BaseRetriever {
    static lc_name() {
        return "AmazonKendraRetriever";
    }
    /**
     * Creates a retriever bound to a single Kendra index.
     * @param fields Retriever configuration.
     * @param fields.indexId Id of the Kendra index to search. Required.
     * @param fields.topK Maximum number of documents to return. Defaults to 10.
     * @param fields.clientOptions Extra options spread into the KendraClient
     * configuration after `region`, so a `region` given here takes precedence.
     * @param fields.attributeFilter Optional filter forwarded with every request.
     * @param fields.region AWS region for the Kendra client. Required.
     * @throws Error when `region` or `indexId` is missing.
     */
    constructor({ indexId, topK = 10, clientOptions, attributeFilter, region, } = {}) {
        super();
        // NOTE: the message says "regionName" although the option is named
        // `region`; the text is kept unchanged for callers matching on it.
        if (!region) {
            throw new Error("Please pass regionName field to the constructor!");
        }
        if (!indexId) {
            throw new Error("Please pass Kendra Index Id to the constructor");
        }
        // Own properties are defined explicitly (enumerable, configurable,
        // writable), in this order, rather than through plain assignment.
        const fields = {
            lc_namespace: ["langchain", "retrievers", "amazon_kendra"],
            indexId,
            topK,
            kendraClient: new KendraClient({
                region,
                ...clientOptions,
            }),
            attributeFilter,
        };
        for (const [name, value] of Object.entries(fields)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
    }
    /**
     * Combines title and excerpt into a single string.
     * @param title The title of the document.
     * @param excerpt An excerpt from the document.
     * @returns The labelled title and/or excerpt, each followed by a newline;
     * an empty string when both are empty.
     */
    combineText(title, excerpt) {
        const titlePart = title ? `Document Title: ${title}\n` : "";
        const excerptPart = excerpt ? `Document Excerpt: \n${excerpt}\n` : "";
        return `${titlePart}${excerptPart}`;
    }
    /**
     * Cleans the result text by replacing sequences of whitespace with a
     * single space and removing ellipses ("...").
     * @param resText The result text to clean.
     * @returns The cleaned result text.
     */
    cleanResult(resText) {
        return resText.replace(/\s+/g, " ").replace(/\.\.\./g, "");
    }
    /**
     * Extracts the attribute value from a DocumentAttributeValue object.
     * Fields are probed in the order DateValue, LongValue, StringListValue,
     * StringValue, and the first one that is set is returned.
     * @param value The DocumentAttributeValue object to extract the value from.
     * @returns The extracted attribute value, or "" when none is set.
     */
    getDocAttributeValue(value) {
        // Nullish checks rather than truthiness: a LongValue of 0 is a real
        // value and must not be replaced by the "" fallback.
        return (value.DateValue ??
            value.LongValue ??
            value.StringListValue ??
            value.StringValue ??
            "");
    }
    /**
     * Extracts the attribute key-value pairs from an array of
     * DocumentAttribute objects. Entries lacking a Key or a Value are skipped.
     * @param documentAttributes The array of DocumentAttribute objects to extract the key-value pairs from.
     * @returns An object containing the extracted attribute key-value pairs.
     */
    getDocAttributes(documentAttributes) {
        const attributes = {};
        for (const attr of documentAttributes ?? []) {
            if (attr.Key && attr.Value) {
                attributes[attr.Key] = this.getDocAttributeValue(attr.Value);
            }
        }
        return attributes;
    }
    /**
     * Converts a RetrieveResultItem object into a Document object.
     * @param item The RetrieveResultItem object to convert.
     * @returns A Document whose pageContent combines title and cleaned content
     * and whose metadata holds source, title, excerpt and document_attributes.
     */
    convertRetrieverItem(item) {
        const title = item.DocumentTitle || "";
        const excerpt = item.Content ? this.cleanResult(item.Content) : "";
        return new Document({
            pageContent: this.combineText(title, excerpt),
            metadata: {
                source: item.DocumentURI,
                title,
                excerpt,
                document_attributes: this.getDocAttributes(item.DocumentAttributes),
            },
        });
    }
    /**
     * Extracts the top-k documents from a RetrieveCommandOutput object.
     * @param response The RetrieveCommandOutput object to extract the documents from.
     * @param pageSize The number of documents to extract.
     * @returns An array of Document objects; empty when there are no ResultItems.
     */
    getRetrieverDocs(response, pageSize) {
        if (!response.ResultItems) {
            return [];
        }
        // slice() already clamps to the array length.
        return response.ResultItems.slice(0, pageSize).map((item) => this.convertRetrieverItem(item));
    }
    /**
     * Extracts the excerpt text from a QueryResultItem object. The text of a
     * leading "AnswerText" additional attribute is preferred; otherwise the
     * document excerpt is used.
     * @param item The QueryResultItem object to extract the excerpt text from.
     * @returns The cleaned excerpt text, or "" when neither source is present.
     */
    getQueryItemExcerpt(item) {
        const firstAttribute = item.AdditionalAttributes?.[0];
        if (firstAttribute?.Key === "AnswerText") {
            return this.cleanResult(firstAttribute.Value?.TextWithHighlightsValue?.Text || "");
        }
        if (item.DocumentExcerpt) {
            return this.cleanResult(item.DocumentExcerpt.Text || "");
        }
        return "";
    }
    /**
     * Converts a QueryResultItem object into a Document object.
     * @param item The QueryResultItem object to convert.
     * @returns A Document whose pageContent combines title and excerpt and
     * whose metadata holds source, title, excerpt and document_attributes.
     */
    convertQueryItem(item) {
        const title = item.DocumentTitle?.Text || "";
        const excerpt = this.getQueryItemExcerpt(item);
        return new Document({
            pageContent: this.combineText(title, excerpt),
            metadata: {
                source: item.DocumentURI,
                title,
                excerpt,
                document_attributes: this.getDocAttributes(item.DocumentAttributes),
            },
        });
    }
    /**
     * Extracts the top-k documents from a QueryCommandOutput object.
     * @param response The QueryCommandOutput object to extract the documents from.
     * @param pageSize The number of documents to extract.
     * @returns An array of Document objects; empty when there are no ResultItems.
     */
    getQueryDocs(response, pageSize) {
        if (!response.ResultItems) {
            return [];
        }
        // slice() already clamps to the array length.
        return response.ResultItems.slice(0, pageSize).map((item) => this.convertQueryItem(item));
    }
    /**
     * Sends a retrieve request to Kendra and, when it yields no result items,
     * falls back to a query request. Returns at most `topK` documents.
     * @param query The query to send to Kendra.
     * @param topK The number of top documents to return. Defaults to the
     * retriever's configured `topK`.
     * @param attributeFilter Optional filter to apply when retrieving documents.
     * @returns A Promise that resolves to an array of Document objects.
     */
    async queryKendra(query, topK = this.topK, attributeFilter) {
        const request = {
            IndexId: this.indexId,
            QueryText: query,
            PageSize: topK,
            AttributeFilter: attributeFilter,
        };
        const retrieveResponse = await this.kendraClient.send(new RetrieveCommand({ ...request }));
        if (retrieveResponse.ResultItems?.length) {
            // Truncate with the same limit that was sent as PageSize.
            return this.getRetrieverDocs(retrieveResponse, topK);
        }
        // Retrieve API returned no results (empty or absent list), call query API.
        const queryResponse = await this.kendraClient.send(new QueryCommand({ ...request }));
        return this.getQueryDocs(queryResponse, topK);
    }
    /**
     * Retriever entry point: looks up documents for `query` using the
     * configured `topK` and `attributeFilter`.
     * @param query The query to send to Kendra.
     * @returns A Promise that resolves to an array of Document objects.
     */
    async _getRelevantDocuments(query) {
        return this.queryKendra(query, this.topK, this.attributeFilter);
    }
}