// Listing metadata: 273 lines, 10 KiB, JavaScript
import { KendraClient, QueryCommand, RetrieveCommand, } from "@aws-sdk/client-kendra";
|
|
import { BaseRetriever } from "@langchain/core/retrievers";
|
|
import { Document } from "@langchain/core/documents";
|
|
/**
 * @deprecated The AmazonKendraRetriever integration has been moved to the `@langchain/aws` package. Import from `@langchain/aws` instead.
 *
 * Class for interacting with Amazon Kendra, an intelligent search service
 * provided by AWS. Extends the BaseRetriever class.
 *
 * A search first calls the Kendra Retrieve API; when that yields no result
 * items, it falls back to the Kendra Query API.
 * @example
 * ```typescript
 * const retriever = new AmazonKendraRetriever({
 *   topK: 10,
 *   indexId: "YOUR_INDEX_ID",
 *   region: "us-east-2",
 *   clientOptions: {
 *     credentials: {
 *       accessKeyId: "YOUR_ACCESS_KEY_ID",
 *       secretAccessKey: "YOUR_SECRET_ACCESS_KEY",
 *     },
 *   },
 * });
 *
 * const docs = await retriever.getRelevantDocuments("How are clouds formed?");
 * ```
 */
export class AmazonKendraRetriever extends BaseRetriever {
    static lc_name() {
        return "AmazonKendraRetriever";
    }
    /**
     * Creates a retriever bound to a single Kendra index.
     * @param fields Configuration object.
     * @param fields.indexId Id of the Kendra index to search (required).
     * @param fields.topK Maximum number of documents to return. Defaults to 10.
     * @param fields.clientOptions Extra options spread into the KendraClient
     * configuration after `region`, so they take precedence over it.
     * @param fields.attributeFilter Optional attribute filter sent with every request.
     * @param fields.region AWS region of the Kendra index (required).
     * @throws {Error} If `region` or `indexId` is missing.
     */
    constructor({ indexId, topK = 10, clientOptions, attributeFilter, region, } = {}) {
        super();
        Object.defineProperty(this, "lc_namespace", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: ["langchain", "retrievers", "amazon_kendra"]
        });
        // Declare the instance fields up front (in a fixed order) before
        // validation, mirroring compiled class-field initialisation.
        for (const field of ["indexId", "topK", "kendraClient", "attributeFilter"]) {
            Object.defineProperty(this, field, {
                enumerable: true,
                configurable: true,
                writable: true,
                value: void 0
            });
        }
        if (!region) {
            throw new Error("Please pass region field to the constructor!");
        }
        if (!indexId) {
            throw new Error("Please pass Kendra Index Id to the constructor");
        }
        this.topK = topK;
        this.kendraClient = new KendraClient({
            region,
            ...clientOptions,
        });
        this.attributeFilter = attributeFilter;
        this.indexId = indexId;
    }
    /**
     * Combines title and excerpt into a single string.
     * @param title The title of the document.
     * @param excerpt An excerpt from the document.
     * @returns A single string combining the title and excerpt; empty parts
     * are omitted.
     */
    combineText(title, excerpt) {
        let text = "";
        if (title) {
            text += `Document Title: ${title}\n`;
        }
        if (excerpt) {
            text += `Document Excerpt: \n${excerpt}\n`;
        }
        return text;
    }
    /**
     * Cleans the result text by replacing sequences of whitespace with a
     * single space and then removing ellipses ("...").
     * @param resText The result text to clean.
     * @returns The cleaned result text.
     */
    cleanResult(resText) {
        return resText.replace(/\s+/g, " ").replace(/\.\.\./g, "");
    }
    /**
     * Extracts the attribute value from a DocumentAttributeValue object.
     * The first populated field wins, in the order DateValue, LongValue,
     * StringListValue, StringValue.
     * @param value The DocumentAttributeValue object to extract the value from.
     * @returns The extracted attribute value, or "" when no field is set.
     */
    getDocAttributeValue(value) {
        // Nullish checks (not truthiness) so a legitimate LongValue of 0 is kept.
        return (value.DateValue ??
            value.LongValue ??
            value.StringListValue ??
            value.StringValue ??
            "");
    }
    /**
     * Extracts the attribute key-value pairs from an array of
     * DocumentAttribute objects. Entries without a Key or a Value are skipped.
     * @param documentAttributes The array of DocumentAttribute objects to extract the key-value pairs from.
     * @returns An object containing the extracted attribute key-value pairs.
     */
    getDocAttributes(documentAttributes) {
        const attributes = {};
        if (!documentAttributes) {
            return attributes;
        }
        for (const { Key, Value } of documentAttributes) {
            if (Key && Value) {
                attributes[Key] = this.getDocAttributeValue(Value);
            }
        }
        return attributes;
    }
    /**
     * Converts a RetrieveResultItem object into a Document object.
     * @param item The RetrieveResultItem object to convert.
     * @returns A Document whose metadata holds the source URI, title,
     * cleaned excerpt and document attributes.
     */
    convertRetrieverItem(item) {
        const title = item.DocumentTitle || "";
        const excerpt = item.Content ? this.cleanResult(item.Content) : "";
        return new Document({
            pageContent: this.combineText(title, excerpt),
            metadata: {
                source: item.DocumentURI,
                title,
                excerpt,
                document_attributes: this.getDocAttributes(item.DocumentAttributes),
            },
        });
    }
    /**
     * Extracts the top-k documents from a RetrieveCommandOutput object.
     * @param response The RetrieveCommandOutput object to extract the documents from.
     * @param pageSize The number of documents to extract.
     * @returns An array of Document objects (empty when there are no result items).
     */
    getRetrieverDocs(response, pageSize) {
        if (!response.ResultItems) {
            return [];
        }
        // slice() already clamps the end index to the array length.
        return response.ResultItems
            .slice(0, pageSize)
            .map((item) => this.convertRetrieverItem(item));
    }
    /**
     * Extracts the excerpt text from a QueryResultItem object. Prefers the
     * "AnswerText" additional attribute when it is the first one, otherwise
     * falls back to the document excerpt.
     * @param item The QueryResultItem object to extract the excerpt text from.
     * @returns The extracted, cleaned excerpt text, or "" when none is available.
     */
    getQueryItemExcerpt(item) {
        const firstAttribute = item.AdditionalAttributes?.[0];
        if (firstAttribute?.Key === "AnswerText") {
            return this.cleanResult(firstAttribute.Value?.TextWithHighlightsValue?.Text || "");
        }
        if (item.DocumentExcerpt) {
            return this.cleanResult(item.DocumentExcerpt.Text || "");
        }
        return "";
    }
    /**
     * Converts a QueryResultItem object into a Document object.
     * @param item The QueryResultItem object to convert.
     * @returns A Document whose metadata holds the source URI, title,
     * cleaned excerpt and document attributes.
     */
    convertQueryItem(item) {
        const title = item.DocumentTitle?.Text || "";
        const excerpt = this.getQueryItemExcerpt(item);
        return new Document({
            pageContent: this.combineText(title, excerpt),
            metadata: {
                source: item.DocumentURI,
                title,
                excerpt,
                document_attributes: this.getDocAttributes(item.DocumentAttributes),
            },
        });
    }
    /**
     * Extracts the top-k documents from a QueryCommandOutput object.
     * @param response The QueryCommandOutput object to extract the documents from.
     * @param pageSize The number of documents to extract.
     * @returns An array of Document objects (empty when there are no result items).
     */
    getQueryDocs(response, pageSize) {
        if (!response.ResultItems) {
            return [];
        }
        // slice() already clamps the end index to the array length.
        return response.ResultItems
            .slice(0, pageSize)
            .map((item) => this.convertQueryItem(item));
    }
    /**
     * Sends a retrieve request to Kendra and returns the top-k documents.
     * When the Retrieve API yields no result items (empty or missing list),
     * the same request is sent to the Query API instead.
     * @param query The query to send to Kendra.
     * @param topK The number of top documents to return. Defaults to the
     * instance's `topK`.
     * @param attributeFilter Optional filter to apply when retrieving documents.
     * @returns A Promise that resolves to an array of Document objects.
     */
    async queryKendra(query, topK = this.topK, attributeFilter) {
        const request = {
            IndexId: this.indexId,
            QueryText: query,
            PageSize: topK,
            AttributeFilter: attributeFilter,
        };
        const retrieveResponse = await this.kendraClient.send(new RetrieveCommand({ ...request }));
        if (retrieveResponse.ResultItems?.length) {
            // Truncate with the same topK that was requested as PageSize.
            return this.getRetrieverDocs(retrieveResponse, topK);
        }
        // Retrieve API returned no results, call query API
        const queryResponse = await this.kendraClient.send(new QueryCommand({ ...request }));
        return this.getQueryDocs(queryResponse, topK);
    }
    /**
     * Retriever hook: runs the query with the instance's configured `topK`
     * and `attributeFilter`.
     * @param query The query to search for.
     * @returns A Promise that resolves to an array of Document objects.
     */
    async _getRelevantDocuments(query) {
        return this.queryKendra(query, this.topK, this.attributeFilter);
    }
}
|