agsamantha/node_modules/@langchain/community/dist/document_loaders/fs/chatgpt.js

82 lines
2.6 KiB
JavaScript
Raw Normal View History

2024-10-02 20:15:21 +00:00
import { Document } from "@langchain/core/documents";
import { TextLoader } from "langchain/document_loaders/fs/text";
function concatenateRows(message, title) {
/**
* Combine message information in a readable format ready to be used.
* @param {ChatGPTMessage} message - Message to be concatenated
* @param {string} title - Title of the conversation
*
* @returns {string} Concatenated message
*/
if (!message) {
return "";
}
const sender = message.author ? message.author.role : "unknown";
const text = message.content.parts[0];
const date = new Date(message.create_time * 1000)
.toISOString()
.slice(0, 19)
.replace("T", " ");
return `${title} - ${sender} on ${date}: ${text}\n\n`;
}
export class ChatGPTLoader extends TextLoader {
constructor(filePathOrBlob, numLogs = 0) {
super(filePathOrBlob);
Object.defineProperty(this, "numLogs", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
this.numLogs = numLogs;
}
async parse(raw) {
let data;
try {
data = JSON.parse(raw);
}
catch (e) {
console.error(e);
throw new Error("Failed to parse JSON");
}
const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data;
return truncatedData.map((d) => Object.values(d.mapping)
.filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system"))
.map((msg) => concatenateRows(msg.message, d.title))
.join(""));
}
async load() {
let text;
let metadata;
if (typeof this.filePathOrBlob === "string") {
const { readFile } = await TextLoader.imports();
try {
text = await readFile(this.filePathOrBlob, "utf8");
}
catch (e) {
console.error(e);
throw new Error("Failed to read file");
}
metadata = { source: this.filePathOrBlob };
}
else {
try {
text = await this.filePathOrBlob.text();
}
catch (e) {
console.error(e);
throw new Error("Failed to read blob");
}
metadata = { source: "blob", blobType: this.filePathOrBlob.type };
}
const parsed = await this.parse(text);
return parsed.map((pageContent, i) => new Document({
pageContent,
metadata: {
...metadata,
logIndex: i + 1,
},
}));
}
}