agsamantha/node_modules/@langchain/community/dist/document_loaders/web/playwright.d.ts
2024-10-02 15:15:21 -05:00

54 lines
2.4 KiB
TypeScript

import type { LaunchOptions, Page, Browser, Response } from "playwright";
import { Document } from "@langchain/core/documents";
import { BaseDocumentLoader } from "@langchain/core/document_loaders/base";
import type { DocumentLoader } from "@langchain/core/document_loaders/base";
export { Page, Browser, Response };
/**
 * Navigation options accepted by the Playwright web loader through
 * `PlaywrightWebBaseLoaderOptions.gotoOptions`. Every field is optional.
 *
 * NOTE(review): the field names mirror the options of Playwright's
 * `page.goto`; presumably they are forwarded to it unchanged, but the
 * implementation is not visible from this declaration file — confirm.
 */
export type PlaywrightGotoOptions = {
  /** Referer value to use for the navigation (presumably the HTTP header — confirm). */
  referer?: string;
  /** Navigation timeout; the unit is not stated here (presumably milliseconds — confirm). */
  timeout?: number;
  /** Lifecycle event to wait for before the navigation is considered finished. */
  waitUntil?: "load" | "domcontentloaded" | "networkidle" | "commit";
};
/**
 * Signature of a custom evaluation callback used with the Playwright
 * web loader to run JavaScript against a web page.
 *
 * @param page - The Playwright `Page` to evaluate against.
 * @param browser - The Playwright `Browser` instance.
 * @param response - The Playwright `Response`, or `null`; callers must
 * handle the `null` case before reading from it.
 * @returns Promise that resolves to a string.
 */
export type PlaywrightEvaluate = (
  page: Page,
  browser: Browser,
  response: Response | null
) => Promise<string>;
/**
 * Optional configuration bag accepted by `PlaywrightWebBaseLoader`.
 * All fields may be omitted.
 */
export type PlaywrightWebBaseLoaderOptions = {
  /** Playwright `LaunchOptions` (presumably used when launching the browser — confirm). */
  launchOptions?: LaunchOptions;
  /** Navigation options; see `PlaywrightGotoOptions`. */
  gotoOptions?: PlaywrightGotoOptions;
  /** Custom evaluation callback; see `PlaywrightEvaluate`. */
  evaluate?: PlaywrightEvaluate;
};
/**
 * Document loader that scrapes a web page using Playwright. It extends
 * `BaseDocumentLoader` and implements the `DocumentLoader` interface.
 */
export declare class PlaywrightWebBaseLoader extends BaseDocumentLoader implements DocumentLoader {
  /** Address of the web page that `scrape` and `load` operate on. */
  webPath: string;

  /**
   * Loader options, or `undefined`.
   * NOTE(review): presumably the `options` argument given to the
   * constructor — the implementation is not visible here; confirm.
   */
  options: PlaywrightWebBaseLoaderOptions | undefined;

  /**
   * @param webPath - Address of the web page to scrape.
   * @param options - Optional loader configuration.
   */
  constructor(webPath: string, options?: PlaywrightWebBaseLoaderOptions);

  /**
   * Static scraping routine that the instance `scrape` method delegates to.
   * @param url - Address of the web page to scrape.
   * @param options - Optional loader configuration.
   * @returns Promise that resolves to a string (the scraped HTML content,
   * going by the `scrape` contract).
   */
  static _scrape(url: string, options?: PlaywrightWebBaseLoaderOptions): Promise<string>;

  /**
   * Scrapes the web page identified by the `webPath` property by calling
   * the static `_scrape` method.
   * @returns Promise that resolves to the scraped HTML content of the web page.
   */
  scrape(): Promise<string>;

  /**
   * Calls `scrape` and wraps the scraped HTML content in a `Document`.
   * @returns Promise that resolves to an array of `Document` objects.
   */
  load(): Promise<Document[]>;

  /**
   * Imports the Playwright modules the loader needs.
   * @returns Promise that resolves to an object holding the imported
   * modules; per the declared type this is Playwright's `chromium`.
   */
  static imports(): Promise<{
    chromium: typeof import("playwright").chromium;
  }>;
}