agsamantha/node_modules/@langchain/community/dist/document_loaders/web/puppeteer.d.ts

81 lines
3.2 KiB
TypeScript
Raw Permalink Normal View History

2024-10-02 15:15:21 -05:00
import type { launch, WaitForOptions, Page, Browser, PuppeteerLaunchOptions } from "puppeteer";
import { Document } from "@langchain/core/documents";
import { BaseDocumentLoader } from "@langchain/core/document_loaders/base";
import type { DocumentLoader } from "@langchain/core/document_loaders/base";
/**
* Re-export of the Puppeteer `Page` and `Browser` types imported above, so
* that code importing from this module can reference them (for example when
* writing a {@link PuppeteerEvaluate} callback).
*/
export { Page, Browser };
/**
* Navigation options accepted by the loader: Puppeteer's `WaitForOptions`
* intersected with two optional string fields. This is the type of the
* `gotoOptions` field of {@link PuppeteerWebBaseLoaderOptions}.
*
* NOTE(review): presumably forwarded to Puppeteer's `page.goto` call; the
* implementation is not visible in this declaration file, so confirm there.
*/
export type PuppeteerGotoOptions = WaitForOptions & {
/** Optional referer value (NOTE(review): presumably the HTTP Referer sent with the navigation; confirm). */
referer?: string;
/** Optional referrer policy value; typed as a plain string, so no validation is expressed here. */
referrerPolicy?: string;
};
/**
* Type representing a function for evaluating JavaScript code on a web
* page using Puppeteer. It takes a Page and Browser object as parameters
* and returns a Promise that resolves to a string.
*
* This is the type of the optional `evaluate` field of
* {@link PuppeteerWebBaseLoaderOptions}.
*
* @param page - The Puppeteer `Page` handed to the callback.
* @param browser - The Puppeteer `Browser` handed to the callback.
* @returns Promise resolving to a string. NOTE(review): presumably used as the
* scraped page content; confirm against the implementation.
*/
export type PuppeteerEvaluate = (page: Page, browser: Browser) => Promise<string>;
/**
* Options bag accepted by {@link PuppeteerWebBaseLoader}. Every field is
* optional, and the whole object may be omitted when constructing the loader.
*/
export type PuppeteerWebBaseLoaderOptions = {
/** Puppeteer launch options (NOTE(review): presumably passed to `launch`; confirm in the implementation). */
launchOptions?: PuppeteerLaunchOptions;
/** Navigation options; see {@link PuppeteerGotoOptions}. */
gotoOptions?: PuppeteerGotoOptions;
/** Custom page-evaluation callback; see {@link PuppeteerEvaluate}. */
evaluate?: PuppeteerEvaluate;
};
/**
* Class that extends the BaseDocumentLoader class and implements the
* DocumentLoader interface. It represents a document loader for scraping
* web pages using Puppeteer.
*
* This file only declares the class's public surface; the behavior described
* below is taken from these declarations and their comments, not from the
* implementation.
* @example
* ```typescript
* const loader = new PuppeteerWebBaseLoader("https://exampleurl.com", {
* launchOptions: {
* headless: true,
* },
* gotoOptions: {
* waitUntil: "domcontentloaded",
* },
* });
* const screenshot = await loader.screenshot();
* ```
*/
export declare class PuppeteerWebBaseLoader extends BaseDocumentLoader implements DocumentLoader {
/** Location of the web page that the instance methods operate on. */
webPath: string;
/**
* Loader options, or `undefined` when none are set.
* NOTE(review): presumably the value passed to the constructor; confirm.
*/
options: PuppeteerWebBaseLoaderOptions | undefined;
/**
* Creates a loader for a single web page.
* @param webPath Location of the web page to load.
* @param options Optional launch, navigation and evaluation settings.
*/
constructor(webPath: string, options?: PuppeteerWebBaseLoaderOptions);
/**
* Static counterpart of {@link PuppeteerWebBaseLoader.scrape}, taking the
* target URL and options explicitly instead of reading them from an instance.
* @param url URL of the web page to scrape.
* @param options Optional launch, navigation and evaluation settings.
* @returns Promise that resolves to a string (NOTE(review): presumably the
* scraped HTML content, matching the `scrape` documentation; confirm).
*/
static _scrape(url: string, options?: PuppeteerWebBaseLoaderOptions): Promise<string>;
/**
* Method that calls the _scrape method to perform the scraping of the web
* page specified by the webPath property.
* @returns Promise that resolves to the scraped HTML content of the web page.
*/
scrape(): Promise<string>;
/**
* Method that calls the scrape method and returns the scraped HTML
* content as a Document object.
* @returns Promise that resolves to an array of Document objects.
*/
load(): Promise<Document[]>;
/**
* Static class method used to screenshot a web page and return
* it as a {@link Document} object where the pageContent property
* is the screenshot encoded in base64.
*
* @param {string} url URL of the web page to screenshot.
* @param {PuppeteerWebBaseLoaderOptions} [options] Optional launch, navigation and evaluation settings.
* @returns {Promise<Document>} A document object containing the screenshot of the page encoded in base64.
*/
static _screenshot(url: string, options?: PuppeteerWebBaseLoaderOptions): Promise<Document>;
/**
* Screenshot a web page and return it as a {@link Document} object where
* the pageContent property is the screenshot encoded in base64.
*
* @returns {Promise<Document>} A document object containing the screenshot of the page encoded in base64.
*/
screenshot(): Promise<Document>;
/**
* Static method that imports the necessary Puppeteer modules. It returns
* a Promise that resolves to an object containing the imported modules.
* @returns Promise that resolves to an object containing the imported Puppeteer modules.
* As declared here, that object exposes a single member, `launch`.
*/
static imports(): Promise<{
launch: typeof launch;
}>;
}