41 lines
1.8 KiB
TypeScript
41 lines
1.8 KiB
TypeScript
|
import { Document } from "@langchain/core/documents";
|
||
|
import { CheerioWebBaseLoader } from "./cheerio.js";
|
||
|
/**
|
||
|
* A class that extends the CheerioWebBaseLoader class. It represents a
|
||
|
* loader for loading web pages from the Hacker News website.
|
||
|
*/
|
||
|
export declare class HNLoader extends CheerioWebBaseLoader {
|
||
|
webPath: string;
|
||
|
constructor(webPath: string);
|
||
|
/**
|
||
|
* An asynchronous method that loads the web page. If the webPath includes
|
||
|
* "item", it calls the loadComments() method to load the comments from
|
||
|
* the web page. Otherwise, it calls the loadResults() method to load the
|
||
|
* results from the web page.
|
||
|
* @returns A Promise that resolves to an array of Document instances.
|
||
|
*/
|
||
|
load(): Promise<Document[]>;
|
||
|
/**
|
||
|
* A private method that loads the comments from the web page. It selects
|
||
|
* the elements with the class "athing comtr" using the $ function
|
||
|
* provided by Cheerio. It also extracts the title of the web page from
|
||
|
* the element with the id "pagespace". It creates Document instances for
|
||
|
* each comment, with the comment text as the page content and the source
|
||
|
* and title as metadata.
|
||
|
* @param $ A CheerioAPI instance.
|
||
|
* @returns An array of Document instances.
|
||
|
*/
|
||
|
private loadComments;
|
||
|
/**
|
||
|
* A private method that loads the results from the web page. It selects
|
||
|
* the elements with the class "athing" using the $ function provided by
|
||
|
* Cheerio. It extracts the ranking, link, title, and other metadata from
|
||
|
* each result item. It creates Document instances for each result item,
|
||
|
* with the title as the page content and the source, title, link, and
|
||
|
* ranking as metadata.
|
||
|
* @param $ A CheerioAPI instance.
|
||
|
* @returns An array of Document instances.
|
||
|
*/
|
||
|
private loadResults;
|
||
|
}
|