agsamantha/node_modules/@langchain/community/dist/utils/cassandra.js
2024-10-02 15:15:21 -05:00

914 lines
46 KiB
JavaScript

import { AsyncCaller, } from "@langchain/core/utils/async_caller";
import { Client, } from "cassandra-driver";
import fs from "node:fs/promises";
import * as path from "node:path";
import * as os from "node:os";
/**
* Provides a centralized and streamlined factory for creating and configuring instances of the Cassandra client.
* This class abstracts the complexities involved in instantiating and configuring Cassandra client instances,
* enabling straightforward integration with Cassandra databases. It supports customization through various
* configuration options, allowing for the creation of clients tailored to specific needs, such as connecting
* to different clusters or utilizing specialized authentication and connection options.
*
* Key Features:
* - Simplifies the Cassandra client creation process with method-based configurations.
* - Supports customization for connecting to various Cassandra environments, including cloud-based services like Astra.
* - Ensures consistent and optimal client configuration, incorporating best practices.
*
* Example Usage (Apache Cassandra®):
* ```
* const cassandraArgs = {
* contactPoints: ['h1', 'h2'],
* localDataCenter: 'datacenter1',
* credentials: {
* username: <...> as string,
* password: <...> as string,
* },
* };
* const cassandraClient = await CassandraClientFactory.getClient(cassandraArgs);
* ```
*
* Example Usage (DataStax AstraDB):
* ```
* const astraArgs = {
* serviceProviderArgs: {
* astra: {
* token: <...> as string,
* endpoint: <...> as string,
* },
* },
* };
* const cassandraClient = await CassandraClientFactory.getClient(astraArgs);
* ```
*/
export class CassandraClientFactory {
    /**
     * Asynchronously obtains a configured Cassandra client based on the provided arguments.
     * The arguments are first normalised by `processArgs` and then handed to the
     * cassandra-driver `Client` constructor.
     *
     * @param args The configuration arguments for the Cassandra client, including any service provider-specific options.
     * @returns A Promise resolving to a Client object configured according to the specified arguments.
     */
    static async getClient(args) {
        const modifiedArgs = await this.processArgs(args);
        return new Client(modifiedArgs);
    }
    /**
     * Processes the provided CassandraClientArgs for creating a Cassandra client.
     * Arguments without `serviceProviderArgs` are passed through untouched; arguments
     * with an `astra` section are delegated to `processAstraArgs`.
     *
     * @param args The arguments for creating the Cassandra client, including service provider configurations.
     * @returns A Promise resolving to the processed CassandraClientArgs, ready for client initialization.
     * @throws Error (synchronously) if serviceProviderArgs are provided but contain no Astra configuration.
     */
    static processArgs(args) {
        if (!args.serviceProviderArgs) {
            return Promise.resolve(args);
        }
        if (args.serviceProviderArgs.astra) {
            return CassandraClientFactory.processAstraArgs(args);
        }
        throw new Error("Unsupported configuration for Cassandra client.");
    }
    /**
     * Asynchronously processes and validates the Astra service provider arguments within the
     * Cassandra client configuration: derives datacenterID / regionName from the endpoint when
     * possible, picks a default secure connect bundle path, makes sure the bundle exists on disk,
     * and fills in default token credentials.
     *
     * The caller's `args`, `args.cloud` and `args.serviceProviderArgs.astra` objects are never
     * modified; every derived value is written to copies that are part of the returned object.
     *
     * @param args The arguments for creating the Cassandra client with Astra configurations.
     * @returns A Promise resolving to a new CassandraClientArgs object with Astra configurations processed.
     * @throws Error if Astra configuration is missing or if both endpoint and datacenterID are missing.
     */
    static async processAstraArgs(args) {
        const suppliedAstraArgs = args.serviceProviderArgs?.astra;
        if (!suppliedAstraArgs) {
            throw new Error("Astra configuration is not provided in args.");
        }
        if (!suppliedAstraArgs.endpoint && !suppliedAstraArgs.datacenterID) {
            throw new Error("Astra endpoint or datacenterID must be provided in args.");
        }
        // Work on a shallow copy so derived fields do not leak into the caller's object.
        const astraArgs = { ...suppliedAstraArgs };
        // Extract datacenterID and regionName from endpoint if provided
        if (astraArgs.endpoint) {
            const endpoint = new URL(astraArgs.endpoint.toString());
            const hostnameParts = endpoint.hostname.split("-");
            const domainSuffix = ".apps.astra.datastax.com";
            if (hostnameParts[hostnameParts.length - 1].endsWith(domainSuffix)) {
                // The first five dash-separated parts are taken as the datacenter id.
                astraArgs.datacenterID =
                    astraArgs.datacenterID || hostnameParts.slice(0, 5).join("-");
                // Everything after them, minus the domain suffix, is taken as the region name.
                const fullRegionName = hostnameParts.slice(5).join("-");
                astraArgs.regionName =
                    astraArgs.regionName || fullRegionName.replace(domainSuffix, "");
            }
        }
        // Copy the cloud configuration too: the top-level spread is shallow, so assigning
        // secureConnectBundle below would otherwise write into the caller's `cloud` object.
        const modifiedArgs = {
            ...args,
            serviceProviderArgs: { ...args.serviceProviderArgs, astra: astraArgs },
            cloud: { ...(args.cloud || { secureConnectBundle: "" }) },
        };
        // Set default bundle location if it is not set
        if (!modifiedArgs.cloud.secureConnectBundle) {
            modifiedArgs.cloud.secureConnectBundle =
                await CassandraClientFactory.getAstraDefaultBundleLocation(astraArgs);
        }
        // Ensure secure connect bundle exists
        await CassandraClientFactory.setAstraBundle(astraArgs, modifiedArgs.cloud.secureConnectBundle);
        // Ensure credentials are set
        modifiedArgs.credentials = modifiedArgs.credentials || {
            username: "token",
            password: astraArgs.token,
        };
        return modifiedArgs;
    }
    /**
     * Get the default bundle filesystem location for the Astra Secure Connect Bundle.
     * The containing directory (`<os tmpdir>/cassandra-astra`) is created if needed.
     *
     * @param astraArgs The Astra service provider arguments (datacenterID and optional regionName are used).
     * @returns A Promise resolving to the default bundle file path.
     */
    static async getAstraDefaultBundleLocation(astraArgs) {
        const dir = path.join(os.tmpdir(), "cassandra-astra");
        await fs.mkdir(dir, { recursive: true });
        let scbFileName = `astra-secure-connect-${astraArgs.datacenterID}`;
        if (astraArgs.regionName) {
            scbFileName += `-${astraArgs.regionName}`;
        }
        scbFileName += ".zip";
        return path.join(dir, scbFileName);
    }
    /**
     * Ensures the Astra secure connect bundle specified by the path exists and is up to date.
     * If the file does not exist or is more than 360 days old (by modification time), a new
     * secure connect bundle is downloaded and saved to the specified path.
     *
     * @param astraArgs The Astra service provider arguments, including the datacenterID and optional regionName.
     * @param scbPath The path (or URL) where the secure connect bundle is expected to be located.
     * @returns A Promise that resolves when the secure connect bundle is verified or updated successfully.
     * @throws Error if the bundle cannot be retrieved or saved, or if checking the file fails
     * for any reason other than the file not existing.
     */
    static async setAstraBundle(astraArgs, scbPath) {
        // A non-string value (e.g. a URL object) is assumed to be correct and is left alone.
        if (typeof scbPath !== "string") {
            return;
        }
        try {
            // Check if the file exists
            const stats = await fs.stat(scbPath);
            // Calculate the age of the file in days
            const fileAgeInDays = (Date.now() - stats.mtime.getTime()) / (1000 * 60 * 60 * 24);
            // File is more than 360 days old, download a fresh copy
            if (fileAgeInDays > 360) {
                await CassandraClientFactory.downloadAstraSecureConnectBundle(astraArgs, scbPath);
            }
        }
        catch (error) {
            const isMissingFile = typeof error === "object" &&
                error !== null &&
                "code" in error &&
                error.code === "ENOENT";
            if (!isMissingFile) {
                throw error;
            }
            // File not found: fetch it for the first time.
            await CassandraClientFactory.downloadAstraSecureConnectBundle(astraArgs, scbPath);
        }
    }
    /**
     * Downloads the Astra secure connect bundle based on the provided Astra service provider arguments
     * and saves it to the specified file path. If a regionName is specified and matches one of the
     * available bundles, the regional bundle is preferred. Otherwise, the first available bundle URL is used.
     *
     * @param astraArgs - The Astra service provider arguments, including datacenterID and optional regionName.
     * @param scbPath - The file path where the secure connect bundle should be saved.
     * @returns A promise that resolves once the secure connect bundle is successfully downloaded and saved.
     * @throws Error if datacenterID is missing, if either HTTP request fails, or if no bundle URLs are returned.
     */
    static async downloadAstraSecureConnectBundle(astraArgs, scbPath) {
        if (!astraArgs.datacenterID) {
            throw new Error("Astra datacenterID is not provided in args.");
        }
        // First POST request gets all bundle locations for the database_id
        const bundleURLTemplate = astraArgs.bundleUrlTemplate
            ? astraArgs.bundleUrlTemplate
            : "https://api.astra.datastax.com/v2/databases/{database_id}/secureBundleURL?all=true";
        const url = bundleURLTemplate.replace("{database_id}", astraArgs.datacenterID);
        const postResponse = await fetch(url, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${astraArgs.token}`,
                "Content-Type": "application/json",
            },
        });
        if (!postResponse.ok) {
            throw new Error(`HTTP error! Status: ${postResponse.status}`);
        }
        const postData = await postResponse.json();
        if (!postData || !Array.isArray(postData) || postData.length === 0) {
            throw new Error("Failed to get secure bundle URLs.");
        }
        // Find the download URL for the region, if specified
        let { downloadURL } = postData[0];
        if (astraArgs.regionName) {
            const regionalBundle = postData.find((bundle) => bundle.region === astraArgs.regionName);
            if (regionalBundle) {
                downloadURL = regionalBundle.downloadURL;
            }
        }
        // GET request to download the file itself, and write to disk
        const getResponse = await fetch(downloadURL);
        if (!getResponse.ok) {
            throw new Error(`HTTP error! Status: ${getResponse.status}`);
        }
        const bundleData = await getResponse.arrayBuffer();
        await fs.writeFile(scbPath, Buffer.from(bundleData));
    }
}
/**
* Represents a Cassandra table, encapsulating functionality for schema definition, data manipulation, and querying.
* This class provides a high-level abstraction over Cassandra's table operations, including creating tables,
* inserting, updating, selecting, and deleting records. It leverages the CassandraClient for executing
* operations and supports asynchronous interactions with the database.
*
* Key features include:
* - Table and keyspace management: Allows for specifying table schema, including primary keys, columns,
* and indices, and handles the creation of these elements within the specified keyspace.
* - Data manipulation: Offers methods for inserting (upserting) and deleting data in batches or individually,
* with support for asynchronous operation and concurrency control.
* - Querying: Enables selecting data with flexible filtering, sorting, and pagination options.
*
* The class is designed to be instantiated with a set of configuration arguments (`CassandraTableArgs`)
* that define the table's structure and operational parameters, providing a streamlined interface for
* interacting with Cassandra tables in a structured and efficient manner.
*
* Usage Example:
* ```typescript
* const tableArgs: CassandraTableArgs = {
* table: 'my_table',
* keyspace: 'my_keyspace',
* primaryKey: [{ name: 'id', type: 'uuid', partition: true }],
* nonKeyColumns: [{ name: 'data', type: 'text' }],
* };
* const cassandraClient = new Client(clientConfig); // `Client` from "cassandra-driver"
* const myTable = new CassandraTable(tableArgs, cassandraClient);
* ```
*
* This class simplifies Cassandra database interactions, making it easier to perform robust data operations
* while maintaining clear separation of concerns and promoting code reusability.
*/
export class CassandraTable {
/**
* Initializes a new instance of the CassandraTable class with specified configuration.
* This includes setting up the table schema (primary key, columns, and indices) and
* preparing the environment for executing queries against a Cassandra database.
*
* @param args Configuration arguments defining the table schema and operational settings.
* @param client Optional. A Cassandra Client instance. If not provided, one will be created
* using the configuration specified in `args`.
*/
constructor(args, client) {
Object.defineProperty(this, "client", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "keyspace", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "table", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "primaryKey", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "nonKeyColumns", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "indices", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "withClause", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "batchSize", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "initializationPromise", {
enumerable: true,
configurable: true,
writable: true,
value: null
});
Object.defineProperty(this, "asyncCaller", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
Object.defineProperty(this, "constructorArgs", {
enumerable: true,
configurable: true,
writable: true,
value: void 0
});
const { keyspace, table, primaryKey, nonKeyColumns, withClause = "", indices = [], batchSize = 1, maxConcurrency = 25, } = args;
// Set constructor args, which would include default values
this.constructorArgs = {
withClause,
indices,
batchSize,
maxConcurrency,
...args,
};
this.asyncCaller = new AsyncCaller(this.constructorArgs);
// Assign properties
this.keyspace = keyspace;
this.table = table;
this.primaryKey = Array.isArray(primaryKey) ? primaryKey : [primaryKey];
this.nonKeyColumns = Array.isArray(nonKeyColumns)
? nonKeyColumns
: [nonKeyColumns];
this.withClause = withClause.trim().replace(/^with\s*/i, "");
this.indices = indices;
this.batchSize = batchSize;
// Start initialization but don't wait for it to complete here
this.initialize(client).catch((error) => {
console.error("Error during CassandraStore initialization:", error);
});
}
/**
* Executes a SELECT query on the Cassandra table with optional filtering, ordering, and pagination.
* Allows for specifying columns to return, filter conditions, sort order, and limits on the number of results.
*
* @param columns Optional. Columns to include in the result set. If omitted, all columns are selected.
* @param filter Optional. Conditions to apply to the query for filtering results.
* @param orderBy Optional. Criteria to sort the result set.
* @param limit Optional. Maximum number of records to return.
* @param allowFiltering Optional. Enables ALLOW FILTERING option for queries that cannot be executed directly due to Cassandra's query restrictions.
* @param fetchSize Optional. The number of rows to fetch per page (for pagination).
* @param pagingState Optional. The paging state from a previous query execution, used for pagination.
* @returns A Promise resolving to the query result set.
*/
async select(columns, filter, orderBy, limit, allowFiltering, fetchSize, pagingState) {
await this.initialize();
// Ensure we have an array of Filter from the public interface
const filters = this.asFilters(filter);
// If no columns are specified, use all columns
const queryColumns = columns || [...this.primaryKey, ...this.nonKeyColumns];
const queryStr = this.buildSearchQuery(queryColumns, filters, orderBy, limit, allowFiltering);
const queryParams = [];
queryColumns.forEach(({ binds }) => {
if (binds !== undefined && binds !== null) {
if (Array.isArray(binds)) {
queryParams.push(...binds);
}
else {
queryParams.push(binds);
}
}
});
if (filters) {
filters.forEach(({ value }) => {
if (Array.isArray(value)) {
queryParams.push(...value);
}
else {
queryParams.push(value);
}
});
}
if (orderBy) {
orderBy.forEach(({ value }) => {
if (value !== undefined && value !== null) {
if (Array.isArray(value)) {
queryParams.push(...value);
}
else {
queryParams.push(value);
}
}
});
}
if (limit) {
queryParams.push(limit);
}
const execOptions = {
prepare: true,
fetchSize: fetchSize || undefined,
pageState: pagingState || undefined,
};
return this.client.execute(queryStr, queryParams, execOptions);
}
/**
* Validates the correspondence between provided values and specified columns for database operations.
* This method checks if the number of values matches the number of specified columns, ensuring
* data integrity before executing insert or update operations. It also defaults to using all table columns
* if specific columns are not provided. Throws an error if the validation fails.
*
* @param values An array of values or an array of arrays of values to be inserted or updated. Each
* inner array represents a set of values corresponding to one row in the table.
* @param columns Optional. An array of `Column` objects specifying the columns to be used for the operation.
* If not provided, the method defaults to using both primary key and non-key columns of the table.
* @returns An array of `Column` objects that have been validated for the operation.
* @throws Error if the number of provided values does not match the number of specified columns.
* @private
*/
_columnCheck(values, columns) {
const cols = columns || [...this.primaryKey, ...this.nonKeyColumns];
if (!cols || cols.length === 0) {
throw new Error("Columns must be specified.");
}
const firstValueSet = Array.isArray(values[0]) ? values[0] : values;
if (firstValueSet && firstValueSet.length !== cols.length) {
throw new Error("The number of values must match the number of columns.");
}
return cols;
}
/**
* Inserts or updates records in the Cassandra table in batches, managing concurrency and batching size.
* This method organizes the provided values into batches and uses `_upsert` to perform the database operations.
*
* @param values An array of arrays, where each inner array contains values for a single record.
* @param columns Optional. Columns to be included in the insert/update operations. Defaults to all table columns.
* @param batchSize Optional. The size of each batch for the operation. Defaults to the class's batchSize property.
* @returns A Promise that resolves once all records have been upserted.
*/
async upsert(values, columns, batchSize = this.batchSize) {
if (values.length === 0) {
return;
}
// Ensure the store is initialized before proceeding
await this.initialize();
const upsertColumns = this._columnCheck(values, columns);
// Initialize an array to hold promises for each batch insert
const upsertPromises = [];
// Buffers to hold the current batch of vectors and documents
let currentBatch = [];
// Loop through each vector/document pair to insert; we use
// <= vectors.length to ensure the last batch is inserted
for (let i = 0; i <= values.length; i += 1) {
// Check if we're still within the array boundaries
if (i < values.length) {
// Add the current vector and document to the batch
currentBatch.push(values[i]);
}
// Check if we've reached the batch size or end of the array
if (currentBatch.length >= batchSize || i === values.length) {
// Only proceed if there are items in the current batch
if (currentBatch.length > 0) {
// Create copies of the current batch arrays to use in the async insert operation
const batch = [...currentBatch];
// Execute the insert using the AsyncCaller - it will handle concurrency and queueing.
upsertPromises.push(this.asyncCaller.call(() => this._upsert(batch, upsertColumns)));
// Clear the current buffers for the next iteration
currentBatch = [];
}
}
}
// Wait for all insert operations to complete.
await Promise.all(upsertPromises);
}
/**
* Deletes rows from the Cassandra table that match the specified WHERE clause conditions.
*
* @param whereClause Defines the conditions that must be met for rows to be deleted. Can be a single filter,
* an array of filters, or a key-value map translating to filter conditions.
* @returns A Promise that resolves when the DELETE operation has completed.
*/
async delete(whereClause) {
await this.initialize();
const filters = this.asFilters(whereClause);
const queryStr = `DELETE FROM ${this.keyspace}.${this.table} ${this.buildWhereClause(filters)}`;
const queryParams = filters.flatMap(({ value }) => {
if (Array.isArray(value)) {
return value;
}
else {
return [value];
}
});
return this.client.execute(queryStr, queryParams, {
prepare: true,
});
}
/**
* Retrieves the Node.js Cassandra client instance associated with this table.
* This method ensures that the client is initialized and ready for use, returning the
* Cassandra client object that can be used for database operations directly.
* It initializes the client if it has not already been initialized.
*
* @returns A Promise that resolves to the Cassandra Client instance used by this table for database interactions.
*/
async getClient() {
await this.initialize();
return this.client;
}
/**
* Constructs the PRIMARY KEY clause for a Cassandra CREATE TABLE statement based on the specified columns.
* This method organizes the provided columns into partition and clustering keys, forming the necessary syntax
* for the PRIMARY KEY clause in a Cassandra table schema definition. It supports complex primary key structures,
* including composite partition keys and clustering columns.
*
* - Partition columns are those marked with the `partition` property. If multiple partition columns are provided,
* they are grouped together in parentheses as a composite partition key.
* - Clustering columns are those not marked as partition keys and are listed after the partition key(s).
* They determine the sort order of rows within a partition.
*
* The method ensures the correct syntax for primary keys, handling both simple and composite key structures,
* and throws an error if no partition or clustering columns are provided.
*
* @param columns An array of `Column` objects representing the columns to be included in the primary key.
* Each column must have a `name` and may have a `partition` boolean indicating if it is part
* of the partition key.
* @returns The PRIMARY KEY clause as a string, ready to be included in a CREATE TABLE statement.
* @throws Error if no columns are marked as partition keys or if no columns are provided.
* @private
*/
buildPrimaryKey(columns) {
// Partition columns may be specified with optional attribute col.partition
const partitionColumns = columns
.filter((col) => col.partition)
.map((col) => col.name)
.join(", ");
// All columns not part of the partition key are clustering columns
const clusteringColumns = columns
.filter((col) => !col.partition)
.map((col) => col.name)
.join(", ");
let primaryKey = "";
// If partition columns are specified, they are included in a () wrapper
// If not, the clustering columns are used, and the first clustering column
// is the partition key per normal Cassandra behaviour.
if (partitionColumns && clusteringColumns) {
primaryKey = `PRIMARY KEY ((${partitionColumns}), ${clusteringColumns})`;
}
else if (partitionColumns) {
primaryKey = `PRIMARY KEY (${partitionColumns})`;
}
else if (clusteringColumns) {
primaryKey = `PRIMARY KEY (${clusteringColumns})`;
}
else {
throw new Error("No partition or clustering columns provided for PRIMARY KEY definition.");
}
return primaryKey;
}
/**
* Type guard that checks if a given object conforms to the `Filter` interface.
* This method is used to determine if an object can be treated as a filter for Cassandra
* query conditions. It evaluates the object's structure, specifically looking for `name`
* and `value` properties, which are essential for defining a filter in Cassandra queries.
*
* @param obj The object to be evaluated.
* @returns A boolean value indicating whether the object is a `Filter`. Returns `true`
* if the object has both `name` and `value` properties, signifying it meets the
* criteria for being used as a filter in database operations; otherwise, returns `false`.
* @private
*/
isFilter(obj) {
return (typeof obj === "object" && obj !== null && "name" in obj && "value" in obj);
}
/**
* Helper to convert Record<string,unknown> to a Filter[]
* @param record: a key-value Record collection
* @returns Record as a Filter[]
*/
convertToFilters(record) {
return Object.entries(record).map(([name, value]) => ({
name,
value,
operator: "=",
}));
}
/**
* Converts a key-value pair record into an array of `Filter` objects suitable for Cassandra query conditions.
* This utility method allows for a more flexible specification of filter conditions by transforming
* a simple object notation into the structured format expected by Cassandra query builders. Each key-value
* pair in the record is interpreted as a filter condition, where the key represents the column name and
* the value represents the filtering criterion.
*
* The method assumes a default equality operator for each filter. It is particularly useful for
* converting concise filter specifications into the detailed format required for constructing CQL queries.
*
* @param record A key-value pair object where each entry represents a filter condition, with the key
* as the column name and the value as the filter value. The value can be a single value
* or an array to support IN queries with multiple criteria.
* @returns An array of `Filter` objects, each representing a condition extracted from the input record.
* The array can be directly used in constructing query WHERE clauses.
* @private
*/
asFilters(record) {
if (!record) {
return [];
}
// If record is already an array
if (Array.isArray(record)) {
return record.flatMap((item) => {
// Check if item is a Filter before passing it to convertToFilters
if (this.isFilter(item)) {
return [item];
}
else {
// Here item is treated as Record<string, unknown>
return this.convertToFilters(item);
}
});
}
// If record is a single Filter object, return it in an array
if (this.isFilter(record)) {
return [record];
}
// If record is a Record<string, unknown>, convert it to an array of Filter
return this.convertToFilters(record);
}
/**
* Constructs the WHERE clause of a CQL query from an array of `Filter` objects.
* This method generates the conditional part of a Cassandra Query Language (CQL) statement,
* allowing for complex query constructions based on provided filters. Each filter in the array
* translates into a condition within the WHERE clause, with support for various comparison operators.
*
* The method handles the assembly of these conditions into a syntactically correct CQL WHERE clause,
* including the appropriate use of placeholders (?) for parameter binding in prepared statements.
* It supports a range of operators, defaulting to "=" (equality) if an operator is not explicitly specified
* in a filter. Filters with multiple values (e.g., for IN conditions) are also correctly formatted.
*
* @param filters Optional. An array of `Filter` objects representing the conditions to apply in the WHERE clause.
* Each `Filter` includes a column name (`name`), a value or array of values (`value`), and optionally,
* an operator (`operator`). If no filters are provided, an empty string is returned.
* @returns The constructed WHERE clause as a string, ready to be appended to a CQL query. If no filters
* are provided, returns an empty string, indicating no WHERE clause should be applied.
* @private
*/
buildWhereClause(filters) {
if (!filters || filters.length === 0) {
return "";
}
const whereConditions = filters.map(({ name, operator = "=", value }) => {
// Normalize the operator to handle case-insensitive comparison
const normalizedOperator = operator.toUpperCase();
// Convert value to an array if it's not one, to simplify processing
const valueArray = Array.isArray(value) ? value : [value];
if (valueArray.length === 1 && normalizedOperator !== "IN") {
return `${name} ${operator} ?`;
}
else {
// Remove quoted strings from 'name' to prevent counting '?' inside quotes as placeholders
const quotesPattern = /'[^']*'|"[^"]*"/g;
const modifiedName = name.replace(quotesPattern, "");
const nameQuestionMarkCount = (modifiedName.match(/\?/g) || []).length;
// Check if there are enough elements in the array for the right side of the operator,
// adjusted for any '?' placeholders within the 'name' itself
if (valueArray.length < nameQuestionMarkCount + 1) {
throw new Error("Insufficient bind variables for the filter condition.");
}
// Generate placeholders, considering any '?' placeholders that might have been part of 'name'
const effectiveLength = Math.max(valueArray.length - nameQuestionMarkCount, 1);
const placeholders = new Array(effectiveLength).fill("?").join(", ");
// Wrap placeolders in a () if the operator is IN
if (normalizedOperator === "IN") {
return `${name} ${operator} (${placeholders})`;
}
else {
return `${name} ${operator} ${placeholders}`;
}
}
});
return `WHERE ${whereConditions.join(" AND ")}`;
}
/**
* Generates the ORDER BY clause for a CQL query from an array of `Filter` objects.
* This method forms the sorting part of a Cassandra Query Language (CQL) statement,
* allowing for detailed control over the order of results based on specified column names
* and directions. Each filter in the array represents a column and direction to sort by.
*
* It is important to note that unlike the traditional use of `Filter` objects for filtering,
* in this context, they are repurposed to specify sorting criteria. The `name` field indicates
* the column to sort by, and the `operator` field is used to specify the sort direction (`ASC` or `DESC`).
* The `value` field is not utilized for constructing the ORDER BY clause and can be omitted.
*
* @param filters Optional. An array of `Filter` objects where each object specifies a column and
* direction for sorting. The `name` field of each filter represents the column name,
* and the `operator` field should contain the sorting direction (`ASC` or `DESC`).
* If no filters are provided, the method returns an empty string.
* @returns The constructed ORDER BY clause as a string, suitable for appending to a CQL query.
* If no sorting criteria are provided, returns an empty string, indicating no ORDER BY
* clause should be applied to the query.
* @private
*/
buildOrderByClause(filters) {
if (!filters || filters.length === 0) {
return "";
}
const orderBy = filters.map(({ name, operator, value }) => {
if (value) {
return `${name} ${operator} ?`;
}
else if (operator) {
return `${name} ${operator}`;
}
else {
return name;
}
});
return `ORDER BY ${orderBy.join(" , ")}`;
}
/**
* Constructs a CQL search query string for retrieving records from a Cassandra table.
* This method combines various query components, including selected columns, filters, sorting criteria,
* and pagination options, to form a complete and executable CQL query. It allows for fine-grained control
* over the query construction process, enabling the inclusion of conditional filtering, ordering of results,
* and limiting the number of returned records, with an optional allowance for filtering.
*
* The method meticulously constructs the SELECT part of the query using the provided columns, applies
* the WHERE clause based on given filters, sorts the result set according to the orderBy criteria, and
* restricts the number of results with the limit parameter. Additionally, it can enable the ALLOW FILTERING
* option for queries that require server-side filtering beyond the capabilities of primary and secondary indexes.
*
* @param queryColumns An array of `Column` objects specifying which columns to include in the result set.
* Each column can also have an alias defined for use in the query's result set.
* @param filters Optional. An array of `Filter` objects to apply as conditions in the WHERE clause of the query.
* @param orderBy Optional. An array of `Filter` objects specifying the ordering of the returned records.
* Although repurposed as `Filter` objects, here they define the column names and the sort direction (ASC/DESC).
* @param limit Optional. A numeric value specifying the maximum number of records the query should return.
* @param allowFiltering Optional. A boolean flag that, when true, includes the ALLOW FILTERING clause in the query,
* permitting Cassandra to execute queries that might not be efficiently indexable.
* @returns A string representing the fully constructed CQL search query, ready for execution against a Cassandra table.
* @private
*/
buildSearchQuery(queryColumns, filters, orderBy, limit, allowFiltering) {
const selectColumns = queryColumns
.map((col) => (col.alias ? `${col.name} AS ${col.alias}` : col.name))
.join(", ");
const whereClause = filters ? this.buildWhereClause(filters) : "";
const orderByClause = orderBy ? this.buildOrderByClause(orderBy) : "";
const limitClause = limit ? "LIMIT ?" : "";
const allowFilteringClause = allowFiltering ? "ALLOW FILTERING" : "";
const cqlQuery = `SELECT ${selectColumns} FROM ${this.keyspace}.${this.table} ${whereClause} ${orderByClause} ${limitClause} ${allowFilteringClause}`;
return cqlQuery;
}
/**
* Initializes the CassandraTable instance, ensuring it is ready for database operations.
* This method is responsible for setting up the internal Cassandra client, creating the table
* if it does not already exist, and preparing any indices as specified in the table configuration.
* The initialization process is performed only once; subsequent calls return the result of the
* initial setup. If a Cassandra `Client` instance is provided, it is used directly; otherwise,
* a new client is created based on the table's configuration.
*
* The initialization includes:
* - Assigning the provided or newly created Cassandra client to the internal client property.
* - Executing a CQL statement to create the table with the specified columns, primary key, and
* any additional options provided in the `withClause`.
* - Creating any custom indices as defined in the table's indices array.
*
* This method leverages the asynchronous nature of JavaScript to perform potentially time-consuming
* tasks, such as network requests to the Cassandra cluster, without blocking the execution thread.
*
* @param client Optional. A `Client` instance from the cassandra-driver package. If provided, this client
* is used for all database operations performed by the instance. Otherwise, a new client
* is instantiated based on the configuration provided at the CassandraTable instance creation.
* @returns A Promise that resolves once the initialization process has completed, indicating the instance
* is ready for database operations. If initialization has already occurred, the method returns
* immediately without repeating the setup process.
* @private
*/
async initialize(client) {
// If already initialized or initialization is in progress, return the existing promise
if (this.initializationPromise) {
return this.initializationPromise;
}
// Start the initialization process and store the promise
this.initializationPromise = this.performInitialization(client)
.then(() => {
// Initialization successful
})
.catch((error) => {
// Reset to allow retrying in case of failure
this.initializationPromise = null;
throw error;
});
return this.initializationPromise;
}
/**
* Performs the actual initialization tasks for the CassandraTable instance.
* This method is invoked by the `initialize` method to carry out the concrete steps necessary for preparing
* the CassandraTable instance for operation. It includes establishing the Cassandra client (either by utilizing
* an existing client passed as a parameter or by creating a new one based on the instance's configuration),
* and executing the required CQL statements to create the table and its indices according to the specifications
* provided during the instance's creation.
*
* The process encapsulates:
* 1. Assigning the provided Cassandra `Client` to the instance, or creating a new one if none is provided.
* 2. Creating the table with the specified schema if it does not exist. This involves constructing a CQL
* `CREATE TABLE` statement that includes columns, primary key configuration, and any specified table options.
* 3. Creating any indices specified in the instance's configuration using CQL `CREATE INDEX` statements, allowing
* for custom index options if provided.
*
* This method ensures that the table and its environment are correctly set up for subsequent database operations,
* encapsulating initialization logic to maintain separation of concerns and improve code readability and maintainability.
*
* @param client Optional. An instance of the Cassandra `Client` from the cassandra-driver package. If provided,
* this client is used for all interactions with the Cassandra database. If not provided, a new client
* is instantiated based on the provided configuration during the CassandraTable instance creation.
* @returns A Promise that resolves when all initialization steps have been successfully completed, indicating
* that the CassandraTable instance is fully prepared for database operations.
* @private
*/
async performInitialization(client) {
if (client) {
this.client = client;
}
else {
this.client = await CassandraClientFactory.getClient(this.constructorArgs);
}
const allColumns = [...this.primaryKey, ...this.nonKeyColumns];
let cql = "";
cql = `CREATE TABLE IF NOT EXISTS ${this.keyspace}.${this.table} (
${allColumns.length > 0
? `${allColumns.map((col) => `${col.name} ${col.type}`).join(", ")}`
: ""}
, ${this.buildPrimaryKey(this.primaryKey)}
) ${this.withClause ? `WITH ${this.withClause}` : ""};`;
await this.client.execute(cql);
// Helper function to format custom index OPTIONS clause
const _formatOptions = (options) => {
if (!options) {
return "";
}
let formattedOptions = options.trim();
if (!formattedOptions.toLowerCase().startsWith("with options =")) {
formattedOptions = `WITH OPTIONS = ${formattedOptions}`;
}
return formattedOptions;
};
for await (const { name, value, options } of this.indices) {
const optionsClause = _formatOptions(options);
cql = `CREATE CUSTOM INDEX IF NOT EXISTS idx_${this.table}_${name}
ON ${this.keyspace}.${this.table} ${value} USING 'StorageAttachedIndex' ${optionsClause};`;
await this.client.execute(cql);
}
}
/**
* Performs the actual insert or update operation (upsert) on the Cassandra table for a batch of values.
* This method builds one CQL INSERT statement per row and runs them as a single unlogged batch,
* or as a single prepared statement when only one row is given.
*
* @param values An array of arrays, where each inner array contains values corresponding to the specified columns.
* @param columns Optional. Specifies the columns into which the values should be inserted. Defaults to all columns.
* @returns A Promise that resolves when the operation has completed.
* @private
*/
async _upsert(values, columns) {
if (values.length === 0) {
return;
}
await this.initialize();
const upsertColumns = this._columnCheck(values, columns);
const upsertColumnNames = upsertColumns.map((col) => col.name);
const columnCount = upsertColumnNames.length;
const bindPlaceholders = Array(columnCount).fill("?").join(", ");
const upsertString = `INSERT INTO ${this.keyspace}.${this.table} (${upsertColumnNames.join(", ")}) VALUES (${bindPlaceholders})`;
// Initialize an array to hold query objects
const queries = [];
for (let i = 0; i < values.length; i += 1) {
const query = {
query: upsertString,
params: values[i],
};
// Add the query to the list
queries.push(query);
}
// Execute the queries: use a batch if multiple, otherwise execute a single query
if (queries.length === 1) {
await this.client.execute(queries[0].query, queries[0].params, {
prepare: true,
});
}
else {
await this.client.batch(queries, { prepare: true, logged: false });
}
}
}