diff --git a/.gitignore b/.gitignore
index 819b228b..99381ce5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ node_modules
 .nyc_output
 coverage_e2e
 coverage_unit
+.clinic
 *.code-workspace
 dist
 *.DS_Store
diff --git a/.prettierignore b/.prettierignore
index f90cbaaa..92e9533a 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -5,6 +5,7 @@ node_modules
 .nyc_output
 coverage_e2e
 coverage_unit
+.clinic
 dist
 thrift
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91c5d7b9..c103fe1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Release History
 
+## 1.7.0
+
+- Fixed the behavior of the `maxRows` option of `IOperation.fetchChunk()`. It now returns chunks
+  of the requested size (databricks/databricks-sql-nodejs#200)
+- Improved CloudFetch memory usage and overall performance (databricks/databricks-sql-nodejs#204,
+  databricks/databricks-sql-nodejs#207, databricks/databricks-sql-nodejs#209)
+- Removed the protocol version check when using query parameters (databricks/databricks-sql-nodejs#213)
+- Fixed `IOperation.hasMoreRows()` to avoid fetching data beyond the end of the dataset;
+  it now also works properly before the first chunk is fetched (databricks/databricks-sql-nodejs#205)
+
 ## 1.6.1
 
 - Make default logger singleton (databricks/databricks-sql-nodejs#199)
diff --git a/CODEOWNERS b/CODEOWNERS
index 0695e203..34f987d8 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -1 +1 @@
-* @arikfr @superdupershant @yunbodeng-db @kravets-levko @susodapop @nithinkdb @andrefurlan-db
+* @arikfr @superdupershant @yunbodeng-db @kravets-levko @susodapop @nithinkdb @andrefurlan-db @rcypher-databricks
diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts
index 779492e6..5c25d540 100644
--- a/lib/DBSQLClient.ts
+++ b/lib/DBSQLClient.ts
@@ -5,7 +5,7 @@ import TCLIService from '../thrift/TCLIService';
 import { TProtocolVersion } from '../thrift/TCLIService_types';
 import IDBSQLClient, { ClientOptions, ConnectionOptions, OpenSessionRequest } from './contracts/IDBSQLClient';
 import IDriver from './contracts/IDriver';
-import IClientContext from './contracts/IClientContext';
+import IClientContext, { ClientConfig } from './contracts/IClientContext';
 import HiveDriver from './hive/HiveDriver';
 import { Int64 } from './hive/Types';
 import DBSQLSession from './DBSQLSession';
@@ -46,6 +46,8 @@ function getInitialNamespaceOptions(catalogName?: string, schemaName?: string) {
 export default class DBSQLClient extends EventEmitter implements IDBSQLClient, IClientContext {
   private static defaultLogger?: IDBSQLLogger;
 
+  private readonly config: ClientConfig;
+
   private connectionProvider?: IConnectionProvider;
 
   private authProvider?: IAuthentication;
@@ -69,8 +71,25 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I
     return this.defaultLogger;
   }
 
+  private static getDefaultConfig(): ClientConfig {
+    return {
+      arrowEnabled: true,
+      useArrowNativeTypes: true,
+      socketTimeout: 15 * 60 * 1000, // 15 minutes
+
+      retryMaxAttempts: 30,
+      retriesTimeout: 900 * 1000, // 15 minutes
+      retryDelayMin: 1 * 1000,
+      retryDelayMax: 60 * 1000,
+
+      useCloudFetch: false,
+      cloudFetchConcurrentDownloads: 10,
+    };
+  }
+
   constructor(options?: ClientOptions) {
     super();
+    this.config = DBSQLClient.getDefaultConfig();
     this.logger = options?.logger ??
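      // fall back to the process-wide default logger (a lazily created singleton, per the 1.6.1 change)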
DBSQLClient.getDefaultLogger(); this.logger.log(LogLevel.info, 'Created DBSQLClient'); } @@ -129,7 +148,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I public async connect(options: ConnectionOptions, authProvider?: IAuthentication): Promise { this.authProvider = this.initAuthProvider(options, authProvider); - this.connectionProvider = new HttpConnection(this.getConnectionOptions(options)); + this.connectionProvider = new HttpConnection(this.getConnectionOptions(options), this); const thriftConnection = await this.connectionProvider.getThriftConnection(); @@ -196,6 +215,10 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.authProvider = undefined; } + public getConfig(): ClientConfig { + return this.config; + } + public getLogger(): IDBSQLLogger { return this.logger; } diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts index 5198d726..8a0bf707 100644 --- a/lib/DBSQLOperation/index.ts +++ b/lib/DBSQLOperation/index.ts @@ -16,13 +16,15 @@ import { TOperationState, } from '../../thrift/TCLIService_types'; import Status from '../dto/Status'; -import FetchResultsHelper from './FetchResultsHelper'; import { LogLevel } from '../contracts/IDBSQLLogger'; import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError'; -import IOperationResult from '../result/IOperationResult'; -import JsonResult from '../result/JsonResult'; -import ArrowResult from '../result/ArrowResult'; -import CloudFetchResult from '../result/CloudFetchResult'; +import IResultsProvider from '../result/IResultsProvider'; +import RowSetProvider from '../result/RowSetProvider'; +import JsonResultHandler from '../result/JsonResultHandler'; +import ArrowResultHandler from '../result/ArrowResultHandler'; +import CloudFetchResultHandler from '../result/CloudFetchResultHandler'; +import ArrowResultConverter from '../result/ArrowResultConverter'; +import ResultSlicer from '../result/ResultSlicer'; import { definedOrError } from '../utils'; import HiveDriverError from '../errors/HiveDriverError'; import IClientContext from '../contracts/IClientContext'; @@ -50,7 +52,7 @@ export default class DBSQLOperation implements IOperation { public onClose?: () => void; - private readonly _data: FetchResultsHelper; + private readonly _data: RowSetProvider; private readonly closeOperation?: TCloseOperationResp; @@ -66,9 +68,7 @@ export default class DBSQLOperation implements IOperation { // to `getOperationStatus()` may fail with irrelevant errors, e.g. 
HTTP 404
   private operationStatus?: TGetOperationStatusResp;
 
-  private hasResultSet: boolean = false;
-
-  private resultHandler?: IOperationResult;
+  private resultHandler?: ResultSlicer<any>;
 
   constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) {
     this.operationHandle = handle;
@@ -76,13 +76,12 @@
     const useOnlyPrefetchedResults = Boolean(directResults?.closeOperation);
 
-    this.hasResultSet = this.operationHandle.hasResultSet;
     if (directResults?.operationStatus) {
       this.processOperationStatusResponse(directResults.operationStatus);
     }
 
     this.metadata = directResults?.resultSetMetadata;
-    this._data = new FetchResultsHelper(
+    this._data = new RowSetProvider(
       this.context,
       this.operationHandle,
       [directResults?.resultSet],
@@ -107,9 +106,17 @@
    */
   public async fetchAll(options?: FetchOptions): Promise<Array<object>> {
     const data: Array<Array<object>> = [];
+
+    const fetchChunkOptions = {
+      ...options,
+      // Tell the slicer to return raw chunks. We're going to process all of them anyway,
+      // so there is no need to additionally buffer and slice the chunks returned by the server
+      disableBuffering: true,
+    };
+
     do {
       // eslint-disable-next-line no-await-in-loop
-      const chunk = await this.fetchChunk(options);
+      const chunk = await this.fetchChunk(fetchChunkOptions);
       data.push(chunk);
     } while (await this.hasMoreRows()); // eslint-disable-line no-await-in-loop
 
     this.context.getLogger().log(LogLevel.debug, `Fetched all data from operation with id: ${this.getId()}`);
@@ -129,20 +136,39 @@
   public async fetchChunk(options?: FetchOptions): Promise<Array<object>> {
     await this.failIfClosed();
 
-    if (!this.hasResultSet) {
+    if (!this.operationHandle.hasResultSet) {
       return [];
     }
 
     await this.waitUntilReady(options);
 
-    const [resultHandler, data] = await Promise.all([
-      this.getResultHandler(),
-      this._data.fetch(options?.maxRows || defaultMaxRows),
-    ]);
+    const resultHandler = await this.getResultHandler();
+    await this.failIfClosed();
+
+    // All the library code is Promise-based; however, since Promises are microtasks,
+    // enqueueing a lot of promises may block macrotask execution for a while.
+    // Usually there are not many microtasks scheduled, but when fetching query
+    // results (especially CloudFetch ones) it's quite easy to block the event loop
+    // long enough to break a lot of things. For example, with CloudFetch, after the
+    // first set of files is downloaded and processed immediately one by one, the
+    // event loop easily gets blocked long enough to break the connection pool:
+    // `http.Agent` stops receiving socket events and marks all sockets invalid on
+    // the next attempt to use them. See these similar issues that helped to debug
+    // this particular case - https://github.com/nodejs/node/issues/47130 and
+    // https://github.com/node-fetch/node-fetch/issues/1735
+    // This simple fix lets the microtask queue drain and allows Node to process
+    // macrotasks as well, restoring normal operation of other code. The fix is
+    // added to `fetchChunk` because, unlike other methods, `fetchChunk` is a
+    // potential source of the issues described above
+    await new Promise((resolve) => {
+      setTimeout(resolve, 0);
+    });
+
+    const result = await resultHandler.fetchNext({
+      limit: options?.maxRows || defaultMaxRows,
+      disableBuffering: options?.disableBuffering,
+    });
 
     await this.failIfClosed();
-    const result = await resultHandler.getValue(data ?
[data] : []); this.context .getLogger() .log( @@ -234,20 +260,15 @@ export default class DBSQLOperation implements IOperation { return false; } - // Return early if there are still data available for fetching - if (this._data.hasMoreRows) { - return true; - } - // If we fetched all the data from server - check if there's anything buffered in result handler const resultHandler = await this.getResultHandler(); - return resultHandler.hasPendingData(); + return resultHandler.hasMore(); } public async getSchema(options?: GetSchemaOptions): Promise { await this.failIfClosed(); - if (!this.hasResultSet) { + if (!this.operationHandle.hasResultSet) { return null; } @@ -342,24 +363,36 @@ export default class DBSQLOperation implements IOperation { return this.metadata; } - private async getResultHandler(): Promise { + private async getResultHandler(): Promise> { const metadata = await this.fetchMetadata(); const resultFormat = definedOrError(metadata.resultFormat); if (!this.resultHandler) { + let resultSource: IResultsProvider> | undefined; + switch (resultFormat) { case TSparkRowSetType.COLUMN_BASED_SET: - this.resultHandler = new JsonResult(this.context, metadata.schema); + resultSource = new JsonResultHandler(this.context, this._data, metadata.schema); break; case TSparkRowSetType.ARROW_BASED_SET: - this.resultHandler = new ArrowResult(this.context, metadata.schema, metadata.arrowSchema); + resultSource = new ArrowResultConverter( + this.context, + new ArrowResultHandler(this.context, this._data, metadata.arrowSchema), + metadata.schema, + ); break; case TSparkRowSetType.URL_BASED_SET: - this.resultHandler = new CloudFetchResult(this.context, metadata.schema); - break; - default: - this.resultHandler = undefined; + resultSource = new ArrowResultConverter( + this.context, + new CloudFetchResultHandler(this.context, this._data), + metadata.schema, + ); break; + // no default + } + + if (resultSource) { + this.resultHandler = new ResultSlicer(this.context, resultSource); } } @@ -376,7 +409,7 @@ export default class DBSQLOperation implements IOperation { this.state = response.operationState ?? 
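    // if the response does not include a state, keep the current one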
this.state; if (typeof response.hasResultSet === 'boolean') { - this.hasResultSet = response.hasResultSet; + this.operationHandle.hasResultSet = response.hasResultSet; } const isInProgress = [ diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index ccba880a..aa23f7d1 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -2,7 +2,6 @@ import * as fs from 'fs'; import * as path from 'path'; import { stringify, NIL, parse } from 'uuid'; import fetch, { HeadersInit } from 'node-fetch'; -import { Thrift } from 'thrift'; import { TSessionHandle, TStatus, @@ -10,7 +9,6 @@ import { TSparkDirectResults, TSparkArrowTypes, TSparkParameter, - TProtocolVersion, } from '../thrift/TCLIService_types'; import { Int64 } from './hive/Types'; import IDBSQLSession, { @@ -33,11 +31,10 @@ import { definedOrError } from './utils'; import CloseableCollection from './utils/CloseableCollection'; import { LogLevel } from './contracts/IDBSQLLogger'; import HiveDriverError from './errors/HiveDriverError'; -import globalConfig from './globalConfig'; import StagingError from './errors/StagingError'; import { DBSQLParameter, DBSQLParameterValue } from './DBSQLParameter'; import ParameterError from './errors/ParameterError'; -import IClientContext from './contracts/IClientContext'; +import IClientContext, { ClientConfig } from './contracts/IClientContext'; const defaultMaxRows = 100000; @@ -59,11 +56,11 @@ function getDirectResultsOptions(maxRows: number | null = defaultMaxRows) { }; } -function getArrowOptions(): { +function getArrowOptions(config: ClientConfig): { canReadArrowResult: boolean; useArrowNativeTypes?: TSparkArrowTypes; } { - const { arrowEnabled = true, useArrowNativeTypes = true } = globalConfig; + const { arrowEnabled = true, useArrowNativeTypes = true } = config; if (!arrowEnabled) { return { @@ -99,16 +96,6 @@ function getQueryParameters( return []; } - if ( - !sessionHandle.serverProtocolVersion || - sessionHandle.serverProtocolVersion < TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V8 - ) { - throw new Thrift.TProtocolException( - Thrift.TProtocolExceptionType.BAD_VERSION, - 'Parameterized operations are not supported by this server. Support will begin with server version DBR 14.1', - ); - } - const result: Array = []; if (namedParameters !== undefined) { @@ -187,14 +174,15 @@ export default class DBSQLSession implements IDBSQLSession { public async executeStatement(statement: string, options: ExecuteStatementOptions = {}): Promise { await this.failIfClosed(); const driver = await this.context.getDriver(); + const clientConfig = this.context.getConfig(); const operationPromise = driver.executeStatement({ sessionHandle: this.sessionHandle, statement, queryTimeout: options.queryTimeout, runAsync: true, ...getDirectResultsOptions(options.maxRows), - ...getArrowOptions(), - canDownloadResult: options.useCloudFetch ?? globalConfig.useCloudFetch, + ...getArrowOptions(clientConfig), + canDownloadResult: options.useCloudFetch ?? 
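      // the per-statement option takes precedence over the client-level default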
clientConfig.useCloudFetch, parameters: getQueryParameters(this.sessionHandle, options.namedParameters, options.ordinalParameters), }); const response = await this.handleResponse(operationPromise); diff --git a/lib/connection/connections/HttpConnection.ts b/lib/connection/connections/HttpConnection.ts index 79f24c3d..9631322a 100644 --- a/lib/connection/connections/HttpConnection.ts +++ b/lib/connection/connections/HttpConnection.ts @@ -6,21 +6,24 @@ import { ProxyAgent } from 'proxy-agent'; import IConnectionProvider from '../contracts/IConnectionProvider'; import IConnectionOptions, { ProxyOptions } from '../contracts/IConnectionOptions'; -import globalConfig from '../../globalConfig'; +import IClientContext from '../../contracts/IClientContext'; import ThriftHttpConnection from './ThriftHttpConnection'; export default class HttpConnection implements IConnectionProvider { private readonly options: IConnectionOptions; + private readonly context: IClientContext; + private headers: HeadersInit = {}; private connection?: ThriftHttpConnection; private agent?: http.Agent; - constructor(options: IConnectionOptions) { + constructor(options: IConnectionOptions, context: IClientContext) { this.options = options; + this.context = context; } public setHeaders(headers: HeadersInit) { @@ -44,11 +47,15 @@ export default class HttpConnection implements IConnectionProvider { } private getAgentDefaultOptions(): http.AgentOptions { + const clientConfig = this.context.getConfig(); + + const cloudFetchExtraSocketsCount = clientConfig.useCloudFetch ? clientConfig.cloudFetchConcurrentDownloads : 0; + return { keepAlive: true, - maxSockets: 5, + maxSockets: 5 + cloudFetchExtraSocketsCount, keepAliveMsecs: 10000, - timeout: this.options.socketTimeout ?? globalConfig.socketTimeout, + timeout: this.options.socketTimeout ?? clientConfig.socketTimeout, }; } @@ -89,6 +96,7 @@ export default class HttpConnection implements IConnectionProvider { public async getThriftConnection(): Promise { if (!this.connection) { const { options } = this; + const clientConfig = this.context.getConfig(); const agent = await this.getAgent(); this.connection = new ThriftHttpConnection( @@ -99,7 +107,7 @@ export default class HttpConnection implements IConnectionProvider { }, { agent, - timeout: options.socketTimeout ?? globalConfig.socketTimeout, + timeout: options.socketTimeout ?? 
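          // fall back to the client-wide socket timeout when no per-connection value is given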
clientConfig.socketTimeout, headers: { ...options.headers, ...this.headers, diff --git a/lib/contracts/IClientContext.ts b/lib/contracts/IClientContext.ts index 8df0f7e9..062d0795 100644 --- a/lib/contracts/IClientContext.ts +++ b/lib/contracts/IClientContext.ts @@ -3,7 +3,23 @@ import IDriver from './IDriver'; import IConnectionProvider from '../connection/contracts/IConnectionProvider'; import TCLIService from '../../thrift/TCLIService'; +export interface ClientConfig { + arrowEnabled?: boolean; + useArrowNativeTypes?: boolean; + socketTimeout: number; + + retryMaxAttempts: number; + retriesTimeout: number; // in milliseconds + retryDelayMin: number; // in milliseconds + retryDelayMax: number; // in milliseconds + + useCloudFetch: boolean; + cloudFetchConcurrentDownloads: number; +} + export default interface IClientContext { + getConfig(): ClientConfig; + getLogger(): IDBSQLLogger; getConnectionProvider(): Promise; diff --git a/lib/contracts/IOperation.ts b/lib/contracts/IOperation.ts index a9ed45fe..123d4da3 100644 --- a/lib/contracts/IOperation.ts +++ b/lib/contracts/IOperation.ts @@ -14,6 +14,9 @@ export interface FinishedOptions extends WaitUntilReadyOptions { export interface FetchOptions extends WaitUntilReadyOptions { maxRows?: number; + // Disables internal buffer used to ensure a consistent chunks size. + // When set to `true`, returned chunks size may vary (and may differ from `maxRows`) + disableBuffering?: boolean; } export interface GetSchemaOptions extends WaitUntilReadyOptions { diff --git a/lib/globalConfig.ts b/lib/globalConfig.ts deleted file mode 100644 index ad477d8b..00000000 --- a/lib/globalConfig.ts +++ /dev/null @@ -1,27 +0,0 @@ -interface GlobalConfig { - arrowEnabled?: boolean; - useArrowNativeTypes?: boolean; - socketTimeout: number; - - retryMaxAttempts: number; - retriesTimeout: number; // in milliseconds - retryDelayMin: number; // in milliseconds - retryDelayMax: number; // in milliseconds - - useCloudFetch: boolean; - cloudFetchConcurrentDownloads: number; -} - -export default { - arrowEnabled: true, - useArrowNativeTypes: true, - socketTimeout: 15 * 60 * 1000, // 15 minutes - - retryMaxAttempts: 30, - retriesTimeout: 900 * 1000, - retryDelayMin: 1 * 1000, - retryDelayMax: 60 * 1000, - - useCloudFetch: false, - cloudFetchConcurrentDownloads: 10, -} satisfies GlobalConfig; diff --git a/lib/hive/Commands/BaseCommand.ts b/lib/hive/Commands/BaseCommand.ts index 3fff1946..f059d9e1 100644 --- a/lib/hive/Commands/BaseCommand.ts +++ b/lib/hive/Commands/BaseCommand.ts @@ -1,16 +1,16 @@ import { Thrift } from 'thrift'; import TCLIService from '../../../thrift/TCLIService'; import HiveDriverError from '../../errors/HiveDriverError'; -import globalConfig from '../../globalConfig'; +import IClientContext, { ClientConfig } from '../../contracts/IClientContext'; interface CommandExecutionInfo { startTime: number; // in milliseconds attempt: number; } -function getRetryDelay(attempt: number): number { +function getRetryDelay(attempt: number, config: ClientConfig): number { const scale = Math.max(1, 1.5 ** (attempt - 1)); // ensure scale >= 1 - return Math.min(globalConfig.retryDelayMin * scale, globalConfig.retryDelayMax); + return Math.min(config.retryDelayMin * scale, config.retryDelayMax); } function delay(milliseconds: number): Promise { @@ -22,8 +22,11 @@ function delay(milliseconds: number): Promise { export default abstract class BaseCommand { protected client: TCLIService.Client; - constructor(client: TCLIService.Client) { + protected context: IClientContext; + + 
constructor(client: TCLIService.Client, context: IClientContext) { this.client = client; + this.context = context; } protected executeCommand(request: object, command: Function | void): Promise { @@ -49,19 +52,21 @@ export default abstract class BaseCommand { case 503: // Service Unavailable info.attempt += 1; + const clientConfig = this.context.getConfig(); + // Delay interval depends on current attempt - the more attempts we do // the longer the interval will be // TODO: Respect `Retry-After` header (PECO-729) - const retryDelay = getRetryDelay(info.attempt); + const retryDelay = getRetryDelay(info.attempt, clientConfig); - const attemptsExceeded = info.attempt >= globalConfig.retryMaxAttempts; + const attemptsExceeded = info.attempt >= clientConfig.retryMaxAttempts; if (attemptsExceeded) { throw new HiveDriverError( `Hive driver: ${error.statusCode} when connecting to resource. Max retry count exceeded.`, ); } - const timeoutExceeded = Date.now() - info.startTime + retryDelay >= globalConfig.retriesTimeout; + const timeoutExceeded = Date.now() - info.startTime + retryDelay >= clientConfig.retriesTimeout; if (timeoutExceeded) { throw new HiveDriverError( `Hive driver: ${error.statusCode} when connecting to resource. Retry timeout exceeded.`, diff --git a/lib/hive/HiveDriver.ts b/lib/hive/HiveDriver.ts index 9b7384b1..7afd03a5 100644 --- a/lib/hive/HiveDriver.ts +++ b/lib/hive/HiveDriver.ts @@ -58,127 +58,127 @@ export default class HiveDriver implements IDriver { async openSession(request: TOpenSessionReq) { const client = await this.context.getClient(); - const action = new OpenSessionCommand(client); + const action = new OpenSessionCommand(client, this.context); return action.execute(request); } async closeSession(request: TCloseSessionReq) { const client = await this.context.getClient(); - const command = new CloseSessionCommand(client); + const command = new CloseSessionCommand(client, this.context); return command.execute(request); } async executeStatement(request: TExecuteStatementReq) { const client = await this.context.getClient(); - const command = new ExecuteStatementCommand(client); + const command = new ExecuteStatementCommand(client, this.context); return command.execute(request); } async getResultSetMetadata(request: TGetResultSetMetadataReq) { const client = await this.context.getClient(); - const command = new GetResultSetMetadataCommand(client); + const command = new GetResultSetMetadataCommand(client, this.context); return command.execute(request); } async fetchResults(request: TFetchResultsReq) { const client = await this.context.getClient(); - const command = new FetchResultsCommand(client); + const command = new FetchResultsCommand(client, this.context); return command.execute(request); } async getInfo(request: TGetInfoReq) { const client = await this.context.getClient(); - const command = new GetInfoCommand(client); + const command = new GetInfoCommand(client, this.context); return command.execute(request); } async getTypeInfo(request: TGetTypeInfoReq) { const client = await this.context.getClient(); - const command = new GetTypeInfoCommand(client); + const command = new GetTypeInfoCommand(client, this.context); return command.execute(request); } async getCatalogs(request: TGetCatalogsReq) { const client = await this.context.getClient(); - const command = new GetCatalogsCommand(client); + const command = new GetCatalogsCommand(client, this.context); return command.execute(request); } async getSchemas(request: TGetSchemasReq) { const client = await 
this.context.getClient(); - const command = new GetSchemasCommand(client); + const command = new GetSchemasCommand(client, this.context); return command.execute(request); } async getTables(request: TGetTablesReq) { const client = await this.context.getClient(); - const command = new GetTablesCommand(client); + const command = new GetTablesCommand(client, this.context); return command.execute(request); } async getTableTypes(request: TGetTableTypesReq) { const client = await this.context.getClient(); - const command = new GetTableTypesCommand(client); + const command = new GetTableTypesCommand(client, this.context); return command.execute(request); } async getColumns(request: TGetColumnsReq) { const client = await this.context.getClient(); - const command = new GetColumnsCommand(client); + const command = new GetColumnsCommand(client, this.context); return command.execute(request); } async getFunctions(request: TGetFunctionsReq) { const client = await this.context.getClient(); - const command = new GetFunctionsCommand(client); + const command = new GetFunctionsCommand(client, this.context); return command.execute(request); } async getPrimaryKeys(request: TGetPrimaryKeysReq) { const client = await this.context.getClient(); - const command = new GetPrimaryKeysCommand(client); + const command = new GetPrimaryKeysCommand(client, this.context); return command.execute(request); } async getCrossReference(request: TGetCrossReferenceReq) { const client = await this.context.getClient(); - const command = new GetCrossReferenceCommand(client); + const command = new GetCrossReferenceCommand(client, this.context); return command.execute(request); } async getOperationStatus(request: TGetOperationStatusReq) { const client = await this.context.getClient(); - const command = new GetOperationStatusCommand(client); + const command = new GetOperationStatusCommand(client, this.context); return command.execute(request); } async cancelOperation(request: TCancelOperationReq) { const client = await this.context.getClient(); - const command = new CancelOperationCommand(client); + const command = new CancelOperationCommand(client, this.context); return command.execute(request); } async closeOperation(request: TCloseOperationReq) { const client = await this.context.getClient(); - const command = new CloseOperationCommand(client); + const command = new CloseOperationCommand(client, this.context); return command.execute(request); } async getDelegationToken(request: TGetDelegationTokenReq) { const client = await this.context.getClient(); - const command = new GetDelegationTokenCommand(client); + const command = new GetDelegationTokenCommand(client, this.context); return command.execute(request); } async cancelDelegationToken(request: TCancelDelegationTokenReq) { const client = await this.context.getClient(); - const command = new CancelDelegationTokenCommand(client); + const command = new CancelDelegationTokenCommand(client, this.context); return command.execute(request); } async renewDelegationToken(request: TRenewDelegationTokenReq) { const client = await this.context.getClient(); - const command = new RenewDelegationTokenCommand(client); + const command = new RenewDelegationTokenCommand(client, this.context); return command.execute(request); } } diff --git a/lib/result/ArrowResult.ts b/lib/result/ArrowResultConverter.ts similarity index 61% rename from lib/result/ArrowResult.ts rename to lib/result/ArrowResultConverter.ts index b44ae305..1235b2b9 100644 --- a/lib/result/ArrowResult.ts +++ 
b/lib/result/ArrowResultConverter.ts
@@ -1,6 +1,6 @@
 import { Buffer } from 'buffer';
 import {
-  tableFromIPC,
+  Table,
   Schema,
   Field,
   TypeMap,
@@ -9,11 +9,13 @@ import {
   StructRow,
   MapRow,
   Vector,
+  RecordBatch,
+  RecordBatchReader,
   util as arrowUtils,
 } from 'apache-arrow';
-import { TRowSet, TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
+import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
 import IClientContext from '../contracts/IClientContext';
-import IOperationResult from './IOperationResult';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
 import { getSchemaColumns, convertThriftValue } from './utils';
 
 const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils;
@@ -21,49 +23,79 @@
 type ArrowSchema = Schema<TypeMap>;
 type ArrowSchemaField = Field<DataType<Type, TypeMap>>;
 
-export default class ArrowResult implements IOperationResult {
+export default class ArrowResultConverter implements IResultsProvider<Array<any>> {
   protected readonly context: IClientContext;
 
+  private readonly source: IResultsProvider<Array<Buffer>>;
+
   private readonly schema: Array<TColumnDesc>;
 
-  private readonly arrowSchema?: Buffer;
+  private reader?: IterableIterator<RecordBatch<TypeMap>>;
+
+  private pendingRecordBatch?: RecordBatch<TypeMap>;
 
-  constructor(context: IClientContext, schema?: TTableSchema, arrowSchema?: Buffer) {
+  constructor(context: IClientContext, source: IResultsProvider<Array<Buffer>>, schema?: TTableSchema) {
     this.context = context;
+    this.source = source;
     this.schema = getSchemaColumns(schema);
-    this.arrowSchema = arrowSchema;
   }
 
-  async hasPendingData() {
-    return false;
+  public async hasMore() {
+    if (this.schema.length === 0) {
+      return false;
+    }
+    if (this.pendingRecordBatch) {
+      return true;
+    }
+    return this.source.hasMore();
   }
 
-  async getValue(data?: Array<TRowSet>) {
-    if (this.schema.length === 0 || !this.arrowSchema || !data) {
+  public async fetchNext(options: ResultsProviderFetchNextOptions) {
+    if (this.schema.length === 0) {
       return [];
     }
 
-    const batches = await this.getBatches(data);
-    if (batches.length === 0) {
-      return [];
+    // eslint-disable-next-line no-constant-condition
+    while (true) {
+      // It's not possible to know whether an iterator has more items until you try
+      // to get the next item, but we need to know if more data is available right
+      // after returning the current item. Therefore, after creating the iterator,
+      // we fetch one extra item and store it in `pendingRecordBatch`. On each pass
+      // we return that stored item and prefetch the following one. The prefetched
+      // item is always the next item we are going to return, so it tells us whether
+      // we will actually be able to return anything next time
+      const recordBatch = this.pendingRecordBatch;
+      this.pendingRecordBatch = this.prefetch();
+
+      if (recordBatch) {
+        const table = new Table(recordBatch);
+        return this.getRows(table.schema, table.toArray());
+      }
+
+      // eslint-disable-next-line no-await-in-loop
+      const batches = await this.source.fetchNext(options);
+      if (batches.length === 0) {
+        this.reader = undefined;
+        break;
+      }
+
+      const reader = RecordBatchReader.from(batches);
+      this.reader = reader[Symbol.iterator]();
+      this.pendingRecordBatch = this.prefetch();
     }
 
-    const table = tableFromIPC([this.arrowSchema, ...batches]);
-    return this.getRows(table.schema, table.toArray());
+    return [];
   }
 
-  protected async getBatches(data: Array<TRowSet>): Promise<Array<Buffer>> {
-    const result: Array<Buffer> = [];
+  private prefetch(): RecordBatch<TypeMap> | undefined {
+    const item = this.reader?.next() ??
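      // treat a missing reader as an already-exhausted iterator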
{ done: true, value: undefined }; - data.forEach((rowSet) => { - rowSet.arrowBatches?.forEach((arrowBatch) => { - if (arrowBatch.batch) { - result.push(arrowBatch.batch); - } - }); - }); + if (item.done || item.value === undefined) { + this.reader = undefined; + return undefined; + } - return result; + return item.value; } private getRows(schema: ArrowSchema, rows: Array): Array { diff --git a/lib/result/ArrowResultHandler.ts b/lib/result/ArrowResultHandler.ts new file mode 100644 index 00000000..6978a7d6 --- /dev/null +++ b/lib/result/ArrowResultHandler.ts @@ -0,0 +1,46 @@ +import { Buffer } from 'buffer'; +import { TRowSet } from '../../thrift/TCLIService_types'; +import IClientContext from '../contracts/IClientContext'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; + +export default class ArrowResultHandler implements IResultsProvider> { + protected readonly context: IClientContext; + + private readonly source: IResultsProvider; + + private readonly arrowSchema?: Buffer; + + constructor(context: IClientContext, source: IResultsProvider, arrowSchema?: Buffer) { + this.context = context; + this.source = source; + this.arrowSchema = arrowSchema; + } + + public async hasMore() { + if (!this.arrowSchema) { + return false; + } + return this.source.hasMore(); + } + + public async fetchNext(options: ResultsProviderFetchNextOptions) { + if (!this.arrowSchema) { + return []; + } + + const rowSet = await this.source.fetchNext(options); + + const batches: Array = []; + rowSet?.arrowBatches?.forEach((arrowBatch) => { + if (arrowBatch.batch) { + batches.push(arrowBatch.batch); + } + }); + + if (batches.length === 0) { + return []; + } + + return [this.arrowSchema, ...batches]; + } +} diff --git a/lib/result/CloudFetchResult.ts b/lib/result/CloudFetchResult.ts deleted file mode 100644 index 31fbd633..00000000 --- a/lib/result/CloudFetchResult.ts +++ /dev/null @@ -1,63 +0,0 @@ -import { Buffer } from 'buffer'; -import fetch, { RequestInfo, RequestInit } from 'node-fetch'; -import { TRowSet, TSparkArrowResultLink, TTableSchema } from '../../thrift/TCLIService_types'; -import IClientContext from '../contracts/IClientContext'; -import ArrowResult from './ArrowResult'; -import globalConfig from '../globalConfig'; - -export default class CloudFetchResult extends ArrowResult { - private pendingLinks: Array = []; - - private downloadedBatches: Array = []; - - constructor(context: IClientContext, schema?: TTableSchema) { - // Arrow schema returned in metadata is not needed for CloudFetch results: - // each batch already contains schema and could be decoded as is - super(context, schema, Buffer.alloc(0)); - } - - async hasPendingData() { - return this.pendingLinks.length > 0 || this.downloadedBatches.length > 0; - } - - protected async getBatches(data: Array): Promise> { - data.forEach((item) => { - item.resultLinks?.forEach((link) => { - this.pendingLinks.push(link); - }); - }); - - if (this.downloadedBatches.length === 0) { - const links = this.pendingLinks.splice(0, globalConfig.cloudFetchConcurrentDownloads); - const tasks = links.map((link) => this.downloadLink(link)); - const batches = await Promise.all(tasks); - this.downloadedBatches.push(...batches); - } - - return this.downloadedBatches.splice(0, 1); - } - - private async downloadLink(link: TSparkArrowResultLink): Promise { - if (Date.now() >= link.expiryTime.toNumber()) { - throw new Error('CloudFetch link has expired'); - } - - const response = await this.fetch(link.fileLink); - if (!response.ok) { - throw 
new Error(`CloudFetch HTTP error ${response.status} ${response.statusText}`);
-    }
-
-    const result = await response.arrayBuffer();
-    return Buffer.from(result);
-  }
-
-  private async fetch(url: RequestInfo, init?: RequestInit) {
-    const connectionProvider = await this.context.getConnectionProvider();
-    const agent = await connectionProvider.getAgent();
-
-    return fetch(url, {
-      agent,
-      ...init,
-    });
-  }
-}
diff --git a/lib/result/CloudFetchResultHandler.ts b/lib/result/CloudFetchResultHandler.ts
new file mode 100644
index 00000000..f1743628
--- /dev/null
+++ b/lib/result/CloudFetchResultHandler.ts
@@ -0,0 +1,71 @@
+import { Buffer } from 'buffer';
+import fetch, { RequestInfo, RequestInit } from 'node-fetch';
+import { TRowSet, TSparkArrowResultLink } from '../../thrift/TCLIService_types';
+import IClientContext from '../contracts/IClientContext';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
+
+export default class CloudFetchResultHandler implements IResultsProvider<Array<Buffer>> {
+  protected readonly context: IClientContext;
+
+  private readonly source: IResultsProvider<TRowSet | undefined>;
+
+  private pendingLinks: Array<TSparkArrowResultLink> = [];
+
+  private downloadTasks: Array<Promise<Buffer>> = [];
+
+  constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>) {
+    this.context = context;
+    this.source = source;
+  }
+
+  public async hasMore() {
+    if (this.pendingLinks.length > 0 || this.downloadTasks.length > 0) {
+      return true;
+    }
+    return this.source.hasMore();
+  }
+
+  public async fetchNext(options: ResultsProviderFetchNextOptions) {
+    const data = await this.source.fetchNext(options);
+
+    data?.resultLinks?.forEach((link) => {
+      this.pendingLinks.push(link);
+    });
+
+    const clientConfig = this.context.getConfig();
+    const freeTaskSlotsCount = clientConfig.cloudFetchConcurrentDownloads - this.downloadTasks.length;
+
+    if (freeTaskSlotsCount > 0) {
+      const links = this.pendingLinks.splice(0, freeTaskSlotsCount);
+      const tasks = links.map((link) => this.downloadLink(link));
+      this.downloadTasks.push(...tasks);
+    }
+
+    const batch = await this.downloadTasks.shift();
+    return batch ?
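      // wrap the single downloaded file in an array; an empty array means no downloads were pending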
[batch] : []; + } + + private async downloadLink(link: TSparkArrowResultLink): Promise { + if (Date.now() >= link.expiryTime.toNumber()) { + throw new Error('CloudFetch link has expired'); + } + + const response = await this.fetch(link.fileLink); + if (!response.ok) { + throw new Error(`CloudFetch HTTP error ${response.status} ${response.statusText}`); + } + + const result = await response.arrayBuffer(); + return Buffer.from(result); + } + + private async fetch(url: RequestInfo, init?: RequestInit) { + const connectionProvider = await this.context.getConnectionProvider(); + const agent = await connectionProvider.getAgent(); + + return fetch(url, { + agent, + ...init, + }); + } +} diff --git a/lib/result/IOperationResult.ts b/lib/result/IOperationResult.ts deleted file mode 100644 index 7b42a196..00000000 --- a/lib/result/IOperationResult.ts +++ /dev/null @@ -1,7 +0,0 @@ -import { TRowSet } from '../../thrift/TCLIService_types'; - -export default interface IOperationResult { - getValue(data?: Array): Promise; - - hasPendingData(): Promise; -} diff --git a/lib/result/IResultsProvider.ts b/lib/result/IResultsProvider.ts new file mode 100644 index 00000000..0e521f71 --- /dev/null +++ b/lib/result/IResultsProvider.ts @@ -0,0 +1,9 @@ +export interface ResultsProviderFetchNextOptions { + limit: number; +} + +export default interface IResultsProvider { + fetchNext(options: ResultsProviderFetchNextOptions): Promise; + + hasMore(): Promise; +} diff --git a/lib/result/JsonResult.ts b/lib/result/JsonResultHandler.ts similarity index 73% rename from lib/result/JsonResult.ts rename to lib/result/JsonResultHandler.ts index 0c7daefa..bcc07e77 100644 --- a/lib/result/JsonResult.ts +++ b/lib/result/JsonResultHandler.ts @@ -1,34 +1,38 @@ import { ColumnCode } from '../hive/Types'; import { TRowSet, TTableSchema, TColumn, TColumnDesc } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; -import IOperationResult from './IOperationResult'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; import { getSchemaColumns, convertThriftValue } from './utils'; -export default class JsonResult implements IOperationResult { +export default class JsonResultHandler implements IResultsProvider> { private readonly context: IClientContext; + private readonly source: IResultsProvider; + private readonly schema: Array; - constructor(context: IClientContext, schema?: TTableSchema) { + constructor(context: IClientContext, source: IResultsProvider, schema?: TTableSchema) { this.context = context; + this.source = source; this.schema = getSchemaColumns(schema); } - async hasPendingData() { - return false; + public async hasMore() { + return this.source.hasMore(); } - async getValue(data?: Array): Promise> { - if (this.schema.length === 0 || !data) { + public async fetchNext(options: ResultsProviderFetchNextOptions) { + if (this.schema.length === 0) { return []; } - return data.reduce((result: Array, rowSet: TRowSet) => { - const columns = rowSet.columns || []; - const rows = this.getRows(columns, this.schema); + const data = await this.source.fetchNext(options); + if (!data) { + return []; + } - return result.concat(rows); - }, []); + const columns = data.columns || []; + return this.getRows(columns, this.schema); } private getRows(columns: Array, descriptors: Array): Array { diff --git a/lib/result/ResultSlicer.ts b/lib/result/ResultSlicer.ts new file mode 100644 index 00000000..0f640a9a --- /dev/null +++ b/lib/result/ResultSlicer.ts @@ -0,0 +1,74 @@ 
+import IClientContext from '../contracts/IClientContext';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
+
+export interface ResultSlicerFetchNextOptions extends ResultsProviderFetchNextOptions {
+  // Setting this to `true` disables the slicer, so it returns unprocessed chunks
+  // from the underlying results provider
+  disableBuffering?: boolean;
+}
+
+export default class ResultSlicer<T> implements IResultsProvider<Array<T>> {
+  private readonly context: IClientContext;
+
+  private readonly source: IResultsProvider<Array<T>>;
+
+  private remainingResults: Array<T> = [];
+
+  constructor(context: IClientContext, source: IResultsProvider<Array<T>>) {
+    this.context = context;
+    this.source = source;
+  }
+
+  public async hasMore(): Promise<boolean> {
+    if (this.remainingResults.length > 0) {
+      return true;
+    }
+    return this.source.hasMore();
+  }
+
+  public async fetchNext(options: ResultSlicerFetchNextOptions): Promise<Array<T>> {
+    // If we're asked not to use the buffer, first try to return whatever we have in the buffer;
+    // if the buffer is empty, just proxy the call to the underlying results provider
+    if (options.disableBuffering) {
+      if (this.remainingResults.length > 0) {
+        const result = this.remainingResults;
+        this.remainingResults = [];
+        return result;
+      }
+
+      return this.source.fetchNext(options);
+    }
+
+    const result: Array<Array<T>> = [];
+    let resultsCount = 0;
+
+    // First, use the remaining items from the previous fetch
+    if (this.remainingResults.length > 0) {
+      result.push(this.remainingResults);
+      resultsCount += this.remainingResults.length;
+      this.remainingResults = [];
+    }
+
+    // Fetch items from the source results provider until we reach the requested count
+    while (resultsCount < options.limit) {
+      // eslint-disable-next-line no-await-in-loop
+      const chunk = await this.source.fetchNext(options);
+      if (chunk.length === 0) {
+        break;
+      }
+
+      result.push(chunk);
+      resultsCount += chunk.length;
+    }
+
+    // If we collected more results than requested, slice off the excess items and store them for next time
+    if (resultsCount > options.limit) {
+      const lastChunk = result.pop() ??
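      // `pop()` is typed as possibly undefined, though the overflow check above guarantees a chunk here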
[];
+      const neededCount = options.limit - (resultsCount - lastChunk.length);
+      result.push(lastChunk.splice(0, neededCount));
+      this.remainingResults = lastChunk;
+    }
+
+    return result.flat();
+  }
+}
diff --git a/lib/DBSQLOperation/FetchResultsHelper.ts b/lib/result/RowSetProvider.ts
similarity index 60%
rename from lib/DBSQLOperation/FetchResultsHelper.ts
rename to lib/result/RowSetProvider.ts
index 79f82603..5661d208 100644
--- a/lib/DBSQLOperation/FetchResultsHelper.ts
+++ b/lib/result/RowSetProvider.ts
@@ -8,6 +8,7 @@ import {
 import { ColumnCode, FetchType, Int64 } from '../hive/Types';
 import Status from '../dto/Status';
 import IClientContext from '../contracts/IClientContext';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
 
 function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean {
   if (response.hasMoreRows) {
@@ -35,7 +36,7 @@ function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean {
   return (columnValue?.values?.length || 0) > 0;
 }
 
-export default class FetchResultsHelper {
+export default class RowSetProvider implements IResultsProvider<TRowSet | undefined> {
   private readonly context: IClientContext;
 
   private readonly operationHandle: TOperationHandle;
@@ -46,7 +47,16 @@ export default class FetchResultsHelper {
   private readonly returnOnlyPrefetchedResults: boolean;
 
-  public hasMoreRows: boolean = false;
+  private hasMoreRowsFlag?: boolean = undefined;
+
+  private get hasMoreRows(): boolean {
+    // `hasMoreRowsFlag` is populated only after the first row set has been fetched.
+    // Prior to that, we use the `operationHandle.hasResultSet` flag, which is set
+    // if there is any data at all. We also have to choose the appropriate flag
+    // inside a getter, because both `hasMoreRowsFlag` and `operationHandle.hasResultSet`
+    // may change between calls to this getter
+    return this.hasMoreRowsFlag ??
this.operationHandle.hasResultSet; + } constructor( context: IClientContext, @@ -67,32 +77,48 @@ export default class FetchResultsHelper { private processFetchResponse(response: TFetchResultsResp): TRowSet | undefined { Status.assert(response.status); this.fetchOrientation = TFetchOrientation.FETCH_NEXT; - - if (this.prefetchedResults.length > 0) { - this.hasMoreRows = true; - } else if (this.returnOnlyPrefetchedResults) { - this.hasMoreRows = false; - } else { - this.hasMoreRows = checkIfOperationHasMoreRows(response); - } - + this.hasMoreRowsFlag = checkIfOperationHasMoreRows(response); return response.results; } - public async fetch(maxRows: number) { + public async fetchNext({ limit }: ResultsProviderFetchNextOptions) { const prefetchedResponse = this.prefetchedResults.shift(); if (prefetchedResponse) { return this.processFetchResponse(prefetchedResponse); } + // We end up here if no more prefetched results available (checked above) + if (this.returnOnlyPrefetchedResults) { + return undefined; + } + + // Don't fetch next chunk if there are no more data available + if (!this.hasMoreRows) { + return undefined; + } + const driver = await this.context.getDriver(); const response = await driver.fetchResults({ operationHandle: this.operationHandle, orientation: this.fetchOrientation, - maxRows: new Int64(maxRows), + maxRows: new Int64(limit), fetchType: FetchType.Data, }); return this.processFetchResponse(response); } + + public async hasMore() { + // If there are prefetched results available - return `true` regardless of + // the actual state of `hasMoreRows` flag (because we actually have some data) + if (this.prefetchedResults.length > 0) { + return true; + } + // We end up here if no more prefetched results available (checked above) + if (this.returnOnlyPrefetchedResults) { + return false; + } + + return this.hasMoreRows; + } } diff --git a/package-lock.json b/package-lock.json index 571bc7d4..51ddbe11 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@databricks/sql", - "version": "1.6.1", + "version": "1.7.0", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@databricks/sql", - "version": "1.6.1", + "version": "1.7.0", "license": "Apache 2.0", "dependencies": { "apache-arrow": "^13.0.0", @@ -37,6 +37,7 @@ "eslint-plugin-jsx-a11y": "^6.6.1", "eslint-plugin-react": "^7.30.1", "eslint-plugin-react-hooks": "^4.6.0", + "http-proxy": "^1.18.1", "mocha": "^10.2.0", "nyc": "^15.1.0", "prettier": "^2.8.4", @@ -2644,6 +2645,12 @@ "node": ">=0.10.0" } }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "dev": true + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -2810,6 +2817,26 @@ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" }, + "node_modules/follow-redirects": { + "version": "1.15.3", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", + "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + 
"engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/foreground-child": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-2.0.0.tgz", @@ -3195,6 +3222,20 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "node_modules/http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "dependencies": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + }, + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -5063,6 +5104,12 @@ "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==", "dev": true }, + "node_modules/requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "dev": true + }, "node_modules/resolve": { "version": "1.22.1", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.1.tgz", @@ -8007,6 +8054,12 @@ "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==" }, + "eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "dev": true + }, "fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -8142,6 +8195,12 @@ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" }, + "follow-redirects": { + "version": "1.15.3", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", + "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "dev": true + }, "foreground-child": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-2.0.0.tgz", @@ -8409,6 +8468,17 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "requires": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + } + }, "http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -9793,6 +9863,12 @@ "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==", "dev": true }, + "requires-port": { + "version": "1.0.0", + 
"resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "dev": true + }, "resolve": { "version": "1.22.1", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.1.tgz", diff --git a/package.json b/package.json index 34d947bd..61d627ca 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@databricks/sql", - "version": "1.6.1", + "version": "1.7.0", "description": "Driver for connection to Databricks SQL via Thrift API.", "main": "dist/index.js", "types": "dist/index.d.ts", @@ -63,6 +63,7 @@ "eslint-plugin-jsx-a11y": "^6.6.1", "eslint-plugin-react": "^7.30.1", "eslint-plugin-react-hooks": "^4.6.0", + "http-proxy": "^1.18.1", "mocha": "^10.2.0", "nyc": "^15.1.0", "prettier": "^2.8.4", diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js index a75c3059..d23c9552 100644 --- a/tests/e2e/arrow.test.js +++ b/tests/e2e/arrow.test.js @@ -1,9 +1,11 @@ const { expect } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const ArrowResult = require('../../dist/result/ArrowResult').default; -const globalConfig = require('../../dist/globalConfig').default; +const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; +const ResultSlicer = require('../../dist/result/ResultSlicer').default; const fixtures = require('../fixtures/compatibility'); const { expected: expectedColumn } = require('../fixtures/compatibility/column'); @@ -11,9 +13,15 @@ const { expected: expectedArrow } = require('../fixtures/compatibility/arrow'); const { expected: expectedArrowNativeTypes } = require('../fixtures/compatibility/arrow_native_types'); const { fixArrowResult } = fixtures; -async function openSession() { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -50,9 +58,9 @@ async function initializeTable(session, tableName) { describe('Arrow support', () => { const tableName = `dbsql_nodejs_sdk_e2e_arrow_${config.tableSuffix}`; - function createTest(testBody) { + function createTest(testBody, customConfig) { return async () => { - const session = await openSession(); + const session = await openSession(customConfig); try { await initializeTable(session, tableName); await testBody(session); @@ -68,60 +76,74 @@ describe('Arrow support', () => { it( 'should not use arrow if disabled', - createTest(async (session) => { - globalConfig.arrowEnabled = false; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(result).to.deep.equal(expectedColumn); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.not.instanceof(ArrowResult); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(result).to.deep.equal(expectedColumn); + + const resultHandler = await operation.getResultHandler(); + 
expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.not.instanceof(ArrowResultConverter); + + await operation.close(); + }, + { + arrowEnabled: false, + }, + ), ); it( 'should use arrow with native types disabled', - createTest(async (session) => { - globalConfig.arrowEnabled = true; - globalConfig.useArrowNativeTypes = false; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(fixArrowResult(result)).to.deep.equal(expectedArrow); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(fixArrowResult(result)).to.deep.equal(expectedArrow); + + const resultHandler = await operation.getResultHandler(); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); + + await operation.close(); + }, + { + arrowEnabled: true, + useArrowNativeTypes: false, + }, + ), ); it( 'should use arrow with native types enabled', - createTest(async (session) => { - globalConfig.arrowEnabled = true; - globalConfig.useArrowNativeTypes = true; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); + + const resultHandler = await operation.getResultHandler(); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); + + await operation.close(); + }, + { + arrowEnabled: true, + useArrowNativeTypes: true, + }, + ), ); it('should handle multiple batches in response', async () => { - globalConfig.arrowEnabled = true; - const rowsCount = 10000; - const session = await openSession(); + const session = await openSession({ + arrowEnabled: true, + }); const operation = await session.executeStatement(` SELECT * FROM range(0, ${rowsCount}) AS t1 @@ -130,14 +152,20 @@ describe('Arrow support', () => { // We use some internals here to check that server returned response with multiple batches const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResult); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); + + sinon.spy(operation._data, 'fetchNext'); + + const result = await resultHandler.fetchNext({ limit: rowsCount }); - const rawData = await operation._data.fetch(rowsCount); + expect(operation._data.fetchNext.callCount).to.be.eq(1); + const rawData = await operation._data.fetchNext.firstCall.returnValue; // We don't know exact count 
of batches returned, as it depends on the server's configuration, // but with enough rows there should be more than one result batch expect(rawData.arrowBatches?.length).to.be.gt(1); - const result = await resultHandler.getValue([rawData]); expect(result.length).to.be.eq(rowsCount); }); });
diff --git a/tests/e2e/batched_fetch.test.js b/tests/e2e/batched_fetch.test.js index 5218088e..e22e1a8e 100644 --- a/tests/e2e/batched_fetch.test.js +++ b/tests/e2e/batched_fetch.test.js @@ -3,11 +3,16 @@ const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async () => { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -18,17 +23,9 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} describe('Data fetching', () => { - beforeEach(() => { - globalConfig.arrowEnabled = false; - }); - - afterEach(() => { - globalConfig.arrowEnabled = true; - }); - const query = ` SELECT * FROM range(0, 1000) AS t1 @@ -36,12 +33,14 @@ describe('Data fetching', () => { `; it('fetch chunks should return a max row set of chunkSize', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // set `maxRows` to null to disable direct results so all the data are fetched through `driver.fetchResults` const operation = await session.executeStatement(query, { maxRows: null }); - let chunkedOp = await operation.fetchChunk({ maxRows: 10 }).catch((error) => logger(error)); + let chunkedOp = await operation + .fetchChunk({ maxRows: 10, disableBuffering: true }) + .catch((error) => logger(error)); expect(chunkedOp.length).to.be.equal(10); // we explicitly requested only one chunk expect(session.context.driver.fetchResults.callCount).to.equal(1); @@ -50,8 +49,35 @@ } }); + it('fetch chunks should respect maxRows', async () => { + const session = await openSession({ arrowEnabled: false }); + + const chunkSize = 300; + const lastChunkSize = 100; // 1000 % chunkSize + + try { + const operation = await session.executeStatement(query, { maxRows: 500 }); + + let hasMoreRows = true; + let chunkCount = 0; + + while (hasMoreRows) { + let chunkedOp = await operation.fetchChunk({ maxRows: 300 }); + chunkCount += 1; + hasMoreRows = await operation.hasMoreRows(); + + const isLastChunk = !hasMoreRows; + expect(chunkedOp.length).to.be.equal(isLastChunk ? lastChunkSize : chunkSize); + } + + expect(chunkCount).to.be.equal(4); // 1000 = 3*300 + 1*100 + } finally { + await session.close(); + } + });
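For reference, the consumer-side pattern this new test locks in is plain chunked paging. A minimal sketch (assuming an async context and a `session` opened the same way `openSession()` above does):

// Page through a result set in fixed-size chunks. `fetchChunk` now honors
// `maxRows`, and `hasMoreRows()` works even before the first fetch,
// so a simple loop is enough.
const operation = await session.executeStatement('SELECT * FROM range(0, 1000)');
const rows = [];
while (await operation.hasMoreRows()) {
  const chunk = await operation.fetchChunk({ maxRows: 300 });
  rows.push(...chunk); // each chunk holds at most 300 rows
}
await operation.close();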
it('fetch all should fetch all records', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // set `maxRows` to null to disable direct results so all the data are fetched through `driver.fetchResults` @@ -66,7 +92,7 @@ }); it('should fetch all records if they fit within directResults response', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // here `maxRows` enables direct results with limit of the first batch @@ -81,7 +107,7 @@ }); it('should fetch all records if only part of them fit within directResults response', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // here `maxRows` enables direct results with limit of the first batch
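The next file exercises CloudFetch end-to-end. From user code, CloudFetch is opt-in per statement. A rough sketch, using only the options that appear in the tests below (`largeQuery` is a placeholder for a query big enough to trigger CloudFetch):

// Ask the server to return CloudFetch links instead of inline row sets.
const operation = await session.executeStatement(largeQuery, {
  maxRows: null, // disable DirectResults
  useCloudFetch: true, // tell the server we can consume CloudFetch links
});
const rows = await operation.fetchAll();

The number of concurrent background downloads comes from the client config key `cloudFetchConcurrentDownloads`, which these tests override by stubbing `client.getConfig()`.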
diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js index 3997f6af..4dc41e43 100644 --- a/tests/e2e/cloudfetch.test.js +++ b/tests/e2e/cloudfetch.test.js @@ -1,14 +1,20 @@ const { expect } = require('chai'); const sinon = require('sinon'); const config = require('./utils/config'); -const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const CloudFetchResult = require('../../dist/result/CloudFetchResult').default; -const globalConfig = require('../../dist/globalConfig').default; +const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; +const ResultSlicer = require('../../dist/result/ResultSlicer').default; -const openSession = async () => { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -19,25 +25,14 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} // This suite takes a while to execute, and in this case it's expected. // If it ever starts to fail with timeouts, consider simply increasing the timeout for it describe('CloudFetch', () => { - let savedConcurrentDownloads; - - beforeEach(() => { - savedConcurrentDownloads = globalConfig.cloudFetchConcurrentDownloads; - }); - - afterEach(() => { - globalConfig.cloudFetchConcurrentDownloads = savedConcurrentDownloads; - }); - it('should fetch data', async () => { - globalConfig.cloudFetchConcurrentDownloads = 5; - - const session = await openSession(); + const cloudFetchConcurrentDownloads = 5; + const session = await openSession({ cloudFetchConcurrentDownloads }); const queriedRowsCount = 10000000; // result has to be quite big to enable CloudFetch const operation = await session.executeStatement( ` @@ -47,7 +42,7 @@ LEFT JOIN (SELECT 1) AS t2 `, { - maxRows: 100000, + maxRows: null, // disable DirectResults useCloudFetch: true, // tell server that we would like to use CloudFetch }, ); @@ -57,33 +52,35 @@ // Check if we're actually getting data via CloudFetch const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceOf(CloudFetchResult); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceOf(CloudFetchResultHandler); + + const cfResultHandler = resultHandler.source.source; // Fetch the first chunk and check that the result handler behaves properly. // With the count of rows we queried, there should be at least one row set, // containing 8 result links. After fetching the first chunk, // the result handler should download 5 of them and schedule the rest - expect(await resultHandler.hasPendingData()).to.be.false; - expect(resultHandler.pendingLinks.length).to.be.equal(0); - expect(resultHandler.downloadedBatches.length).to.be.equal(0); + expect(await cfResultHandler.hasMore()).to.be.true; + expect(cfResultHandler.pendingLinks.length).to.be.equal(0); + expect(cfResultHandler.downloadTasks.length).to.be.equal(0); - sinon.spy(operation._data, 'fetch'); + sinon.spy(operation._data, 'fetchNext'); - const chunk = await operation.fetchChunk({ maxRows: 100000 }); + const chunk = await operation.fetchChunk({ maxRows: 100000, disableBuffering: true }); // Count links returned from server - const resultSet = await operation._data.fetch.firstCall.returnValue; + const resultSet = await operation._data.fetchNext.firstCall.returnValue; const resultLinksCount = resultSet?.resultLinks?.length ??
0; - expect(await resultHandler.hasPendingData()).to.be.true; + expect(await cfResultHandler.hasMore()).to.be.true; // expected batches minus first 5 already fetched - expect(resultHandler.pendingLinks.length).to.be.equal( - resultLinksCount - globalConfig.cloudFetchConcurrentDownloads, - ); - expect(resultHandler.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + expect(cfResultHandler.pendingLinks.length).to.be.equal(resultLinksCount - cloudFetchConcurrentDownloads); + expect(cfResultHandler.downloadTasks.length).to.be.equal(cloudFetchConcurrentDownloads - 1); let fetchedRowCount = chunk.length; while (await operation.hasMoreRows()) { - const chunk = await operation.fetchChunk({ maxRows: 100000 }); + const chunk = await operation.fetchChunk({ maxRows: 100000, disableBuffering: true }); fetchedRowCount += chunk.length; } diff --git a/tests/e2e/data_types.test.js b/tests/e2e/data_types.test.js index 8308cc12..59c24856 100644 --- a/tests/e2e/data_types.test.js +++ b/tests/e2e/data_types.test.js @@ -1,12 +1,18 @@ const { expect } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async () => { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -17,7 +23,7 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} const execute = async (session, statement) => { const operation = await session.executeStatement(statement); @@ -39,18 +45,10 @@ function removeTrailingMetadata(columns) { } describe('Data types', () => { - beforeEach(() => { - globalConfig.arrowEnabled = false; - }); - - afterEach(() => { - globalConfig.arrowEnabled = true; - }); - it('primitive data types should presented correctly', async () => { const table = `dbsql_nodejs_sdk_e2e_primitive_types_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${table}`); await execute( @@ -201,7 +199,7 @@ describe('Data types', () => { it('interval types should be presented correctly', async () => { const table = `dbsql_nodejs_sdk_e2e_interval_types_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${table}`); await execute( @@ -246,7 +244,7 @@ describe('Data types', () => { const table = `dbsql_nodejs_sdk_e2e_complex_types_${config.tableSuffix}`; const helperTable = `dbsql_nodejs_sdk_e2e_complex_types_helper_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${helperTable}`); await execute(session, `DROP TABLE IF EXISTS ${table}`); diff --git a/tests/e2e/proxy.test.js b/tests/e2e/proxy.test.js new file mode 100644 index 00000000..eab37261 --- /dev/null +++ b/tests/e2e/proxy.test.js @@ -0,0 +1,75 @@ +const { expect } = require('chai'); +const sinon = require('sinon'); +const httpProxy = require('http-proxy'); 
+const https = require('https'); +const config = require('./utils/config'); +const { DBSQLClient } = require('../..'); + +class HttpProxyMock { + constructor(target, port) { + this.requests = []; + + this.config = { + protocol: 'http', + host: 'localhost', + port, + }; + + this.target = `https://${config.host}`; + + this.proxy = httpProxy.createServer({ + target: this.target, + agent: new https.Agent({ + rejectUnauthorized: false, + }), + }); + + this.proxy.on('proxyRes', (proxyRes) => { + const req = proxyRes.req; + this.requests.push({ + method: req.method?.toUpperCase(), + url: `${req.protocol}//${req.host}${req.path}`, + requestHeaders: { ...req.getHeaders() }, + responseHeaders: proxyRes.headers, + }); + }); + + this.proxy.listen(port); + console.log(`Proxy listening at ${this.config.host}:${this.config.port} -> ${this.target}`); + } + + close() { + this.proxy.close(() => { + console.log(`Proxy stopped at ${this.config.host}:${this.config.port}`); + }); + } +} + +describe('Proxy', () => { + it('should use http proxy', async () => { + const proxy = new HttpProxyMock(`https://${config.host}`, 9090); + try { + const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns(clientConfig); + + const connection = await client.connect({ + host: config.host, + path: config.path, + token: config.token, + proxy: proxy.config, + }); + + const session = await connection.openSession({ + initialCatalog: config.database[0], + initialSchema: config.database[1], + }); + + expect(proxy.requests.length).to.be.gte(1); + expect(proxy.requests[0].method).to.be.eq('POST'); + expect(proxy.requests[0].url).to.be.eq(`https://${config.host}${config.path}`); + } finally { + proxy.close(); + } + }); +}); diff --git a/tests/e2e/query_parameters.test.js b/tests/e2e/query_parameters.test.js index 6f7fdf00..6efa5160 100644 --- a/tests/e2e/query_parameters.test.js +++ b/tests/e2e/query_parameters.test.js @@ -19,9 +19,8 @@ const openSession = async () => { }); }; -// TODO: Temporarily disable those tests until we figure out issues with E2E test env describe('Query parameters', () => { - it.skip('should use named parameters', async () => { + it('should use named parameters', async () => { const session = await openSession(); const operation = await session.executeStatement( ` @@ -72,7 +71,7 @@ describe('Query parameters', () => { ]); }); - it.skip('should accept primitives as values for named parameters', async () => { + it('should accept primitives as values for named parameters', async () => { const session = await openSession(); const operation = await session.executeStatement( ` @@ -117,7 +116,7 @@ describe('Query parameters', () => { ]); }); - it.skip('should use ordinal parameters', async () => { + it('should use ordinal parameters', async () => { const session = await openSession(); const operation = await session.executeStatement( ` @@ -168,7 +167,7 @@ describe('Query parameters', () => { ]); }); - it.skip('should accept primitives as values for ordinal parameters', async () => { + it('should accept primitives as values for ordinal parameters', async () => { const session = await openSession(); const operation = await session.executeStatement( ` diff --git a/tests/e2e/timeouts.test.js b/tests/e2e/timeouts.test.js index fdf495c3..c535ce6b 100644 --- a/tests/e2e/timeouts.test.js +++ b/tests/e2e/timeouts.test.js @@ -1,11 +1,17 @@ const { expect, AssertionError } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const 
{ DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async (socketTimeout) => { +async function openSession(socketTimeout, customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -17,37 +23,26 @@ const openSession = async (socketTimeout) => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; - -describe('Data fetching', () => { - const query = ` - SELECT * - FROM range(0, 100000) AS t1 - LEFT JOIN (SELECT 1) AS t2 - ORDER BY RANDOM() ASC - `; +} +describe('Timeouts', () => { const socketTimeout = 1; // minimum value to make sure any request will time out it('should use default socket timeout', async () => { - const savedTimeout = globalConfig.socketTimeout; - globalConfig.socketTimeout = socketTimeout; try { - await openSession(); + await openSession(undefined, { socketTimeout }); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { throw error; } expect(error.message).to.be.eq('Request timed out'); - } finally { - globalConfig.socketTimeout = savedTimeout; } }); it('should use socket timeout from options', async () => { try { - await await openSession(socketTimeout); + await openSession(socketTimeout); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { diff --git a/tests/unit/DBSQLClient.test.js b/tests/unit/DBSQLClient.test.js index 55231707..b1a1f3f2 100644 --- a/tests/unit/DBSQLClient.test.js +++ b/tests/unit/DBSQLClient.test.js @@ -184,7 +184,7 @@ describe('DBSQLClient.getClient', () => { const thriftClient = {}; client.authProvider = new AuthProviderMock(); - client.connectionProvider = new HttpConnection({ ...options }); + client.connectionProvider = new HttpConnection({ ...options }, client); client.thrift = { createClient: sinon.stub().returns(thriftClient), }; @@ -199,7 +199,7 @@ describe('DBSQLClient.getClient', () => { const thriftClient = {}; - client.connectionProvider = new HttpConnection({ ...options }); + client.connectionProvider = new HttpConnection({ ...options }, client); client.thrift = { createClient: sinon.stub().returns(thriftClient), }; diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js index 94834baf..99cc1e66 100644 --- a/tests/unit/DBSQLOperation.test.js +++ b/tests/unit/DBSQLOperation.test.js @@ -6,9 +6,11 @@ const DBSQLOperation = require('../../dist/DBSQLOperation').default; const StatusError = require('../../dist/errors/StatusError').default; const OperationStateError = require('../../dist/errors/OperationStateError').default; const HiveDriverError = require('../../dist/errors/HiveDriverError').default; -const JsonResult = require('../../dist/result/JsonResult').default; -const ArrowResult = require('../../dist/result/ArrowResult').default; -const CloudFetchResult = require('../../dist/result/CloudFetchResult').default; +const JsonResultHandler = require('../../dist/result/JsonResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; +const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; +const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; +const ResultSlicer = 
require('../../dist/result/ResultSlicer').default; class OperationHandleMock { constructor(hasResultSet = true) { @@ -127,7 +129,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; }); it('should pick up state from directResults', async () => { @@ -147,7 +149,7 @@ describe('DBSQLOperation', () => { }); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; }); it('should fetch status and update internal state', async () => { @@ -163,14 +165,14 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation.hasResultSet).to.be.false; + expect(operation.operationHandle.hasResultSet).to.be.false; const status = await operation.status(); expect(context.driver.getOperationStatus.called).to.be.true; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; }); it('should request progress', async () => { @@ -202,7 +204,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE); - expect(operation.hasResultSet).to.be.false; + expect(operation.operationHandle.hasResultSet).to.be.false; // First call - should fetch data and cache context.driver.getOperationStatusResp = { @@ -214,7 +216,7 @@ describe('DBSQLOperation', () => { expect(context.driver.getOperationStatus.callCount).to.equal(1); expect(status1.operationState).to.equal(TOperationState.FINISHED_STATE); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; // Second call - should return cached data context.driver.getOperationStatusResp = { @@ -226,7 +228,7 @@ describe('DBSQLOperation', () => { expect(context.driver.getOperationStatus.callCount).to.equal(1); expect(status2.operationState).to.equal(TOperationState.FINISHED_STATE); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; }); it('should fetch status if directResults status is not finished', async () => { @@ -252,14 +254,14 @@ describe('DBSQLOperation', () => { }); expect(operation.state).to.equal(TOperationState.RUNNING_STATE); // from directResults - expect(operation.hasResultSet).to.be.false; + expect(operation.operationHandle.hasResultSet).to.be.false; const status = await operation.status(false); expect(context.driver.getOperationStatus.called).to.be.true; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.true; + expect(operation.operationHandle.hasResultSet).to.be.true; }); it('should not fetch status if directResults status is finished', async () => { @@ -285,14 +287,14 @@ describe('DBSQLOperation', () => { }); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); // 
from directResults - expect(operation.hasResultSet).to.be.false; + expect(operation.operationHandle.hasResultSet).to.be.false; const status = await operation.status(false); expect(context.driver.getOperationStatus.called).to.be.false; expect(status.operationState).to.equal(TOperationState.FINISHED_STATE); expect(operation.state).to.equal(TOperationState.FINISHED_STATE); - expect(operation.hasResultSet).to.be.false; + expect(operation.operationHandle.hasResultSet).to.be.false; }); it('should throw an error in case of a status error', async () => { @@ -407,7 +409,7 @@ describe('DBSQLOperation', () => { expect(operation.cancelled).to.be.true; await expectFailure(() => operation.fetchAll()); - await expectFailure(() => operation.fetchChunk()); + await expectFailure(() => operation.fetchChunk({ disableBuffering: true })); await expectFailure(() => operation.status()); await expectFailure(() => operation.finished()); await expectFailure(() => operation.getSchema()); @@ -533,7 +535,7 @@ describe('DBSQLOperation', () => { expect(operation.closed).to.be.true; await expectFailure(() => operation.fetchAll()); - await expectFailure(() => operation.fetchChunk()); + await expectFailure(() => operation.fetchChunk({ disableBuffering: true })); await expectFailure(() => operation.status()); await expectFailure(() => operation.finished()); await expectFailure(() => operation.getSchema()); @@ -885,7 +887,8 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(JsonResult); + expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(JsonResultHandler); } arrowHandler: { @@ -895,7 +898,9 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(ArrowResult); + expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceOf(ArrowResultHandler); } cloudFetchHandler: { @@ -905,7 +910,9 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(CloudFetchResult); + expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceOf(CloudFetchResultHandler); } }); }); @@ -921,7 +928,7 @@ describe('DBSQLOperation', () => { sinon.spy(context.driver, 'fetchResults'); const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([]); expect(context.driver.getResultSetMetadata.called).to.be.false; @@ -948,7 +955,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; 
expect(results).to.deep.equal([]); @@ -974,7 +981,7 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.results.columns = []; const operation = new DBSQLOperation({ handle, context }); - await operation.fetchChunk({ progress: true }); + await operation.fetchChunk({ progress: true, disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; const request = context.driver.getOperationStatus.getCall(0).args[0]; @@ -1005,7 +1012,7 @@ describe('DBSQLOperation', () => { const callback = sinon.stub(); - await operation.fetchChunk({ callback }); + await operation.fetchChunk({ callback, disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; expect(callback.callCount).to.be.equal(attemptsUntilFinished); @@ -1018,12 +1025,13 @@ describe('DBSQLOperation', () => { handle.hasResultSet = true; context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; + context.driver.getOperationStatusResp.hasResultSet = true; sinon.spy(context.driver, 'getResultSetMetadata'); sinon.spy(context.driver, 'fetchResults'); const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([{ test: 1 }, { test: 2 }, { test: 3 }]); expect(context.driver.getResultSetMetadata.called).to.be.true; @@ -1060,7 +1068,7 @@ describe('DBSQLOperation', () => { }, }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([{ test: 5 }, { test: 6 }]); expect(context.driver.getResultSetMetadata.called).to.be.true; @@ -1098,13 +1106,13 @@ describe('DBSQLOperation', () => { }, }); - const results1 = await operation.fetchChunk(); + const results1 = await operation.fetchChunk({ disableBuffering: true }); expect(results1).to.deep.equal([{ test: 5 }, { test: 6 }]); expect(context.driver.getResultSetMetadata.callCount).to.be.eq(1); expect(context.driver.fetchResults.callCount).to.be.eq(0); - const results2 = await operation.fetchChunk(); + const results2 = await operation.fetchChunk({ disableBuffering: true }); expect(results2).to.deep.equal([{ test: 1 }, { test: 2 }, { test: 3 }]); expect(context.driver.getResultSetMetadata.callCount).to.be.eq(1); @@ -1125,7 +1133,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); try { - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect.fail('It should throw a HiveDriverError'); } catch (e) { if (e instanceof AssertionError) { @@ -1168,7 +1176,7 @@ describe('DBSQLOperation', () => { }); describe('hasMoreRows', () => { - it('should return False until first chunk of data fetched', async () => { + it('should return initial value prior to first fetch', async () => { const context = new ClientContextMock(); const handle = new OperationHandleMock(); @@ -1176,12 +1184,15 @@ describe('DBSQLOperation', () => { context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE; context.driver.getOperationStatusResp.hasResultSet = true; - context.driver.fetchResultsResp.hasMoreRows = true; + context.driver.fetchResultsResp.hasMoreRows = false; + context.driver.fetchResultsResp.results = undefined; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); expect(await 
operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.undefined; + await operation.fetchChunk({ disableBuffering: true }); + expect(await operation.hasMoreRows()).to.be.false; + expect(operation._data.hasMoreRowsFlag).to.be.false; }); it('should return False if operation was closed', async () => { @@ -1195,8 +1206,8 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = true; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + expect(await operation.hasMoreRows()).to.be.true; + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; await operation.close(); expect(await operation.hasMoreRows()).to.be.false; @@ -1213,8 +1224,8 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = true; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + expect(await operation.hasMoreRows()).to.be.true; + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; await operation.cancel(); expect(await operation.hasMoreRows()).to.be.false; @@ -1231,9 +1242,11 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = true; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.undefined; + await operation.fetchChunk({ disableBuffering: true }); + expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.true; }); it('should return True if hasMoreRows flag is False but there is actual data', async () => { @@ -1247,9 +1260,11 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = false; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.undefined; + await operation.fetchChunk({ disableBuffering: true }); + expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.true; }); it('should return True if hasMoreRows flag is unset but there is actual data', async () => { @@ -1263,9 +1278,11 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.hasMoreRows = undefined; const operation = new DBSQLOperation({ handle, context }); - expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.undefined; + await operation.fetchChunk({ disableBuffering: true }); + expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.true; }); it('should return False if hasMoreRows flag is False and there is no data', async () => { @@ -1280,9 +1297,11 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.results = undefined; const operation = new DBSQLOperation({ handle, context }); + expect(await operation.hasMoreRows()).to.be.true; + expect(operation._data.hasMoreRowsFlag).to.be.undefined; + await operation.fetchChunk({ disableBuffering: true }); expect(await 
operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); - expect(await operation.hasMoreRows()).to.be.false; + expect(operation._data.hasMoreRowsFlag).to.be.false; }); }); }); diff --git a/tests/unit/DBSQLSession.test.js b/tests/unit/DBSQLSession.test.js index 9ff5172b..b172b29f 100644 --- a/tests/unit/DBSQLSession.test.js +++ b/tests/unit/DBSQLSession.test.js @@ -6,6 +6,7 @@ const InfoValue = require('../../dist/dto/InfoValue').default; const Status = require('../../dist/dto/Status').default; const DBSQLOperation = require('../../dist/DBSQLOperation').default; const HiveDriver = require('../../dist/hive/HiveDriver').default; +const DBSQLClient = require('../../dist/DBSQLClient').default; // Create logger that won't emit // @@ -37,9 +38,12 @@ function createDriverMock(customMethodHandler) { function createSession(customMethodHandler) { const driver = createDriverMock(customMethodHandler); + const clientConfig = DBSQLClient.getDefaultConfig(); + return new DBSQLSession({ handle: { sessionId: 'id' }, context: { + getConfig: () => clientConfig, getLogger: () => logger, getDriver: async () => driver, }, diff --git a/tests/unit/connection/connections/HttpConnection.test.js b/tests/unit/connection/connections/HttpConnection.test.js index a9a21136..261a3af8 100644 --- a/tests/unit/connection/connections/HttpConnection.test.js +++ b/tests/unit/connection/connections/HttpConnection.test.js @@ -2,14 +2,24 @@ const http = require('http'); const { expect } = require('chai'); const HttpConnection = require('../../../../dist/connection/connections/HttpConnection').default; const ThriftHttpConnection = require('../../../../dist/connection/connections/ThriftHttpConnection').default; +const DBSQLClient = require('../../../../dist/DBSQLClient').default; describe('HttpConnection.connect', () => { it('should create Thrift connection', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -22,15 +32,24 @@ describe('HttpConnection.connect', () => { }); it('should set SSL certificates and disable rejectUnauthorized', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - https: true, - ca: 'ca', - cert: 'cert', - key: 'key', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + https: true, + ca: 'ca', + cert: 'cert', + key: 'key', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -41,12 +60,21 @@ describe('HttpConnection.connect', () => { }); it('should initialize http agents', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - https: false, - path: '/hive', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + https: false, + path: '/hive', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -54,17 +82,26 @@ describe('HttpConnection.connect', () => { 
}); it('should update headers (case 1: Thrift connection not initialized)', async () => { + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const initialHeaders = { a: 'test header A', b: 'test header B', }; - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - headers: initialHeaders, - }); + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + headers: initialHeaders, + }, + context, + ); const extraHeaders = { b: 'new header B', @@ -82,17 +119,26 @@ describe('HttpConnection.connect', () => { }); it('should update headers (case 2: Thrift connection initialized)', async () => { + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const initialHeaders = { a: 'test header A', b: 'test header B', }; - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - headers: initialHeaders, - }); + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + headers: initialHeaders, + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); diff --git a/tests/unit/hive/commands/BaseCommand.test.js b/tests/unit/hive/commands/BaseCommand.test.js index 1a20c303..d6b286cf 100644 --- a/tests/unit/hive/commands/BaseCommand.test.js +++ b/tests/unit/hive/commands/BaseCommand.test.js @@ -2,9 +2,7 @@ const { expect, AssertionError } = require('chai'); const { Thrift } = require('thrift'); const HiveDriverError = require('../../../../dist/errors/HiveDriverError').default; const BaseCommand = require('../../../../dist/hive/Commands/BaseCommand').default; -const globalConfig = require('../../../../dist/globalConfig').default; - -const savedGlobalConfig = { ...globalConfig }; +const DBSQLClient = require('../../../../dist/DBSQLClient').default; class ThriftClientMock { constructor(methodHandler) { @@ -26,18 +24,24 @@ ThriftClientMock.defaultResponse = { }; class CustomCommand extends BaseCommand { + constructor(...args) { + super(...args); + } + execute(request) { return this.executeCommand(request, this.client.CustomMethod); } } describe('BaseCommand', () => { - afterEach(() => { - Object.assign(globalConfig, savedGlobalConfig); - }); - it('should fail if trying to invoke non-existing command', async () => { - const command = new CustomCommand({}); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const command = new CustomCommand({}, context); try { await command.execute(); @@ -54,11 +58,20 @@ describe('BaseCommand', () => { it('should handle exceptions thrown by command', async () => { const errorMessage = 'Unexpected error'; - const command = new CustomCommand({ - CustomMethod() { - throw new Error(errorMessage); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const command = new CustomCommand( + { + CustomMethod() { + throw new Error(errorMessage); + }, }, - }); + context, + ); try { await command.execute(); @@ -75,10 +88,16 @@ describe('BaseCommand', () => { [429, 503].forEach((statusCode) => { describe(`HTTP ${statusCode} error`, () => { it('should fail on max retry attempts exceeded', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 
3; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 3; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -88,6 +107,7 @@ describe('BaseCommand', () => { error.statusCode = statusCode; throw error; }), + context, ); try { @@ -100,15 +120,21 @@ describe('BaseCommand', () => { expect(error).to.be.instanceof(HiveDriverError); expect(error.message).to.contain(`${statusCode} when connecting to resource`); expect(error.message).to.contain('Max retry count exceeded'); - expect(methodCallCount).to.equal(globalConfig.retryMaxAttempts); + expect(methodCallCount).to.equal(clientConfig.retryMaxAttempts); } }); it('should fail on retry timeout exceeded', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 50; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 50; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -118,6 +144,7 @@ describe('BaseCommand', () => { error.statusCode = statusCode; throw error; }), + context, ); try { @@ -138,10 +165,16 @@ describe('BaseCommand', () => { }); it('should succeed after few attempts', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 5; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 5; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -154,6 +187,7 @@ describe('BaseCommand', () => { } return ThriftClientMock.defaultResponse; }), + context, ); const response = await command.execute(); @@ -166,12 +200,19 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized HTTP errors`, async () => { const errorMessage = 'Unrecognized HTTP error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { const error = new Thrift.TApplicationException(undefined, errorMessage); error.statusCode = 500; throw error; }), + context, ); try { @@ -189,10 +230,17 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized Thrift errors`, async () => { const errorMessage = 'Unrecognized HTTP error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { throw new Thrift.TApplicationException(undefined, errorMessage); }), + context, ); try { @@ -210,10 +258,17 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized errors`, async () => { const errorMessage = 'Unrecognized error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { throw new 
Error(errorMessage); }), + context, ); try { diff --git a/tests/unit/result/ArrowResult.test.js b/tests/unit/result/ArrowResultConverter.test.js similarity index 58% rename from tests/unit/result/ArrowResult.test.js rename to tests/unit/result/ArrowResultConverter.test.js index 27244190..3ff87a15 100644 --- a/tests/unit/result/ArrowResult.test.js +++ b/tests/unit/result/ArrowResultConverter.test.js @@ -1,7 +1,8 @@ const { expect } = require('chai'); const fs = require('fs'); const path = require('path'); -const ArrowResult = require('../../../dist/result/ArrowResult').default; +const ArrowResultConverter = require('../../../dist/result/ArrowResultConverter').default; +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); const sampleThriftSchema = { columns: [ @@ -32,92 +33,55 @@ const sampleArrowSchema = Buffer.from([ 0, 0, 0, 0, ]); -const sampleEmptyArrowBatch = { - batch: undefined, - rowCount: 0, -}; - -const sampleArrowBatch = { - batch: Buffer.from([ +const sampleArrowBatch = [ + sampleArrowSchema, + Buffer.from([ 255, 255, 255, 255, 136, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 12, 0, 22, 0, 14, 0, 21, 0, 16, 0, 4, 0, 12, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 16, 0, 0, 0, 0, 3, 10, 0, 24, 0, 12, 0, 8, 0, 4, 0, 10, 0, 0, 0, 20, 0, 0, 0, 56, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, ]), - rowCount: 1, -}; - -const sampleRowSet1 = { - startRowOffset: 0, - arrowBatches: undefined, -}; - -const sampleRowSet2 = { - startRowOffset: 0, - arrowBatches: [], -}; - -const sampleRowSet3 = { - startRowOffset: 0, - arrowBatches: [sampleEmptyArrowBatch], -}; - -const sampleRowSet4 = { - startRowOffset: 0, - arrowBatches: [sampleArrowBatch], -}; +]; const thriftSchemaAllNulls = JSON.parse( fs.readFileSync(path.join(__dirname, 'fixtures/thriftSchemaAllNulls.json')).toString('utf-8'), ); -const arrowSchemaAllNulls = fs.readFileSync(path.join(__dirname, 'fixtures/arrowSchemaAllNulls.arrow')); - -const rowSetAllNulls = { - startRowOffset: 0, - arrowBatches: [ - { - batch: fs.readFileSync(path.join(__dirname, 'fixtures/dataAllNulls.arrow')), - rowCount: 1, - }, - ], -}; - -describe('ArrowResult', () => { - it('should not buffer any data', async () => { - const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - await result.getValue([sampleRowSet1]); - expect(await result.hasPendingData()).to.be.false; - }); +const arrowBatchAllNulls = [ + fs.readFileSync(path.join(__dirname, 'fixtures/arrowSchemaAllNulls.arrow')), + fs.readFileSync(path.join(__dirname, 'fixtures/dataAllNulls.arrow')), +]; +describe('ArrowResultHandler', () => { it('should convert data', async () => { const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - expect(await result.getValue([sampleRowSet1])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet2])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet3])).to.be.deep.eq([]); - expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([{ 1: 1 }]); + const rowSetProvider = new ResultsProviderMock([sampleArrowBatch]); + const result = new ArrowResultConverter(context, rowSetProvider, sampleThriftSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([{ 1: 1 }]); }); it('should return empty array if no 
data to process', async () => { const context = {}; - const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema); - expect(await result.getValue()).to.be.deep.eq([]); - expect(await result.getValue([])).to.be.deep.eq([]); + const rowSetProvider = new ResultsProviderMock([], []); + const result = new ArrowResultConverter(context, rowSetProvider, sampleThriftSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; }); it('should return empty array if no schema available', async () => { const context = {}; - const result = new ArrowResult(context); - expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([]); + const rowSetProvider = new ResultsProviderMock([sampleArrowBatch]); + const result = new ArrowResultConverter(context, rowSetProvider); + expect(await result.hasMore()).to.be.false; + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); }); it('should detect nulls', async () => { const context = {}; - const result = new ArrowResult(context, thriftSchemaAllNulls, arrowSchemaAllNulls); - expect(await result.getValue([rowSetAllNulls])).to.be.deep.eq([ + const rowSetProvider = new ResultsProviderMock([arrowBatchAllNulls]); + const result = new ArrowResultConverter(context, rowSetProvider, thriftSchemaAllNulls); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ { boolean_field: null, diff --git a/tests/unit/result/ArrowResultHandler.test.js b/tests/unit/result/ArrowResultHandler.test.js new file mode 100644 index 00000000..9c24e680 --- /dev/null +++ b/tests/unit/result/ArrowResultHandler.test.js @@ -0,0 +1,101 @@ +const { expect } = require('chai'); +const fs = require('fs'); +const path = require('path'); +const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default; +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); + +const sampleArrowSchema = Buffer.from([ + 255, 255, 255, 255, 208, 0, 0, 0, 16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 6, 0, 13, 0, 8, 0, 10, 0, 0, 0, 0, 0, 4, 0, 16, 0, + 0, 0, 0, 1, 10, 0, 12, 0, 0, 0, 8, 0, 4, 0, 10, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 24, 0, 0, 0, + 0, 0, 18, 0, 24, 0, 20, 0, 0, 0, 19, 0, 12, 0, 0, 0, 8, 0, 4, 0, 18, 0, 0, 0, 20, 0, 0, 0, 80, 0, 0, 0, 88, 0, 0, 0, + 0, 0, 0, 2, 92, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 8, 0, 12, 0, 8, 0, 4, 0, 8, 0, 0, 0, 8, 0, 0, 0, 12, 0, 0, 0, 3, 0, + 0, 0, 73, 78, 84, 0, 22, 0, 0, 0, 83, 112, 97, 114, 107, 58, 68, 97, 116, 97, 84, 121, 112, 101, 58, 83, 113, 108, 78, + 97, 109, 101, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 8, 0, 7, 0, 8, 0, 0, 0, 0, 0, 0, 1, 32, 0, 0, 0, 1, 0, 0, 0, 49, 0, 0, 0, + 0, 0, 0, 0, +]); + +const sampleArrowBatch = { + batch: Buffer.from([ + 255, 255, 255, 255, 136, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 12, 0, 22, 0, 14, 0, 21, 0, 16, 0, 4, 0, 12, 0, 0, 0, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 16, 0, 0, 0, 0, 3, 10, 0, 24, 0, 12, 0, 8, 0, 4, 0, 10, 0, 0, 0, 20, 0, 0, 0, 56, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, + 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + ]), + rowCount: 1, +}; + +const sampleRowSet1 = { + startRowOffset: 0, + arrowBatches: [sampleArrowBatch], +}; + +const sampleRowSet2 = { + startRowOffset: 0, + arrowBatches: undefined, +}; + +const sampleRowSet3 = { + startRowOffset: 0, + arrowBatches: [], +}; + +const 
sampleRowSet4 = { + startRowOffset: 0, + arrowBatches: [ + { + batch: undefined, + rowCount: 0, + }, + ], +}; + +describe('ArrowResultHandler', () => { + it('should not buffer any data', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([sampleRowSet1]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await rowSetProvider.hasMore()).to.be.true; + expect(await result.hasMore()).to.be.true; + + await result.fetchNext({ limit: 10000 }); + expect(await rowSetProvider.hasMore()).to.be.false; + expect(await result.hasMore()).to.be.false; + }); + + it('should return empty array if no data to process', async () => { + const context = {}; + case1: { + const rowSetProvider = new ResultsProviderMock(); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + } + case2: { + const rowSetProvider = new ResultsProviderMock([sampleRowSet2]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + } + case3: { + const rowSetProvider = new ResultsProviderMock([sampleRowSet3]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + } + case4: { + const rowSetProvider = new ResultsProviderMock([sampleRowSet4]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + } + }); + + it('should return empty array if no schema available', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([sampleRowSet2]); + const result = new ArrowResultHandler(context, rowSetProvider); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + }); +}); diff --git a/tests/unit/result/CloudFetchResult.test.js b/tests/unit/result/CloudFetchResultHandler.test.js similarity index 52% rename from tests/unit/result/CloudFetchResult.test.js rename to tests/unit/result/CloudFetchResultHandler.test.js index 20451093..bbe9638f 100644 --- a/tests/unit/result/CloudFetchResult.test.js +++ b/tests/unit/result/CloudFetchResultHandler.test.js @@ -1,27 +1,9 @@ const { expect, AssertionError } = require('chai'); const sinon = require('sinon'); const Int64 = require('node-int64'); -const CloudFetchResult = require('../../../dist/result/CloudFetchResult').default; -const globalConfig = require('../../../dist/globalConfig').default; - -const sampleThriftSchema = { - columns: [ - { - columnName: '1', - typeDesc: { - types: [ - { - primitiveEntry: { - type: 3, - typeQualifiers: null, - }, - }, - ], - }, - position: 1, - }, - ], -}; +const CloudFetchResultHandler = require('../../../dist/result/CloudFetchResultHandler').default; +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); +const DBSQLClient = require('../../../dist/DBSQLClient').default; const sampleArrowSchema = Buffer.from([ 255, 255, 255, 255, 208, 0, 0, 0, 16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 6, 0, 13, 0, 8, 0, 10, 0, 0, 0, 0, 0, 4, 0, 16, 0, @@ -94,133 +76,156 @@ const sampleExpiredRowSet = { ], }; -describe('CloudFetchResult', 
() => { - let savedConcurrentDownloads; - - beforeEach(() => { - savedConcurrentDownloads = globalConfig.cloudFetchConcurrentDownloads; - }); +describe('CloudFetchResultHandler', () => { + it('should report pending data if there are any', async () => { + const rowSetProvider = new ResultsProviderMock(); + const clientConfig = DBSQLClient.getDefaultConfig(); - afterEach(() => { - globalConfig.cloudFetchConcurrentDownloads = savedConcurrentDownloads; - }); + const context = { + getConfig: () => clientConfig, + }; - it('should report pending data if there are any', async () => { - const context = {}; - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); case1: { result.pendingLinks = []; - result.downloadedBatches = []; - expect(await result.hasPendingData()).to.be.false; + result.downloadTasks = []; + expect(await result.hasMore()).to.be.false; } case2: { result.pendingLinks = [{}]; // just anything here - result.downloadedBatches = []; - expect(await result.hasPendingData()).to.be.true; + result.downloadTasks = []; + expect(await result.hasMore()).to.be.true; } case3: { result.pendingLinks = []; - result.downloadedBatches = [{}]; // just anything here - expect(await result.hasPendingData()).to.be.true; + result.downloadTasks = [{}]; // just anything here + expect(await result.hasMore()).to.be.true; } }); it('should extract links from row sets', async () => { - globalConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches + const clientConfig = DBSQLClient.getDefaultConfig(); + clientConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches + + const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2]; + const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); - const context = {}; + const rowSetProvider = new ResultsProviderMock(rowSets); + const context = { + getConfig: () => clientConfig, + }; - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: true, status: 200, statusText: 'OK', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); - const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2]; - const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 
0), 0); + do { + await result.fetchNext({ limit: 100000 }); + } while (await rowSetProvider.hasMore()); - const batches = await result.getBatches(rowSets); - expect(batches.length).to.be.equal(0); - expect(result.fetch.called).to.be.false; expect(result.pendingLinks.length).to.be.equal(expectedLinksCount); + expect(result.downloadTasks.length).to.be.equal(0); + expect(result.fetch.called).to.be.false; }); it('should download batches according to settings', async () => { - globalConfig.cloudFetchConcurrentDownloads = 2; + const clientConfig = DBSQLClient.getDefaultConfig(); + clientConfig.cloudFetchConcurrentDownloads = 3; - const context = {}; + const rowSet = { + startRowOffset: 0, + resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks], + }; + const expectedLinksCount = rowSet.resultLinks.length; // 5 + const rowSetProvider = new ResultsProviderMock([rowSet]); + const context = { + getConfig: () => clientConfig, + }; - const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ ok: true, status: 200, statusText: 'OK', - arrayBuffer: async () => sampleArrowBatch, + arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]), }), ); - const rowSets = [sampleRowSet1, sampleRowSet2]; - const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); + expect(await rowSetProvider.hasMore()).to.be.true; initialFetch: { - const batches = await result.getBatches(rowSets); - expect(batches.length).to.be.equal(1); - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); - expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + // `cloudFetchConcurrentDownloads` out of `expectedLinksCount` links should be scheduled immediately + // the first one should be `await`-ed and returned from `fetchNext` + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads); + expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads); + expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1); } secondFetch: { - // It should return a previously fetched batch, not performing additional network requests - const batches = await result.getBatches([]); - expect(batches.length).to.be.equal(1); - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); // no new fetches - expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 2); + // It should return a previously fetched batch, and schedule one more + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads + 1); + expect(result.pendingLinks.length).to.be.equal( + expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads - 1, + ); + expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1); } thirdFetch: { // Now the buffer should be empty, and it should fetch the next batches - const batches = await result.getBatches([]); - expect(batches.length).to.be.equal(1); - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads * 2); + const items = await result.fetchNext({ limit: 10000 }); + expect(items.length).to.be.gt(0); + expect(await rowSetProvider.hasMore()).to.be.false; + + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads + 2); expect(result.pendingLinks.length).to.be.equal( - expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads * 2, + expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads - 2, ); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1); } });
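The arithmetic these assertions encode: with N links in the row set and concurrency C, after the k-th call to `fetchNext` up to min(N, C + k - 1) downloads have been scheduled in total, k of them have been consumed, and the rest sit buffered. A tiny model of that window (a hypothetical helper for illustration only, not part of the library):

// Hypothetical model of the CloudFetch download window asserted above.
function downloadWindow(totalLinks, concurrency, fetchCalls) {
  // total download tasks scheduled so far (capped by the number of links)
  const scheduled = Math.min(totalLinks, concurrency + (fetchCalls - 1));
  return {
    pendingLinks: totalLinks - scheduled, // links not scheduled yet
    downloadTasks: scheduled - fetchCalls, // scheduled but not consumed yet
  };
}
// With 5 links and concurrency 3, as in the test above:
// downloadWindow(5, 3, 1) -> { pendingLinks: 2, downloadTasks: 2 }
// downloadWindow(5, 3, 2) -> { pendingLinks: 1, downloadTasks: 2 }
// downloadWindow(5, 3, 3) -> { pendingLinks: 0, downloadTasks: 2 }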
+      expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
     }

     thirdFetch: {
       // Now buffer should be empty, and it should fetch next batches
-      const batches = await result.getBatches([]);
-      expect(batches.length).to.be.equal(1);
-      expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads * 2);
+      const items = await result.fetchNext({ limit: 10000 });
+      expect(items.length).to.be.gt(0);
+      expect(await rowSetProvider.hasMore()).to.be.false;
+
+      expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads + 2);
       expect(result.pendingLinks.length).to.be.equal(
-        expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads * 2,
+        expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads - 2,
       );
-      expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1);
+      expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
     }
   });

   it('should handle HTTP errors', async () => {
-    globalConfig.cloudFetchConcurrentDownloads = 1;
+    const clientConfig = DBSQLClient.getDefaultConfig();
+    clientConfig.cloudFetchConcurrentDownloads = 1;

-    const context = {};
+    const rowSetProvider = new ResultsProviderMock([sampleRowSet1]);
+    const context = {
+      getConfig: () => clientConfig,
+    };

-    const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: false,
         status: 500,
         statusText: 'Internal Server Error',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

-    const rowSets = [sampleRowSet1];
-
     try {
-      await result.getBatches(rowSets);
+      await result.fetchNext({ limit: 10000 });
       expect.fail('It should throw an error');
     } catch (error) {
       if (error instanceof AssertionError) {
@@ -232,23 +237,30 @@ describe('CloudFetchResult', () => {
   });

   it('should handle expired links', async () => {
-    const context = {};
+    const rowSetProvider = new ResultsProviderMock([sampleExpiredRowSet]);
+    const clientConfig = DBSQLClient.getDefaultConfig();
+
+    const context = {
+      getConfig: () => clientConfig,
+    };

-    const result = new CloudFetchResult(context, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: true,
         status: 200,
         statusText: 'OK',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

-    const rowSets = [sampleExpiredRowSet];
+    // There are two links in the batch - the first one is valid and the second one is expired
+    // The first fetch has to be successful, and the second one should fail
+    await result.fetchNext({ limit: 10000 });

     try {
-      await result.getBatches(rowSets);
+      await result.fetchNext({ limit: 10000 });
       expect.fail('It should throw an error');
     } catch (error) {
       if (error instanceof AssertionError) {
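// A minimal sketch (not the shipped CloudFetchResultHandler code) of the
// scheduling invariant the tests above pin down. The names mirror the diff:
// `pendingLinks` holds result links that have not been requested yet,
// `downloadTasks` holds in-flight downloads, and `cloudFetchConcurrentDownloads`
// caps how many run at once. Each `fetchNext()` call appears to top the queue
// up to the cap and then await only the oldest task - which is why the tests
// expect `fetch.callCount` to grow by the full cap on the first call and by
// exactly one on every call after that.
async function fetchNextSketch(handler, concurrency) {
  while (handler.downloadTasks.length < concurrency && handler.pendingLinks.length > 0) {
    const link = handler.pendingLinks.shift();
    // Schedule without awaiting; `fileLink` is the URL field of the Thrift
    // TSparkArrowResultLink struct (assumed here for illustration)
    handler.downloadTasks.push(handler.fetch(link.fileLink));
  }
  const oldestTask = handler.downloadTasks.shift();
  // Parsing the downloaded Arrow batch into rows is omitted from this sketch
  return oldestTask !== undefined ? [await oldestTask] : [];
}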
diff --git a/tests/unit/result/JsonResult.test.js b/tests/unit/result/JsonResultHandler.test.js
similarity index 86%
rename from tests/unit/result/JsonResult.test.js
rename to tests/unit/result/JsonResultHandler.test.js
index f7e90259..d6c3bf09 100644
--- a/tests/unit/result/JsonResult.test.js
+++ b/tests/unit/result/JsonResultHandler.test.js
@@ -1,7 +1,8 @@
 const { expect } = require('chai');
-const JsonResult = require('../../../dist/result/JsonResult').default;
+const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default;
 const { TCLIService_types } = require('../../../').thrift;
 const Int64 = require('node-int64');
+const ResultsProviderMock = require('./fixtures/ResultsProviderMock');

 const getColumnSchema = (columnName, type, position) => {
   if (type === undefined) {
@@ -27,7 +28,7 @@
   };
 };

-describe('JsonResult', () => {
+describe('JsonResultHandler', () => {
   it('should not buffer any data', async () => {
     const schema = {
       columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)],
     };
     const data = [
@@ -39,10 +40,15 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context, schema);
-    await result.getValue(data);
-    expect(await result.hasPendingData()).to.be.false;
+    const result = new JsonResultHandler(context, rowSetProvider, schema);
+    expect(await rowSetProvider.hasMore()).to.be.true;
+    expect(await result.hasMore()).to.be.true;
+
+    await result.fetchNext({ limit: 10000 });
+    expect(await rowSetProvider.hasMore()).to.be.false;
+    expect(await result.hasMore()).to.be.false;
   });

   it('should convert schema with primitive types to json', async () => {
@@ -127,10 +133,11 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.str': 'a',
         'table.int64': 282578800148737,
@@ -199,10 +206,11 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.array': ['a', 'b'],
         'table.map': { key: 12 },
@@ -218,44 +226,11 @@
     ]);
   });

-  it('should merge data items', async () => {
-    const schema = {
-      columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)],
-    };
-    const data = [
-      {
-        columns: [
-          {
-            stringVal: { values: ['0', '1'] },
-          },
-        ],
-      },
-      {}, // it should also handle empty sets
-      {
-        columns: [
-          {
-            stringVal: { values: ['2', '3'] },
-          },
-        ],
-      },
-    ];
-
-    const context = {};
-
-    const result = new JsonResult(context, schema);
-
-    expect(await result.getValue(data)).to.be.deep.eq([
-      { 'table.id': '0' },
-      { 'table.id': '1' },
-      { 'table.id': '2' },
-      { 'table.id': '3' },
-    ]);
-  });
-
   it('should detect nulls', () => {
     const context = {};
+    const rowSetProvider = new ResultsProviderMock();

-    const result = new JsonResult(context, null);
+    const result = new JsonResultHandler(context, rowSetProvider, null);

     const buf = Buffer.from([0x55, 0xaa, 0xc3]);

     [
@@ -368,10 +343,11 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.str': null,
         'table.int64': null,
@@ -399,11 +375,10 @@
     };

     const context = {};
+    const rowSetProvider = new ResultsProviderMock();

-    const result = new JsonResult(context, schema);
-
-    expect(await result.getValue()).to.be.deep.eq([]);
-    expect(await result.getValue([])).to.be.deep.eq([]);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should return empty array if no schema available', async () => {
@@ -418,10 +393,11 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context);
+    const result = new JsonResultHandler(context, rowSetProvider);

-    expect(await result.getValue(data)).to.be.deep.eq([]);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should return raw data if types are not specified', async () => {
@@ -453,10 +429,11 @@
     ];

     const context = {};
+    const rowSetProvider = new ResultsProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.array': '["a", "b"]',
         'table.map': '{ "key": 12 }',
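// A hedged sketch of the LSB-first null-bitmap convention that the
// `should detect nulls` test above probes with Buffer.from([0x55, 0xaa, 0xc3]).
// `isNullRow` is an illustrative helper, not part of the handler's API: Thrift
// column batches carry a `nulls` byte array next to the values, and bit i
// (least significant bit first within each byte) flags row i as NULL.
function isNullRow(nulls, rowIndex) {
  const byte = nulls[Math.floor(rowIndex / 8)] ?? 0;
  return (byte & (1 << (rowIndex % 8))) !== 0;
}

// 0x55 === 0b01010101, so among the first eight rows the even-indexed ones
// are NULL: isNullRow(Buffer.from([0x55, 0xaa, 0xc3]), 0) === true, while
// isNullRow(Buffer.from([0x55, 0xaa, 0xc3]), 1) === false.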
diff --git a/tests/unit/result/ResultSlicer.test.js b/tests/unit/result/ResultSlicer.test.js
new file mode 100644
index 00000000..715d250b
--- /dev/null
+++ b/tests/unit/result/ResultSlicer.test.js
@@ -0,0 +1,88 @@
+const { expect } = require('chai');
+const sinon = require('sinon');
+const ResultSlicer = require('../../../dist/result/ResultSlicer').default;
+const ResultsProviderMock = require('./fixtures/ResultsProviderMock');
+
+describe('ResultSlicer', () => {
+  it('should return chunks of requested size', async () => {
+    const provider = new ResultsProviderMock(
+      [
+        [10, 11, 12, 13, 14, 15],
+        [20, 21, 22, 23, 24, 25],
+        [30, 31, 32, 33, 34, 35],
+      ],
+      [],
+    );
+
+    const slicer = new ResultSlicer({}, provider);
+
+    const chunk1 = await slicer.fetchNext({ limit: 4 });
+    expect(chunk1).to.deep.eq([10, 11, 12, 13]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk2 = await slicer.fetchNext({ limit: 10 });
+    expect(chunk2).to.deep.eq([14, 15, 20, 21, 22, 23, 24, 25, 30, 31]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk3 = await slicer.fetchNext({ limit: 10 });
+    expect(chunk3).to.deep.eq([32, 33, 34, 35]);
+    expect(await slicer.hasMore()).to.be.false;
+  });
+
+  it('should return raw chunks', async () => {
+    const provider = new ResultsProviderMock(
+      [
+        [10, 11, 12, 13, 14, 15],
+        [20, 21, 22, 23, 24, 25],
+        [30, 31, 32, 33, 34, 35],
+      ],
+      [],
+    );
+    sinon.spy(provider, 'fetchNext');
+
+    const slicer = new ResultSlicer({}, provider);
+
+    const chunk1 = await slicer.fetchNext({ limit: 4, disableBuffering: true });
+    expect(chunk1).to.deep.eq([10, 11, 12, 13, 14, 15]);
+    expect(await slicer.hasMore()).to.be.true;
+    expect(provider.fetchNext.callCount).to.be.equal(1);
+
+    const chunk2 = await slicer.fetchNext({ limit: 10, disableBuffering: true });
+    expect(chunk2).to.deep.eq([20, 21, 22, 23, 24, 25]);
+    expect(await slicer.hasMore()).to.be.true;
+    expect(provider.fetchNext.callCount).to.be.equal(2);
+  });
+
+  it('should switch between returning sliced and raw chunks', async () => {
+    const provider = new ResultsProviderMock(
+      [
+        [10, 11, 12, 13, 14, 15],
+        [20, 21, 22, 23, 24, 25],
+        [30, 31, 32, 33, 34, 35],
+      ],
+      [],
+    );
+
+    const slicer = new ResultSlicer({}, provider);
+
+    const chunk1 = await slicer.fetchNext({ limit: 4 });
+    expect(chunk1).to.deep.eq([10, 11, 12, 13]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk2 = await slicer.fetchNext({ limit: 10, disableBuffering: true });
+    expect(chunk2).to.deep.eq([14, 15]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk3 = await slicer.fetchNext({ limit: 10, disableBuffering: true });
+    expect(chunk3).to.deep.eq([20, 21, 22, 23, 24, 25]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk4 = await slicer.fetchNext({ limit: 4 });
+    expect(chunk4).to.deep.eq([30, 31, 32, 33]);
+    expect(await slicer.hasMore()).to.be.true;
+
+    const chunk5 = await slicer.fetchNext({ limit: 4 });
+    expect(chunk5).to.deep.eq([34, 35]);
+    expect(await slicer.hasMore()).to.be.false;
+  });
+});
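// An illustrative model of the slicing rules the new ResultSlicer tests
// assert; it assumes only the public `fetchNext({ limit, disableBuffering })`
// contract and is not the real implementation. In buffered mode the slicer
// accumulates provider chunks until `limit` rows are available and keeps the
// remainder for the next call; with `disableBuffering: true` it first flushes
// anything already buffered and otherwise passes one provider chunk through
// unsliced.
async function sliceSketch(buffer, provider, { limit, disableBuffering }) {
  if (disableBuffering) {
    if (buffer.length > 0) {
      return buffer.splice(0); // flush the buffered remainder as-is
    }
    return provider.fetchNext({ limit }); // raw pass-through chunk
  }
  while (buffer.length < limit && (await provider.hasMore())) {
    buffer.push(...(await provider.fetchNext({ limit })));
  }
  return buffer.splice(0, limit); // at most `limit` rows; the rest stays buffered
}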
diff --git a/tests/unit/result/compatibility.test.js b/tests/unit/result/compatibility.test.js
index 5b27d39e..c01f6674 100644
--- a/tests/unit/result/compatibility.test.js
+++ b/tests/unit/result/compatibility.test.js
@@ -1,31 +1,45 @@
 const { expect } = require('chai');
-const ArrowResult = require('../../../dist/result/ArrowResult').default;
-const JsonResult = require('../../../dist/result/JsonResult').default;
+const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default;
+const ArrowResultConverter = require('../../../dist/result/ArrowResultConverter').default;
+const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default;
 const { fixArrowResult } = require('../../fixtures/compatibility');
 const fixtureColumn = require('../../fixtures/compatibility/column');
 const fixtureArrow = require('../../fixtures/compatibility/arrow');
 const fixtureArrowNT = require('../../fixtures/compatibility/arrow_native_types');
+const ResultsProviderMock = require('./fixtures/ResultsProviderMock');
+
 describe('Result handlers compatibility tests', () => {
   it('column-based data', async () => {
     const context = {};
-    const result = new JsonResult(context, fixtureColumn.schema);
-    const rows = await result.getValue(fixtureColumn.rowSets);
+    const rowSetProvider = new ResultsProviderMock(fixtureColumn.rowSets);
+    const result = new JsonResultHandler(context, rowSetProvider, fixtureColumn.schema);
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(rows).to.deep.equal(fixtureColumn.expected);
   });

   it('arrow-based data without native types', async () => {
     const context = {};
-    const result = new ArrowResult(context, fixtureArrow.schema, fixtureArrow.arrowSchema);
-    const rows = await result.getValue(fixtureArrow.rowSets);
+    const rowSetProvider = new ResultsProviderMock(fixtureArrow.rowSets);
+    const result = new ArrowResultConverter(
+      context,
+      new ArrowResultHandler(context, rowSetProvider, fixtureArrow.arrowSchema),
+      fixtureArrow.schema,
+    );
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(fixArrowResult(rows)).to.deep.equal(fixtureArrow.expected);
   });

   it('arrow-based data with native types', async () => {
     const context = {};
-    const result = new ArrowResult(context, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema);
-    const rows = await result.getValue(fixtureArrowNT.rowSets);
+    const rowSetProvider = new ResultsProviderMock(fixtureArrowNT.rowSets);
+    const result = new ArrowResultConverter(
+      context,
+      new ArrowResultHandler(context, rowSetProvider, fixtureArrowNT.arrowSchema),
+      fixtureArrowNT.schema,
+    );
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(fixArrowResult(rows)).to.deep.equal(fixtureArrowNT.expected);
   });
 });
diff --git a/tests/unit/result/fixtures/ResultsProviderMock.js b/tests/unit/result/fixtures/ResultsProviderMock.js
new file mode 100644
index 00000000..a1dba3e0
--- /dev/null
+++ b/tests/unit/result/fixtures/ResultsProviderMock.js
@@ -0,0 +1,16 @@
+class ResultsProviderMock {
+  constructor(items, emptyItem) {
+    this.items = Array.isArray(items) ? [...items] : [];
+    this.emptyItem = emptyItem;
+  }
+
+  async hasMore() {
+    return this.items.length > 0;
+  }
+
+  async fetchNext() {
+    return this.items.shift() ?? this.emptyItem;
+  }
+}
+
+module.exports = ResultsProviderMock;
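// Usage sketch for the fixture above: each queued item is handed out exactly
// once, after which `hasMore()` turns false and `fetchNext()` falls back to
// `emptyItem`. The `drainAll` helper is hypothetical, shown only to
// illustrate the IResultsProvider-style loop the unit tests rely on.
const ResultsProviderMock = require('./fixtures/ResultsProviderMock');

async function drainAll(provider) {
  const rows = [];
  while (await provider.hasMore()) {
    rows.push(...(await provider.fetchNext({ limit: 10000 })));
  }
  return rows;
}

// drainAll(new ResultsProviderMock([[1, 2], [3]], [])) resolves to [1, 2, 3]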