From 57c21d74e0736267c94182e996952eca7b0d1009 Mon Sep 17 00:00:00 2001
From: Levko Kravets
Date: Wed, 15 Nov 2023 00:08:01 +0200
Subject: [PATCH 01/11] DBSQLOperation Refactoring (3 of 3) (#198)

* Refactoring: Introduce concept of results provider; convert FetchResultsHelper into provider of TRowSet

Signed-off-by: Levko Kravets

* Convert Json/Arrow/CloudFetch result handlers to implement result provider interface

Signed-off-by: Levko Kravets

* Refine the code and update tests

Signed-off-by: Levko Kravets

---------

Signed-off-by: Levko Kravets
---
 lib/DBSQLOperation/index.ts                   | 39 ++++-----
 .../{ArrowResult.ts => ArrowResultHandler.ts} | 36 +++++---
 ...chResult.ts => CloudFetchResultHandler.ts} | 24 +++---
 lib/result/IOperationResult.ts                |  7 --
 lib/result/IResultsProvider.ts                |  9 ++
 .../{JsonResult.ts => JsonResultHandler.ts}   | 28 +++---
 .../RowSetProvider.ts}                        | 11 ++-
 tests/e2e/arrow.test.js                       | 19 +++--
 tests/e2e/cloudfetch.test.js                  | 12 +--
 tests/unit/DBSQLOperation.test.js             | 12 +--
 ...ult.test.js => ArrowResultHandler.test.js} | 58 +++++++++----
 ...est.js => CloudFetchResultHandler.test.js} | 75 +++++++++-------
 ...sult.test.js => JsonResultHandler.test.js} | 85 +++++++------------
 tests/unit/result/compatibility.test.js       | 21 +++--
 .../result/fixtures/RowSetProviderMock.js     | 15 ++++
 15 files changed, 254 insertions(+), 197 deletions(-)
 rename lib/result/{ArrowResult.ts => ArrowResultHandler.ts} (82%)
 rename lib/result/{CloudFetchResult.ts => CloudFetchResultHandler.ts} (71%)
 delete mode 100644 lib/result/IOperationResult.ts
 create mode 100644 lib/result/IResultsProvider.ts
 rename lib/result/{JsonResult.ts => JsonResultHandler.ts} (73%)
 rename lib/{DBSQLOperation/FetchResultsHelper.ts => result/RowSetProvider.ts} (88%)
 rename tests/unit/result/{ArrowResult.test.js => ArrowResultHandler.test.js} (59%)
 rename tests/unit/result/{CloudFetchResult.test.js => CloudFetchResultHandler.test.js} (74%)
 rename tests/unit/result/{JsonResult.test.js => JsonResultHandler.test.js} (86%)
 create mode 100644 tests/unit/result/fixtures/RowSetProviderMock.js

diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts
index 5198d726..08e32864 100644
--- a/lib/DBSQLOperation/index.ts
+++ b/lib/DBSQLOperation/index.ts
@@ -16,13 +16,13 @@ import {
   TOperationState,
 } from '../../thrift/TCLIService_types';
 import Status from '../dto/Status';
-import FetchResultsHelper from './FetchResultsHelper';
 import { LogLevel } from '../contracts/IDBSQLLogger';
 import OperationStateError, { OperationStateErrorCode } from '../errors/OperationStateError';
-import IOperationResult from '../result/IOperationResult';
-import JsonResult from '../result/JsonResult';
-import ArrowResult from '../result/ArrowResult';
-import CloudFetchResult from '../result/CloudFetchResult';
+import IResultsProvider from '../result/IResultsProvider';
+import RowSetProvider from '../result/RowSetProvider';
+import JsonResultHandler from '../result/JsonResultHandler';
+import ArrowResultHandler from '../result/ArrowResultHandler';
+import CloudFetchResultHandler from '../result/CloudFetchResultHandler';
 import { definedOrError } from '../utils';
 import HiveDriverError from '../errors/HiveDriverError';
 import IClientContext from '../contracts/IClientContext';
@@ -50,7 +50,7 @@ export default class DBSQLOperation implements IOperation {

   public onClose?: () => void;

-  private readonly _data: FetchResultsHelper;
+  private readonly _data: RowSetProvider;

   private readonly closeOperation?: TCloseOperationResp;

@@ -68,7 +68,7 @@ export default class DBSQLOperation implements IOperation {
   private hasResultSet: boolean = false;

-  private resultHandler?: IOperationResult;
+  private resultHandler?: IResultsProvider<Array<any>>;

   constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) {
     this.operationHandle = handle;
@@ -82,7 +82,7 @@ export default class DBSQLOperation implements IOperation {
     }

     this.metadata = directResults?.resultSetMetadata;
-    this._data = new FetchResultsHelper(
+    this._data = new RowSetProvider(
       this.context,
       this.operationHandle,
       [directResults?.resultSet],
@@ -135,14 +133,12 @@ export default class DBSQLOperation implements IOperation {

     await this.waitUntilReady(options);

-    const [resultHandler, data] = await Promise.all([
-      this.getResultHandler(),
-      this._data.fetch(options?.maxRows || defaultMaxRows),
-    ]);
+    const resultHandler = await this.getResultHandler();
+    await this.failIfClosed();

+    const result = resultHandler.fetchNext({ limit: options?.maxRows || defaultMaxRows });
     await this.failIfClosed();

-    const result = await resultHandler.getValue(data ? [data] : []);
     this.context
       .getLogger()
       .log(
@@ -234,14 +232,9 @@ export default class DBSQLOperation implements IOperation {
       return false;
     }

-    // Return early if there are still data available for fetching
-    if (this._data.hasMoreRows) {
-      return true;
-    }
-
    // If we fetched all the data from server - check if there's anything buffered in result handler
     const resultHandler = await this.getResultHandler();
-    return resultHandler.hasPendingData();
+    return resultHandler.hasMore();
   }

   public async getSchema(options?: GetSchemaOptions): Promise<TTableSchema | undefined> {
@@ -342,20 +335,20 @@ export default class DBSQLOperation implements IOperation {
     return this.metadata;
   }

-  private async getResultHandler(): Promise<IOperationResult> {
+  private async getResultHandler(): Promise<IResultsProvider<Array<any>>> {
     const metadata = await this.fetchMetadata();
     const resultFormat = definedOrError(metadata.resultFormat);

     if (!this.resultHandler) {
       switch (resultFormat) {
         case TSparkRowSetType.COLUMN_BASED_SET:
-          this.resultHandler = new JsonResult(this.context, metadata.schema);
+          this.resultHandler = new JsonResultHandler(this.context, this._data, metadata.schema);
           break;
         case TSparkRowSetType.ARROW_BASED_SET:
-          this.resultHandler = new ArrowResult(this.context, metadata.schema, metadata.arrowSchema);
+          this.resultHandler = new ArrowResultHandler(this.context, this._data, metadata.schema, metadata.arrowSchema);
           break;
         case TSparkRowSetType.URL_BASED_SET:
-          this.resultHandler = new CloudFetchResult(this.context, metadata.schema);
+          this.resultHandler = new CloudFetchResultHandler(this.context, this._data, metadata.schema);
           break;
         default:
           this.resultHandler = undefined;
diff --git a/lib/result/ArrowResult.ts b/lib/result/ArrowResultHandler.ts
similarity index 82%
rename from lib/result/ArrowResult.ts
rename to lib/result/ArrowResultHandler.ts
index b44ae305..a1076293 100644
--- a/lib/result/ArrowResult.ts
+++ b/lib/result/ArrowResultHandler.ts
@@ -13,7 +13,7 @@ import {
 } from 'apache-arrow';
 import { TRowSet, TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types';
 import IClientContext from '../contracts/IClientContext';
-import IOperationResult from './IOperationResult';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
 import { getSchemaColumns, convertThriftValue } from './utils';

 const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils;
@@ -21,28 +21,38 @@ const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils;
 type ArrowSchema = Schema<TypeMap>;
 type ArrowSchemaField = Field<DataType<Type, TypeMap>>;

-export default class ArrowResult implements IOperationResult {
+export default class ArrowResultHandler implements IResultsProvider<Array<any>> {
   protected readonly context: IClientContext;

+  private readonly source: IResultsProvider<TRowSet | undefined>;
+
   private readonly schema: Array<TColumnDesc>;

   private readonly arrowSchema?: Buffer;

-  constructor(context: IClientContext, schema?: TTableSchema, arrowSchema?: Buffer) {
+  constructor(
+    context: IClientContext,
+    source: IResultsProvider<TRowSet | undefined>,
+    schema?: TTableSchema,
+    arrowSchema?: Buffer,
+  ) {
     this.context = context;
+    this.source = source;
     this.schema = getSchemaColumns(schema);
     this.arrowSchema = arrowSchema;
   }

-  async hasPendingData() {
-    return false;
+  public async hasMore() {
+    return this.source.hasMore();
   }

-  async getValue(data?: Array<TRowSet>) {
-    if (this.schema.length === 0 || !this.arrowSchema || !data) {
+  public async fetchNext(options: ResultsProviderFetchNextOptions) {
+    if (this.schema.length === 0 || !this.arrowSchema) {
       return [];
     }

+    const data = await this.source.fetchNext(options);
+
     const batches = await this.getBatches(data);
     if (batches.length === 0) {
       return [];
@@ -52,15 +62,13 @@ export default class ArrowResult implements IOperationResult {
     return this.getRows(table.schema, table.toArray());
   }

-  protected async getBatches(data: Array<TRowSet>): Promise<Array<Buffer>> {
+  protected async getBatches(rowSet?: TRowSet): Promise<Array<Buffer>> {
     const result: Array<Buffer> = [];

-    data.forEach((rowSet) => {
-      rowSet.arrowBatches?.forEach((arrowBatch) => {
-        if (arrowBatch.batch) {
-          result.push(arrowBatch.batch);
-        }
-      });
+    rowSet?.arrowBatches?.forEach((arrowBatch) => {
+      if (arrowBatch.batch) {
+        result.push(arrowBatch.batch);
+      }
     });

     return result;
diff --git a/lib/result/CloudFetchResult.ts b/lib/result/CloudFetchResultHandler.ts
similarity index 71%
rename from lib/result/CloudFetchResult.ts
rename to lib/result/CloudFetchResultHandler.ts
index 31fbd633..a49e8714 100644
--- a/lib/result/CloudFetchResult.ts
+++ b/lib/result/CloudFetchResultHandler.ts
@@ -2,29 +2,31 @@ import { Buffer } from 'buffer';
 import fetch, { RequestInfo, RequestInit } from 'node-fetch';
 import { TRowSet, TSparkArrowResultLink, TTableSchema } from '../../thrift/TCLIService_types';
 import IClientContext from '../contracts/IClientContext';
-import ArrowResult from './ArrowResult';
+import IResultsProvider from './IResultsProvider';
+import ArrowResultHandler from './ArrowResultHandler';
 import globalConfig from '../globalConfig';

-export default class CloudFetchResult extends ArrowResult {
+export default class CloudFetchResultHandler extends ArrowResultHandler {
   private pendingLinks: Array<TSparkArrowResultLink> = [];

   private downloadedBatches: Array<Buffer> = [];

-  constructor(context: IClientContext, schema?: TTableSchema) {
+  constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>, schema?: TTableSchema) {
     // Arrow schema returned in metadata is not needed for CloudFetch results:
     // each batch already contains schema and could be decoded as is
-    super(context, schema, Buffer.alloc(0));
+    super(context, source, schema, Buffer.alloc(0));
   }

-  async hasPendingData() {
-    return this.pendingLinks.length > 0 || this.downloadedBatches.length > 0;
+  public async hasMore() {
+    if (this.pendingLinks.length > 0 || this.downloadedBatches.length > 0) {
+      return true;
+    }
+    return super.hasMore();
   }

-  protected async getBatches(data: Array<TRowSet>): Promise<Array<Buffer>> {
-    data.forEach((item) => {
-      item.resultLinks?.forEach((link) => {
-        this.pendingLinks.push(link);
-      });
+  protected async getBatches(data?: TRowSet): Promise<Array<Buffer>> {
+    data?.resultLinks?.forEach((link) => {
+      this.pendingLinks.push(link);
     });

     if (this.downloadedBatches.length === 0) {
diff --git a/lib/result/IOperationResult.ts b/lib/result/IOperationResult.ts
deleted file mode 100644
index 7b42a196..00000000
--- a/lib/result/IOperationResult.ts
+++ /dev/null
@@ -1,7 +0,0 @@
-import { TRowSet } from '../../thrift/TCLIService_types';
-
-export default interface IOperationResult {
-  getValue(data?: Array<TRowSet>): Promise<any>;
-
-  hasPendingData(): Promise<boolean>;
-}
diff --git a/lib/result/IResultsProvider.ts b/lib/result/IResultsProvider.ts
new file mode 100644
index 00000000..0e521f71
--- /dev/null
+++ b/lib/result/IResultsProvider.ts
@@ -0,0 +1,9 @@
+export interface ResultsProviderFetchNextOptions {
+  limit: number;
+}
+
+export default interface IResultsProvider<T> {
+  fetchNext(options: ResultsProviderFetchNextOptions): Promise<T>;
+
+  hasMore(): Promise<boolean>;
+}
diff --git a/lib/result/JsonResult.ts b/lib/result/JsonResultHandler.ts
similarity index 73%
rename from lib/result/JsonResult.ts
rename to lib/result/JsonResultHandler.ts
index 0c7daefa..bcc07e77 100644
--- a/lib/result/JsonResult.ts
+++ b/lib/result/JsonResultHandler.ts
@@ -1,34 +1,38 @@
 import { ColumnCode } from '../hive/Types';
 import { TRowSet, TTableSchema, TColumn, TColumnDesc } from '../../thrift/TCLIService_types';
 import IClientContext from '../contracts/IClientContext';
-import IOperationResult from './IOperationResult';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';
 import { getSchemaColumns, convertThriftValue } from './utils';

-export default class JsonResult implements IOperationResult {
+export default class JsonResultHandler implements IResultsProvider<Array<any>> {
   private readonly context: IClientContext;

+  private readonly source: IResultsProvider<TRowSet | undefined>;
+
   private readonly schema: Array<TColumnDesc>;

-  constructor(context: IClientContext, schema?: TTableSchema) {
+  constructor(context: IClientContext, source: IResultsProvider<TRowSet | undefined>, schema?: TTableSchema) {
     this.context = context;
+    this.source = source;
     this.schema = getSchemaColumns(schema);
   }

-  async hasPendingData() {
-    return false;
+  public async hasMore() {
+    return this.source.hasMore();
   }

-  async getValue(data?: Array<TRowSet>): Promise<Array<any>> {
-    if (this.schema.length === 0 || !data) {
+  public async fetchNext(options: ResultsProviderFetchNextOptions) {
+    if (this.schema.length === 0) {
       return [];
     }

-    return data.reduce((result: Array<any>, rowSet: TRowSet) => {
-      const columns = rowSet.columns || [];
-      const rows = this.getRows(columns, this.schema);
+    const data = await this.source.fetchNext(options);
+    if (!data) {
+      return [];
+    }

-      return result.concat(rows);
-    }, []);
+    const columns = data.columns || [];
+    return this.getRows(columns, this.schema);
   }

   private getRows(columns: Array<TColumn>, descriptors: Array<TColumnDesc>): Array<any> {
diff --git a/lib/DBSQLOperation/FetchResultsHelper.ts b/lib/result/RowSetProvider.ts
similarity index 88%
rename from lib/DBSQLOperation/FetchResultsHelper.ts
rename to lib/result/RowSetProvider.ts
index 79f82603..b131e905 100644
--- a/lib/DBSQLOperation/FetchResultsHelper.ts
+++ b/lib/result/RowSetProvider.ts
@@ -8,6 +8,7 @@ import {
 import { ColumnCode, FetchType, Int64 } from '../hive/Types';
 import Status from '../dto/Status';
 import IClientContext from '../contracts/IClientContext';
+import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider';

 function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean {
   if (response.hasMoreRows) {
@@ -35,7 +36,7 @@ function checkIfOperationHasMoreRows(response: TFetchResultsResp): boolean {
   return (columnValue?.values?.length || 0) > 0;
 }

-export default class FetchResultsHelper {
+export default class RowSetProvider implements IResultsProvider<TRowSet | undefined> {
   private readonly context: IClientContext;

   private readonly operationHandle: TOperationHandle;
@@ -79,7 +80,7 @@ export default class FetchResultsHelper {
     return response.results;
   }

-  public async fetch(maxRows: number) {
+  public async fetchNext({ limit }: ResultsProviderFetchNextOptions) {
     const prefetchedResponse = this.prefetchedResults.shift();
     if (prefetchedResponse) {
       return this.processFetchResponse(prefetchedResponse);
@@ -89,10 +90,14 @@ export default class FetchResultsHelper {
     const response = await driver.fetchResults({
       operationHandle: this.operationHandle,
       orientation: this.fetchOrientation,
-      maxRows: new Int64(maxRows),
+      maxRows: new Int64(limit),
       fetchType: FetchType.Data,
     });

     return this.processFetchResponse(response);
   }
+
+  public async hasMore() {
+    return this.hasMoreRows;
+  }
 }
diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js
index a75c3059..4118a116 100644
--- a/tests/e2e/arrow.test.js
+++ b/tests/e2e/arrow.test.js
@@ -1,8 +1,9 @@
 const { expect } = require('chai');
+const sinon = require('sinon');
 const config = require('./utils/config');
 const logger = require('./utils/logger')(config.logger);
 const { DBSQLClient } = require('../..');
-const ArrowResult = require('../../dist/result/ArrowResult').default;
+const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default;
 const globalConfig = require('../../dist/globalConfig').default;

 const fixtures = require('../fixtures/compatibility');
@@ -76,7 +77,7 @@ describe('Arrow support', () => {
       expect(result).to.deep.equal(expectedColumn);

       const resultHandler = await operation.getResultHandler();
-      expect(resultHandler).to.be.not.instanceof(ArrowResult);
+      expect(resultHandler).to.be.not.instanceof(ArrowResultHandler);

       await operation.close();
     }),
@@ -93,7 +94,7 @@ describe('Arrow support', () => {
       expect(fixArrowResult(result)).to.deep.equal(expectedArrow);

       const resultHandler = await operation.getResultHandler();
-      expect(resultHandler).to.be.instanceof(ArrowResult);
+      expect(resultHandler).to.be.instanceof(ArrowResultHandler);

       await operation.close();
     }),
@@ -110,7 +111,7 @@ describe('Arrow support', () => {
       expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes);

       const resultHandler = await operation.getResultHandler();
-      expect(resultHandler).to.be.instanceof(ArrowResult);
+      expect(resultHandler).to.be.instanceof(ArrowResultHandler);

       await operation.close();
     }),
@@ -130,14 +131,18 @@ describe('Arrow support', () => {

     // We use some internals here to check that server returned response with multiple batches
     const resultHandler = await operation.getResultHandler();
-    expect(resultHandler).to.be.instanceof(ArrowResult);
+    expect(resultHandler).to.be.instanceof(ArrowResultHandler);

-    const rawData = await operation._data.fetch(rowsCount);
+    sinon.spy(operation._data, 'fetchNext');
+
+    const result = await resultHandler.fetchNext({ limit: rowsCount });
+
+    expect(operation._data.fetchNext.callCount).to.be.eq(1);
+    const rawData = await operation._data.fetchNext.firstCall.returnValue;
     // We don't know exact count of batches returned, it depends on server's configuration,
     // but with much enough rows there should be more than one result batch
     expect(rawData.arrowBatches?.length).to.be.gt(1);

-    const result = await resultHandler.getValue([rawData]);
     expect(result.length).to.be.eq(rowsCount);
   });
 });
diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js
index 3997f6af..03b2cb60 100644
--- a/tests/e2e/cloudfetch.test.js
+++ b/tests/e2e/cloudfetch.test.js
@@ -3,7 +3,7 @@ const sinon = require('sinon');
 const config = require('./utils/config');
 const logger = require('./utils/logger')(config.logger);
 const { DBSQLClient } = require('../..');
-const CloudFetchResult = require('../../dist/result/CloudFetchResult').default;
+const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default;
 const globalConfig = require('../../dist/globalConfig').default;

 const openSession = async () => {
@@ -57,24 +57,24 @@ describe('CloudFetch', () => {

     // Check if we're actually getting data via CloudFetch
     const resultHandler = await operation.getResultHandler();
-    expect(resultHandler).to.be.instanceOf(CloudFetchResult);
+    expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler);

     // Fetch first chunk and check if result handler behaves properly.
     // With the count of rows we queried, there should be at least one row set,
     // containing 8 result links. After fetching the first chunk,
     // result handler should download 5 of them and schedule the rest
-    expect(await resultHandler.hasPendingData()).to.be.false;
+    expect(await resultHandler.hasMore()).to.be.false;
     expect(resultHandler.pendingLinks.length).to.be.equal(0);
     expect(resultHandler.downloadedBatches.length).to.be.equal(0);

-    sinon.spy(operation._data, 'fetch');
+    sinon.spy(operation._data, 'fetchNext');

     const chunk = await operation.fetchChunk({ maxRows: 100000 });
     // Count links returned from server
-    const resultSet = await operation._data.fetch.firstCall.returnValue;
+    const resultSet = await operation._data.fetchNext.firstCall.returnValue;
     const resultLinksCount = resultSet?.resultLinks?.length ?? 0;

-    expect(await resultHandler.hasPendingData()).to.be.true;
+    expect(await resultHandler.hasMore()).to.be.true;

     // expected batches minus first 5 already fetched
     expect(resultHandler.pendingLinks.length).to.be.equal(
       resultLinksCount - globalConfig.cloudFetchConcurrentDownloads,
diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js
index 94834baf..ef9c89c9 100644
--- a/tests/unit/DBSQLOperation.test.js
+++ b/tests/unit/DBSQLOperation.test.js
@@ -6,9 +6,9 @@ const DBSQLOperation = require('../../dist/DBSQLOperation').default;
 const StatusError = require('../../dist/errors/StatusError').default;
 const OperationStateError = require('../../dist/errors/OperationStateError').default;
 const HiveDriverError = require('../../dist/errors/HiveDriverError').default;
-const JsonResult = require('../../dist/result/JsonResult').default;
-const ArrowResult = require('../../dist/result/ArrowResult').default;
-const CloudFetchResult = require('../../dist/result/CloudFetchResult').default;
+const JsonResultHandler = require('../../dist/result/JsonResultHandler').default;
+const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default;
+const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default;

 class OperationHandleMock {
   constructor(hasResultSet = true) {
@@ -885,7 +885,7 @@ describe('DBSQLOperation', () => {
        const operation = new DBSQLOperation({ handle, context });
        const resultHandler = await operation.getResultHandler();
        expect(context.driver.getResultSetMetadata.called).to.be.true;
-        expect(resultHandler).to.be.instanceOf(JsonResult);
+        expect(resultHandler).to.be.instanceOf(JsonResultHandler);
      }

      arrowHandler: {
@@ -895,7 +895,7 @@ describe('DBSQLOperation', () => {
        const operation = new DBSQLOperation({ handle, context });
        const resultHandler = await operation.getResultHandler();
        expect(context.driver.getResultSetMetadata.called).to.be.true;
-        expect(resultHandler).to.be.instanceOf(ArrowResult);
+        expect(resultHandler).to.be.instanceOf(ArrowResultHandler);
      }

      cloudFetchHandler: {
@@ -905,7 +905,7 @@ describe('DBSQLOperation', () => {
        const operation = new DBSQLOperation({ handle, context });
        const resultHandler = await operation.getResultHandler();
        expect(context.driver.getResultSetMetadata.called).to.be.true;
-        expect(resultHandler).to.be.instanceOf(CloudFetchResult);
+        expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler);
      }
    });
  });
diff --git a/tests/unit/result/ArrowResult.test.js b/tests/unit/result/ArrowResultHandler.test.js
similarity index 59%
rename from tests/unit/result/ArrowResult.test.js
rename to tests/unit/result/ArrowResultHandler.test.js
index 27244190..03cdb5e1 100644
--- a/tests/unit/result/ArrowResult.test.js
+++ b/tests/unit/result/ArrowResultHandler.test.js
@@ -1,7 +1,8 @@
 const { expect } = require('chai');
 const fs = require('fs');
 const path = require('path');
-const ArrowResult = require('../../../dist/result/ArrowResult').default;
+const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default;
+const RowSetProviderMock = require('./fixtures/RowSetProviderMock');

 const sampleThriftSchema = {
   columns: [
@@ -84,40 +85,63 @@ const rowSetAllNulls = {
   ],
 };

-describe('ArrowResult', () => {
+describe('ArrowResultHandler', () => {
   it('should not buffer any data', async () => {
     const context = {};
-    const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema);
-    await result.getValue([sampleRowSet1]);
-    expect(await result.hasPendingData()).to.be.false;
+    const rowSetProvider = new RowSetProviderMock([sampleRowSet1]);
+    const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+    expect(await rowSetProvider.hasMore()).to.be.true;
+    expect(await result.hasMore()).to.be.true;
+
+    await result.fetchNext({ limit: 10000 });
+    expect(await rowSetProvider.hasMore()).to.be.false;
+    expect(await result.hasMore()).to.be.false;
   });

   it('should convert data', async () => {
     const context = {};
-    const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema);
-    expect(await result.getValue([sampleRowSet1])).to.be.deep.eq([]);
-    expect(await result.getValue([sampleRowSet2])).to.be.deep.eq([]);
-    expect(await result.getValue([sampleRowSet3])).to.be.deep.eq([]);
-    expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([{ 1: 1 }]);
+
+    case1: {
+      const rowSetProvider = new RowSetProviderMock([sampleRowSet1]);
+      const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+      expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
+    }
+    case2: {
+      const rowSetProvider = new RowSetProviderMock([sampleRowSet2]);
+      const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+      expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
+    }
+    case3: {
+      const rowSetProvider = new RowSetProviderMock([sampleRowSet3]);
+      const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+      expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
+    }
+    case4: {
+      const rowSetProvider = new RowSetProviderMock([sampleRowSet4]);
+      const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+      expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([{ 1: 1 }]);
+    }
   });

   it('should return empty array if no data to process', async () => {
     const context = {};
-    const result = new ArrowResult(context, sampleThriftSchema, sampleArrowSchema);
-    expect(await result.getValue()).to.be.deep.eq([]);
-    expect(await result.getValue([])).to.be.deep.eq([]);
+    const rowSetProvider = new RowSetProviderMock();
+    const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should return empty array if no schema available', async () => {
     const context = {};
-    const result = new ArrowResult(context);
-    expect(await result.getValue([sampleRowSet4])).to.be.deep.eq([]);
+    const rowSetProvider = new RowSetProviderMock([sampleRowSet4]);
+    const result = new ArrowResultHandler(context, rowSetProvider);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should detect nulls', async () => {
     const context = {};
-    const result = new ArrowResult(context, thriftSchemaAllNulls, arrowSchemaAllNulls);
-    expect(await result.getValue([rowSetAllNulls])).to.be.deep.eq([
+    const rowSetProvider = new RowSetProviderMock([rowSetAllNulls]);
+    const result = new ArrowResultHandler(context, rowSetProvider, thriftSchemaAllNulls, arrowSchemaAllNulls);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         boolean_field: null,
diff --git a/tests/unit/result/CloudFetchResult.test.js b/tests/unit/result/CloudFetchResultHandler.test.js
similarity index 74%
rename from tests/unit/result/CloudFetchResult.test.js
rename to tests/unit/result/CloudFetchResultHandler.test.js
index 20451093..e0a48151 100644
--- a/tests/unit/result/CloudFetchResult.test.js
+++ b/tests/unit/result/CloudFetchResultHandler.test.js
@@ -1,8 +1,9 @@
 const { expect, AssertionError } = require('chai');
 const sinon = require('sinon');
 const Int64 = require('node-int64');
-const CloudFetchResult = require('../../../dist/result/CloudFetchResult').default;
+const CloudFetchResultHandler = require('../../../dist/result/CloudFetchResultHandler').default;
 const globalConfig = require('../../../dist/globalConfig').default;
+const RowSetProviderMock = require('./fixtures/RowSetProviderMock');

 const sampleThriftSchema = {
   columns: [
@@ -94,7 +95,7 @@ const sampleExpiredRowSet = {
   ],
 };

-describe('CloudFetchResult', () => {
+describe('CloudFetchResultHandler', () => {
   let savedConcurrentDownloads;

   beforeEach(() => {
@@ -107,24 +108,25 @@
   it('should report pending data if there are any', async () => {
     const context = {};
-    const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema);
+    const rowSetProvider = new RowSetProviderMock();
+    const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema);

     case1: {
       result.pendingLinks = [];
       result.downloadedBatches = [];
-      expect(await result.hasPendingData()).to.be.false;
+      expect(await result.hasMore()).to.be.false;
     }

     case2: {
       result.pendingLinks = [{}]; // just anything here
       result.downloadedBatches = [];
-      expect(await result.hasPendingData()).to.be.true;
+      expect(await result.hasMore()).to.be.true;
     }

     case3: {
       result.pendingLinks = [];
       result.downloadedBatches = [{}]; // just anything here
-      expect(await result.hasPendingData()).to.be.true;
+      expect(await result.hasMore()).to.be.true;
     }
   });

@@ -132,22 +134,28 @@
     globalConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches

     const context = {};
+    const rowSetProvider = new RowSetProviderMock();

-    const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: true,
         status: 200,
         statusText: 'OK',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

     const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2];
     const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0);

-    const batches = await result.getBatches(rowSets);
+    const batches = [];
+    for (const rowSet of rowSets) {
+      const items = await result.getBatches(rowSet);
+      batches.push(...items);
+    }
+
     expect(batches.length).to.be.equal(0);
     expect(result.fetch.called).to.be.false;
     expect(result.pendingLinks.length).to.be.equal(expectedLinksCount);
@@ -157,24 +165,31 @@
     globalConfig.cloudFetchConcurrentDownloads = 2;

     const context = {};
+    const rowSet = {
+      startRowOffset: 0,
+      resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks],
+    };
+    const expectedLinksCount = rowSet.resultLinks.length;
+    const rowSetProvider = new RowSetProviderMock([rowSet]);

-    const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: true,
         status: 200,
         statusText: 'OK',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

-    const rowSets = [sampleRowSet1, sampleRowSet2];
-    const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0);
+    expect(await rowSetProvider.hasMore()).to.be.true;

     initialFetch: {
-      const batches = await result.getBatches(rowSets);
-      expect(batches.length).to.be.equal(1);
+      const items = await result.fetchNext({ limit: 10000 });
+      expect(items.length).to.be.gt(0);
+      expect(await rowSetProvider.hasMore()).to.be.false;
+
       expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads);
       expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads);
       expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1);
     }

     secondFetch: {
       // It should return previously fetched batch, not performing additional network requests
-      const batches = await result.getBatches([]);
-      expect(batches.length).to.be.equal(1);
+      const items = await result.fetchNext({ limit: 10000 });
+      expect(items.length).to.be.gt(0);
+      expect(await rowSetProvider.hasMore()).to.be.false;
+
       expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); // no new fetches
       expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads);
       expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 2);
     }

     thirdFetch: {
       // Now buffer should be empty, and it should fetch next batches
-      const batches = await result.getBatches([]);
-      expect(batches.length).to.be.equal(1);
+      const items = await result.fetchNext({ limit: 10000 });
+      expect(items.length).to.be.gt(0);
+      expect(await rowSetProvider.hasMore()).to.be.false;
+
       expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads * 2);
       expect(result.pendingLinks.length).to.be.equal(
         expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads * 2,
@@ -205,22 +224,21 @@
     globalConfig.cloudFetchConcurrentDownloads = 1;

     const context = {};
+    const rowSetProvider = new RowSetProviderMock([sampleRowSet1]);

-    const result = new CloudFetchResult({}, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: false,
         status: 500,
         statusText: 'Internal Server Error',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

-    const rowSets = [sampleRowSet1];
-
     try {
-      await result.getBatches(rowSets);
+      await result.fetchNext({ limit: 10000 });
       expect.fail('It should throw an error');
     } catch (error) {
       if (error instanceof AssertionError) {
@@ -233,22 +251,21 @@
   it('should handle expired links', async () => {
     const context = {};
+    const rowSetProvider = new RowSetProviderMock([sampleExpiredRowSet]);

-    const result = new CloudFetchResult(context, sampleThriftSchema, sampleArrowSchema);
+    const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema);

     sinon.stub(result, 'fetch').returns(
       Promise.resolve({
         ok: true,
         status: 200,
         statusText: 'OK',
-        arrayBuffer: async () => sampleArrowBatch,
+        arrayBuffer: async () => Buffer.concat([sampleArrowSchema, sampleArrowBatch]),
       }),
     );

-    const rowSets = [sampleExpiredRowSet];
-
     try {
-      await result.getBatches(rowSets);
+      await result.fetchNext({ limit: 10000 });
       expect.fail('It should throw an error');
     } catch (error) {
       if (error instanceof AssertionError) {
diff --git a/tests/unit/result/JsonResult.test.js b/tests/unit/result/JsonResultHandler.test.js
similarity index 86%
rename from tests/unit/result/JsonResult.test.js
rename to tests/unit/result/JsonResultHandler.test.js
index f7e90259..db6ff01d 100644
--- a/tests/unit/result/JsonResult.test.js
+++ b/tests/unit/result/JsonResultHandler.test.js
@@ -1,7 +1,8 @@
 const { expect } = require('chai');
-const JsonResult = require('../../../dist/result/JsonResult').default;
+const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default;
 const { TCLIService_types } = require('../../../').thrift;
 const Int64 = require('node-int64');
+const RowSetProviderMock = require('./fixtures/RowSetProviderMock');

 const getColumnSchema = (columnName, type, position) => {
   if (type === undefined) {
@@ -27,7 +28,7 @@
   };
 };

-describe('JsonResult', () => {
+describe('JsonResultHandler', () => {
   it('should not buffer any data', async () => {
     const schema = {
       columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)],
@@ -39,10 +40,15 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context, schema);
-    await result.getValue(data);
-    expect(await result.hasPendingData()).to.be.false;
+    const result = new JsonResultHandler(context, rowSetProvider, schema);
+    expect(await rowSetProvider.hasMore()).to.be.true;
+    expect(await result.hasMore()).to.be.true;
+
+    await result.fetchNext({ limit: 10000 });
+    expect(await rowSetProvider.hasMore()).to.be.false;
+    expect(await result.hasMore()).to.be.false;
   });

   it('should convert schema with primitive types to json', async () => {
@@ -127,10 +133,11 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.str': 'a',
         'table.int64': 282578800148737,
@@ -199,10 +206,11 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.array': ['a', 'b'],
         'table.map': { key: 12 },
@@ -218,44 +226,11 @@
     ]);
   });

-  it('should merge data items', async () => {
-    const schema = {
-      columns: [getColumnSchema('table.id', TCLIService_types.TTypeId.STRING_TYPE, 1)],
-    };
-    const data = [
-      {
-        columns: [
-          {
-            stringVal: { values: ['0', '1'] },
-          },
-        ],
-      },
-      {}, // it should also handle empty sets
-      {
-        columns: [
-          {
-            stringVal: { values: ['2', '3'] },
-          },
-        ],
-      },
-    ];
-
-    const context = {};
-
-    const result = new JsonResult(context, schema);
-
-    expect(await result.getValue(data)).to.be.deep.eq([
-      { 'table.id': '0' },
-      { 'table.id': '1' },
-      { 'table.id': '2' },
-      { 'table.id': '3' },
-    ]);
-  });
-
   it('should detect nulls', () => {
     const context = {};
+    const rowSetProvider = new RowSetProviderMock();

-    const result = new JsonResult(context, null);
+    const result = new JsonResultHandler(context, rowSetProvider, null);

     const buf = Buffer.from([0x55, 0xaa, 0xc3]);

     [
@@ -368,10 +343,11 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.str': null,
         'table.int64': null,
@@ -399,11 +375,10 @@
     };

     const context = {};
+    const rowSetProvider = new RowSetProviderMock();

-    const result = new JsonResult(context, schema);
-
-    expect(await result.getValue()).to.be.deep.eq([]);
-    expect(await result.getValue([])).to.be.deep.eq([]);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should return empty array if no schema available', async () => {
@@ -418,10 +393,11 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context);
+    const result = new JsonResultHandler(context, rowSetProvider);

-    expect(await result.getValue(data)).to.be.deep.eq([]);
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]);
   });

   it('should return raw data if types are not specified', async () => {
@@ -453,10 +429,11 @@
     ];

     const context = {};
+    const rowSetProvider = new RowSetProviderMock(data);

-    const result = new JsonResult(context, schema);
+    const result = new JsonResultHandler(context, rowSetProvider, schema);

-    expect(await result.getValue(data)).to.be.deep.eq([
+    expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([
       {
         'table.array': '["a", "b"]',
         'table.map': '{ "key": 12 }',
diff --git a/tests/unit/result/compatibility.test.js b/tests/unit/result/compatibility.test.js
index 5b27d39e..6d047d57 100644
--- a/tests/unit/result/compatibility.test.js
+++ b/tests/unit/result/compatibility.test.js
@@ -1,31 +1,36 @@
 const { expect } = require('chai');
-const ArrowResult = require('../../../dist/result/ArrowResult').default;
-const JsonResult = require('../../../dist/result/JsonResult').default;
+const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default;
+const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default;
 const { fixArrowResult } = require('../../fixtures/compatibility');

 const fixtureColumn = require('../../fixtures/compatibility/column');
 const fixtureArrow = require('../../fixtures/compatibility/arrow');
 const fixtureArrowNT = require('../../fixtures/compatibility/arrow_native_types');

+const RowSetProviderMock = require('./fixtures/RowSetProviderMock');
+
 describe('Result handlers compatibility tests', () => {
   it('colum-based data', async () => {
     const context = {};
-    const result = new JsonResult(context, fixtureColumn.schema);
-    const rows = await result.getValue(fixtureColumn.rowSets);
+    const rowSetProvider = new RowSetProviderMock(fixtureColumn.rowSets);
+    const result = new JsonResultHandler(context, rowSetProvider, fixtureColumn.schema);
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(rows).to.deep.equal(fixtureColumn.expected);
   });

   it('arrow-based data without native types', async () => {
     const context = {};
-    const result = new ArrowResult(context, fixtureArrow.schema, fixtureArrow.arrowSchema);
-    const rows = await result.getValue(fixtureArrow.rowSets);
+    const rowSetProvider = new RowSetProviderMock(fixtureArrow.rowSets);
+    const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrow.schema, fixtureArrow.arrowSchema);
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(fixArrowResult(rows)).to.deep.equal(fixtureArrow.expected);
   });

   it('arrow-based data with native types', async () => {
     const context = {};
-    const result = new ArrowResult(context, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema);
-    const rows = await result.getValue(fixtureArrowNT.rowSets);
+    const rowSetProvider = new RowSetProviderMock(fixtureArrowNT.rowSets);
+    const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema);
+    const rows = await result.fetchNext({ limit: 10000 });
     expect(fixArrowResult(rows)).to.deep.equal(fixtureArrowNT.expected);
   });
 });
diff --git a/tests/unit/result/fixtures/RowSetProviderMock.js b/tests/unit/result/fixtures/RowSetProviderMock.js
new file mode 100644
index 00000000..1e878a01
--- /dev/null
+++ b/tests/unit/result/fixtures/RowSetProviderMock.js
@@ -0,0 +1,15 @@
+class RowSetProviderMock {
+  constructor(rowSets) {
+    this.rowSets = Array.isArray(rowSets) ? [...rowSets] : [];
[...rowSets] : []; + } + + async hasMore() { + return this.rowSets.length > 0; + } + + async fetchNext() { + return this.rowSets.shift(); + } +} + +module.exports = RowSetProviderMock; From 80c660e33af1c063e53645047bde93bc056558b3 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Wed, 15 Nov 2023 01:07:59 +0200 Subject: [PATCH 02/11] Refactoring: Convert global config to client config and make it available through client context (#202) Convert global config to client config and make it available through client context Signed-off-by: Levko Kravets --- lib/DBSQLClient.ts | 27 ++++- lib/DBSQLSession.ts | 12 +- lib/connection/connections/HttpConnection.ts | 13 ++- lib/contracts/IClientContext.ts | 16 +++ lib/globalConfig.ts | 27 ----- lib/hive/Commands/BaseCommand.ts | 19 +-- lib/hive/HiveDriver.ts | 42 +++---- lib/result/CloudFetchResultHandler.ts | 4 +- tests/e2e/arrow.test.js | 104 ++++++++++------- tests/e2e/batched_fetch.test.js | 27 ++--- tests/e2e/cloudfetch.test.js | 32 ++--- tests/e2e/data_types.test.js | 26 ++--- tests/e2e/timeouts.test.js | 27 ++--- tests/unit/DBSQLClient.test.js | 4 +- tests/unit/DBSQLSession.test.js | 4 + .../connections/HttpConnection.test.js | 110 +++++++++++++----- tests/unit/hive/commands/BaseCommand.test.js | 105 +++++++++++++---- .../result/CloudFetchResultHandler.test.js | 64 +++++----- 18 files changed, 397 insertions(+), 266 deletions(-) delete mode 100644 lib/globalConfig.ts diff --git a/lib/DBSQLClient.ts b/lib/DBSQLClient.ts index 779492e6..5c25d540 100644 --- a/lib/DBSQLClient.ts +++ b/lib/DBSQLClient.ts @@ -5,7 +5,7 @@ import TCLIService from '../thrift/TCLIService'; import { TProtocolVersion } from '../thrift/TCLIService_types'; import IDBSQLClient, { ClientOptions, ConnectionOptions, OpenSessionRequest } from './contracts/IDBSQLClient'; import IDriver from './contracts/IDriver'; -import IClientContext from './contracts/IClientContext'; +import IClientContext, { ClientConfig } from './contracts/IClientContext'; import HiveDriver from './hive/HiveDriver'; import { Int64 } from './hive/Types'; import DBSQLSession from './DBSQLSession'; @@ -46,6 +46,8 @@ function getInitialNamespaceOptions(catalogName?: string, schemaName?: string) { export default class DBSQLClient extends EventEmitter implements IDBSQLClient, IClientContext { private static defaultLogger?: IDBSQLLogger; + private readonly config: ClientConfig; + private connectionProvider?: IConnectionProvider; private authProvider?: IAuthentication; @@ -69,8 +71,25 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I return this.defaultLogger; } + private static getDefaultConfig(): ClientConfig { + return { + arrowEnabled: true, + useArrowNativeTypes: true, + socketTimeout: 15 * 60 * 1000, // 15 minutes + + retryMaxAttempts: 30, + retriesTimeout: 900 * 1000, + retryDelayMin: 1 * 1000, + retryDelayMax: 60 * 1000, + + useCloudFetch: false, + cloudFetchConcurrentDownloads: 10, + }; + } + constructor(options?: ClientOptions) { super(); + this.config = DBSQLClient.getDefaultConfig(); this.logger = options?.logger ?? 
DBSQLClient.getDefaultLogger(); this.logger.log(LogLevel.info, 'Created DBSQLClient'); } @@ -129,7 +148,7 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I public async connect(options: ConnectionOptions, authProvider?: IAuthentication): Promise { this.authProvider = this.initAuthProvider(options, authProvider); - this.connectionProvider = new HttpConnection(this.getConnectionOptions(options)); + this.connectionProvider = new HttpConnection(this.getConnectionOptions(options), this); const thriftConnection = await this.connectionProvider.getThriftConnection(); @@ -196,6 +215,10 @@ export default class DBSQLClient extends EventEmitter implements IDBSQLClient, I this.authProvider = undefined; } + public getConfig(): ClientConfig { + return this.config; + } + public getLogger(): IDBSQLLogger { return this.logger; } diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index ccba880a..cf14dcd9 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -33,11 +33,10 @@ import { definedOrError } from './utils'; import CloseableCollection from './utils/CloseableCollection'; import { LogLevel } from './contracts/IDBSQLLogger'; import HiveDriverError from './errors/HiveDriverError'; -import globalConfig from './globalConfig'; import StagingError from './errors/StagingError'; import { DBSQLParameter, DBSQLParameterValue } from './DBSQLParameter'; import ParameterError from './errors/ParameterError'; -import IClientContext from './contracts/IClientContext'; +import IClientContext, { ClientConfig } from './contracts/IClientContext'; const defaultMaxRows = 100000; @@ -59,11 +58,11 @@ function getDirectResultsOptions(maxRows: number | null = defaultMaxRows) { }; } -function getArrowOptions(): { +function getArrowOptions(config: ClientConfig): { canReadArrowResult: boolean; useArrowNativeTypes?: TSparkArrowTypes; } { - const { arrowEnabled = true, useArrowNativeTypes = true } = globalConfig; + const { arrowEnabled = true, useArrowNativeTypes = true } = config; if (!arrowEnabled) { return { @@ -187,14 +186,15 @@ export default class DBSQLSession implements IDBSQLSession { public async executeStatement(statement: string, options: ExecuteStatementOptions = {}): Promise { await this.failIfClosed(); const driver = await this.context.getDriver(); + const clientConfig = this.context.getConfig(); const operationPromise = driver.executeStatement({ sessionHandle: this.sessionHandle, statement, queryTimeout: options.queryTimeout, runAsync: true, ...getDirectResultsOptions(options.maxRows), - ...getArrowOptions(), - canDownloadResult: options.useCloudFetch ?? globalConfig.useCloudFetch, + ...getArrowOptions(clientConfig), + canDownloadResult: options.useCloudFetch ?? 
clientConfig.useCloudFetch, parameters: getQueryParameters(this.sessionHandle, options.namedParameters, options.ordinalParameters), }); const response = await this.handleResponse(operationPromise); diff --git a/lib/connection/connections/HttpConnection.ts b/lib/connection/connections/HttpConnection.ts index 79f24c3d..2543dbbf 100644 --- a/lib/connection/connections/HttpConnection.ts +++ b/lib/connection/connections/HttpConnection.ts @@ -6,21 +6,24 @@ import { ProxyAgent } from 'proxy-agent'; import IConnectionProvider from '../contracts/IConnectionProvider'; import IConnectionOptions, { ProxyOptions } from '../contracts/IConnectionOptions'; -import globalConfig from '../../globalConfig'; +import IClientContext from '../../contracts/IClientContext'; import ThriftHttpConnection from './ThriftHttpConnection'; export default class HttpConnection implements IConnectionProvider { private readonly options: IConnectionOptions; + private readonly context: IClientContext; + private headers: HeadersInit = {}; private connection?: ThriftHttpConnection; private agent?: http.Agent; - constructor(options: IConnectionOptions) { + constructor(options: IConnectionOptions, context: IClientContext) { this.options = options; + this.context = context; } public setHeaders(headers: HeadersInit) { @@ -44,11 +47,12 @@ export default class HttpConnection implements IConnectionProvider { } private getAgentDefaultOptions(): http.AgentOptions { + const clientConfig = this.context.getConfig(); return { keepAlive: true, maxSockets: 5, keepAliveMsecs: 10000, - timeout: this.options.socketTimeout ?? globalConfig.socketTimeout, + timeout: this.options.socketTimeout ?? clientConfig.socketTimeout, }; } @@ -89,6 +93,7 @@ export default class HttpConnection implements IConnectionProvider { public async getThriftConnection(): Promise { if (!this.connection) { const { options } = this; + const clientConfig = this.context.getConfig(); const agent = await this.getAgent(); this.connection = new ThriftHttpConnection( @@ -99,7 +104,7 @@ export default class HttpConnection implements IConnectionProvider { }, { agent, - timeout: options.socketTimeout ?? globalConfig.socketTimeout, + timeout: options.socketTimeout ?? 
clientConfig.socketTimeout, headers: { ...options.headers, ...this.headers, diff --git a/lib/contracts/IClientContext.ts b/lib/contracts/IClientContext.ts index 8df0f7e9..062d0795 100644 --- a/lib/contracts/IClientContext.ts +++ b/lib/contracts/IClientContext.ts @@ -3,7 +3,23 @@ import IDriver from './IDriver'; import IConnectionProvider from '../connection/contracts/IConnectionProvider'; import TCLIService from '../../thrift/TCLIService'; +export interface ClientConfig { + arrowEnabled?: boolean; + useArrowNativeTypes?: boolean; + socketTimeout: number; + + retryMaxAttempts: number; + retriesTimeout: number; // in milliseconds + retryDelayMin: number; // in milliseconds + retryDelayMax: number; // in milliseconds + + useCloudFetch: boolean; + cloudFetchConcurrentDownloads: number; +} + export default interface IClientContext { + getConfig(): ClientConfig; + getLogger(): IDBSQLLogger; getConnectionProvider(): Promise; diff --git a/lib/globalConfig.ts b/lib/globalConfig.ts deleted file mode 100644 index ad477d8b..00000000 --- a/lib/globalConfig.ts +++ /dev/null @@ -1,27 +0,0 @@ -interface GlobalConfig { - arrowEnabled?: boolean; - useArrowNativeTypes?: boolean; - socketTimeout: number; - - retryMaxAttempts: number; - retriesTimeout: number; // in milliseconds - retryDelayMin: number; // in milliseconds - retryDelayMax: number; // in milliseconds - - useCloudFetch: boolean; - cloudFetchConcurrentDownloads: number; -} - -export default { - arrowEnabled: true, - useArrowNativeTypes: true, - socketTimeout: 15 * 60 * 1000, // 15 minutes - - retryMaxAttempts: 30, - retriesTimeout: 900 * 1000, - retryDelayMin: 1 * 1000, - retryDelayMax: 60 * 1000, - - useCloudFetch: false, - cloudFetchConcurrentDownloads: 10, -} satisfies GlobalConfig; diff --git a/lib/hive/Commands/BaseCommand.ts b/lib/hive/Commands/BaseCommand.ts index 3fff1946..f059d9e1 100644 --- a/lib/hive/Commands/BaseCommand.ts +++ b/lib/hive/Commands/BaseCommand.ts @@ -1,16 +1,16 @@ import { Thrift } from 'thrift'; import TCLIService from '../../../thrift/TCLIService'; import HiveDriverError from '../../errors/HiveDriverError'; -import globalConfig from '../../globalConfig'; +import IClientContext, { ClientConfig } from '../../contracts/IClientContext'; interface CommandExecutionInfo { startTime: number; // in milliseconds attempt: number; } -function getRetryDelay(attempt: number): number { +function getRetryDelay(attempt: number, config: ClientConfig): number { const scale = Math.max(1, 1.5 ** (attempt - 1)); // ensure scale >= 1 - return Math.min(globalConfig.retryDelayMin * scale, globalConfig.retryDelayMax); + return Math.min(config.retryDelayMin * scale, config.retryDelayMax); } function delay(milliseconds: number): Promise { @@ -22,8 +22,11 @@ function delay(milliseconds: number): Promise { export default abstract class BaseCommand { protected client: TCLIService.Client; - constructor(client: TCLIService.Client) { + protected context: IClientContext; + + constructor(client: TCLIService.Client, context: IClientContext) { this.client = client; + this.context = context; } protected executeCommand(request: object, command: Function | void): Promise { @@ -49,19 +52,21 @@ export default abstract class BaseCommand { case 503: // Service Unavailable info.attempt += 1; + const clientConfig = this.context.getConfig(); + // Delay interval depends on current attempt - the more attempts we do // the longer the interval will be // TODO: Respect `Retry-After` header (PECO-729) - const retryDelay = getRetryDelay(info.attempt); + const retryDelay = 
getRetryDelay(info.attempt, clientConfig); - const attemptsExceeded = info.attempt >= globalConfig.retryMaxAttempts; + const attemptsExceeded = info.attempt >= clientConfig.retryMaxAttempts; if (attemptsExceeded) { throw new HiveDriverError( `Hive driver: ${error.statusCode} when connecting to resource. Max retry count exceeded.`, ); } - const timeoutExceeded = Date.now() - info.startTime + retryDelay >= globalConfig.retriesTimeout; + const timeoutExceeded = Date.now() - info.startTime + retryDelay >= clientConfig.retriesTimeout; if (timeoutExceeded) { throw new HiveDriverError( `Hive driver: ${error.statusCode} when connecting to resource. Retry timeout exceeded.`, diff --git a/lib/hive/HiveDriver.ts b/lib/hive/HiveDriver.ts index 9b7384b1..7afd03a5 100644 --- a/lib/hive/HiveDriver.ts +++ b/lib/hive/HiveDriver.ts @@ -58,127 +58,127 @@ export default class HiveDriver implements IDriver { async openSession(request: TOpenSessionReq) { const client = await this.context.getClient(); - const action = new OpenSessionCommand(client); + const action = new OpenSessionCommand(client, this.context); return action.execute(request); } async closeSession(request: TCloseSessionReq) { const client = await this.context.getClient(); - const command = new CloseSessionCommand(client); + const command = new CloseSessionCommand(client, this.context); return command.execute(request); } async executeStatement(request: TExecuteStatementReq) { const client = await this.context.getClient(); - const command = new ExecuteStatementCommand(client); + const command = new ExecuteStatementCommand(client, this.context); return command.execute(request); } async getResultSetMetadata(request: TGetResultSetMetadataReq) { const client = await this.context.getClient(); - const command = new GetResultSetMetadataCommand(client); + const command = new GetResultSetMetadataCommand(client, this.context); return command.execute(request); } async fetchResults(request: TFetchResultsReq) { const client = await this.context.getClient(); - const command = new FetchResultsCommand(client); + const command = new FetchResultsCommand(client, this.context); return command.execute(request); } async getInfo(request: TGetInfoReq) { const client = await this.context.getClient(); - const command = new GetInfoCommand(client); + const command = new GetInfoCommand(client, this.context); return command.execute(request); } async getTypeInfo(request: TGetTypeInfoReq) { const client = await this.context.getClient(); - const command = new GetTypeInfoCommand(client); + const command = new GetTypeInfoCommand(client, this.context); return command.execute(request); } async getCatalogs(request: TGetCatalogsReq) { const client = await this.context.getClient(); - const command = new GetCatalogsCommand(client); + const command = new GetCatalogsCommand(client, this.context); return command.execute(request); } async getSchemas(request: TGetSchemasReq) { const client = await this.context.getClient(); - const command = new GetSchemasCommand(client); + const command = new GetSchemasCommand(client, this.context); return command.execute(request); } async getTables(request: TGetTablesReq) { const client = await this.context.getClient(); - const command = new GetTablesCommand(client); + const command = new GetTablesCommand(client, this.context); return command.execute(request); } async getTableTypes(request: TGetTableTypesReq) { const client = await this.context.getClient(); - const command = new GetTableTypesCommand(client); + const command = new GetTableTypesCommand(client, 
this.context); return command.execute(request); } async getColumns(request: TGetColumnsReq) { const client = await this.context.getClient(); - const command = new GetColumnsCommand(client); + const command = new GetColumnsCommand(client, this.context); return command.execute(request); } async getFunctions(request: TGetFunctionsReq) { const client = await this.context.getClient(); - const command = new GetFunctionsCommand(client); + const command = new GetFunctionsCommand(client, this.context); return command.execute(request); } async getPrimaryKeys(request: TGetPrimaryKeysReq) { const client = await this.context.getClient(); - const command = new GetPrimaryKeysCommand(client); + const command = new GetPrimaryKeysCommand(client, this.context); return command.execute(request); } async getCrossReference(request: TGetCrossReferenceReq) { const client = await this.context.getClient(); - const command = new GetCrossReferenceCommand(client); + const command = new GetCrossReferenceCommand(client, this.context); return command.execute(request); } async getOperationStatus(request: TGetOperationStatusReq) { const client = await this.context.getClient(); - const command = new GetOperationStatusCommand(client); + const command = new GetOperationStatusCommand(client, this.context); return command.execute(request); } async cancelOperation(request: TCancelOperationReq) { const client = await this.context.getClient(); - const command = new CancelOperationCommand(client); + const command = new CancelOperationCommand(client, this.context); return command.execute(request); } async closeOperation(request: TCloseOperationReq) { const client = await this.context.getClient(); - const command = new CloseOperationCommand(client); + const command = new CloseOperationCommand(client, this.context); return command.execute(request); } async getDelegationToken(request: TGetDelegationTokenReq) { const client = await this.context.getClient(); - const command = new GetDelegationTokenCommand(client); + const command = new GetDelegationTokenCommand(client, this.context); return command.execute(request); } async cancelDelegationToken(request: TCancelDelegationTokenReq) { const client = await this.context.getClient(); - const command = new CancelDelegationTokenCommand(client); + const command = new CancelDelegationTokenCommand(client, this.context); return command.execute(request); } async renewDelegationToken(request: TRenewDelegationTokenReq) { const client = await this.context.getClient(); - const command = new RenewDelegationTokenCommand(client); + const command = new RenewDelegationTokenCommand(client, this.context); return command.execute(request); } } diff --git a/lib/result/CloudFetchResultHandler.ts b/lib/result/CloudFetchResultHandler.ts index a49e8714..db52b95f 100644 --- a/lib/result/CloudFetchResultHandler.ts +++ b/lib/result/CloudFetchResultHandler.ts @@ -4,7 +4,6 @@ import { TRowSet, TSparkArrowResultLink, TTableSchema } from '../../thrift/TCLIS import IClientContext from '../contracts/IClientContext'; import IResultsProvider from './IResultsProvider'; import ArrowResultHandler from './ArrowResultHandler'; -import globalConfig from '../globalConfig'; export default class CloudFetchResultHandler extends ArrowResultHandler { private pendingLinks: Array = []; @@ -30,7 +29,8 @@ export default class CloudFetchResultHandler extends ArrowResultHandler { }); if (this.downloadedBatches.length === 0) { - const links = this.pendingLinks.splice(0, globalConfig.cloudFetchConcurrentDownloads); + const clientConfig = 
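// Sketch of the constructor shape implied by the calls above: every command now
// receives the client context alongside the Thrift client, so per-client settings
// (retry limits, etc.) are read via context.getConfig() instead of a module-level
// global. The names below are illustrative, not the library's exact types.
abstract class ContextAwareCommand<TRequest, TResponse> {
  protected constructor(
    protected readonly client: unknown, // generated Thrift client
    protected readonly context: { getConfig(): { retryMaxAttempts: number } }, // subset of IClientContext
  ) {}

  public abstract execute(request: TRequest): Promise<TResponse>;
}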
this.context.getConfig(); + const links = this.pendingLinks.splice(0, clientConfig.cloudFetchConcurrentDownloads); const tasks = links.map((link) => this.downloadLink(link)); const batches = await Promise.all(tasks); this.downloadedBatches.push(...batches); diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js index 4118a116..08fe17d8 100644 --- a/tests/e2e/arrow.test.js +++ b/tests/e2e/arrow.test.js @@ -4,7 +4,6 @@ const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; -const globalConfig = require('../../dist/globalConfig').default; const fixtures = require('../fixtures/compatibility'); const { expected: expectedColumn } = require('../fixtures/compatibility/column'); @@ -12,9 +11,15 @@ const { expected: expectedArrow } = require('../fixtures/compatibility/arrow'); const { expected: expectedArrowNativeTypes } = require('../fixtures/compatibility/arrow_native_types'); const { fixArrowResult } = fixtures; -async function openSession() { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -51,9 +56,9 @@ async function initializeTable(session, tableName) { describe('Arrow support', () => { const tableName = `dbsql_nodejs_sdk_e2e_arrow_${config.tableSuffix}`; - function createTest(testBody) { + function createTest(testBody, customConfig) { return async () => { - const session = await openSession(); + const session = await openSession(customConfig); try { await initializeTable(session, tableName); await testBody(session); @@ -69,60 +74,69 @@ describe('Arrow support', () => { it( 'should not use arrow if disabled', - createTest(async (session) => { - globalConfig.arrowEnabled = false; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(result).to.deep.equal(expectedColumn); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.not.instanceof(ArrowResultHandler); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(result).to.deep.equal(expectedColumn); + + const resultHandler = await operation.getResultHandler(); + expect(resultHandler).to.be.not.instanceof(ArrowResultHandler); + + await operation.close(); + }, + { + arrowEnabled: false, + }, + ), ); it( 'should use arrow with native types disabled', - createTest(async (session) => { - globalConfig.arrowEnabled = true; - globalConfig.useArrowNativeTypes = false; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(fixArrowResult(result)).to.deep.equal(expectedArrow); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResultHandler); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(fixArrowResult(result)).to.deep.equal(expectedArrow); + + const resultHandler = 
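// Usage sketch of the per-test configuration pattern introduced above: instead of
// mutating a shared global config, each test stubs client.getConfig() and merges
// its overrides in, so settings are scoped to a single session. The override
// values below are illustrative; this would run inside an async test body.
const session = await openSession({ arrowEnabled: true, useArrowNativeTypes: false });
try {
  // ...assertions against a session that sees the overridden settings...
} finally {
  await session.close();
}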
await operation.getResultHandler(); + expect(resultHandler).to.be.instanceof(ArrowResultHandler); + + await operation.close(); + }, + { + arrowEnabled: true, + useArrowNativeTypes: false, + }, + ), ); it( 'should use arrow with native types enabled', - createTest(async (session) => { - globalConfig.arrowEnabled = true; - globalConfig.useArrowNativeTypes = true; - - const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); - const result = await operation.fetchAll(); - expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); - - const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResultHandler); - - await operation.close(); - }), + createTest( + async (session) => { + const operation = await session.executeStatement(`SELECT * FROM ${tableName}`); + const result = await operation.fetchAll(); + expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); + + const resultHandler = await operation.getResultHandler(); + expect(resultHandler).to.be.instanceof(ArrowResultHandler); + + await operation.close(); + }, + { + arrowEnabled: true, + useArrowNativeTypes: true, + }, + ), ); it('should handle multiple batches in response', async () => { - globalConfig.arrowEnabled = true; - const rowsCount = 10000; - const session = await openSession(); + const session = await openSession({ + arrowEnabled: true, + }); const operation = await session.executeStatement(` SELECT * FROM range(0, ${rowsCount}) AS t1 diff --git a/tests/e2e/batched_fetch.test.js b/tests/e2e/batched_fetch.test.js index 5218088e..2e3059a9 100644 --- a/tests/e2e/batched_fetch.test.js +++ b/tests/e2e/batched_fetch.test.js @@ -3,11 +3,16 @@ const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async () => { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -18,17 +23,9 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} describe('Data fetching', () => { - beforeEach(() => { - globalConfig.arrowEnabled = false; - }); - - afterEach(() => { - globalConfig.arrowEnabled = true; - }); - const query = ` SELECT * FROM range(0, 1000) AS t1 @@ -36,7 +33,7 @@ describe('Data fetching', () => { `; it('fetch chunks should return a max row set of chunkSize', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // set `maxRows` to null to disable direct results so all the data are fetched through `driver.fetchResults` @@ -51,7 +48,7 @@ describe('Data fetching', () => { }); it('fetch all should fetch all records', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // set `maxRows` to null to disable direct results so all the data are fetched through `driver.fetchResults` @@ -66,7 +63,7 @@ describe('Data fetching', () => { }); it('should fetch all records if they fit within directResults response', async () => { - 
const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // here `maxRows` enables direct results with limit of the first batch @@ -81,7 +78,7 @@ describe('Data fetching', () => { }); it('should fetch all records if only part of them fit within directResults response', async () => { - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); try { // here `maxRows` enables direct results with limit of the first batch diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js index 03b2cb60..e2b564db 100644 --- a/tests/e2e/cloudfetch.test.js +++ b/tests/e2e/cloudfetch.test.js @@ -4,11 +4,16 @@ const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async () => { +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -19,25 +24,14 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} // This suite takes a while to execute, and in this case it's expected. // If one day it starts to fail with timeouts - you may consider to just increase timeout for it describe('CloudFetch', () => { - let savedConcurrentDownloads; - - beforeEach(() => { - savedConcurrentDownloads = globalConfig.cloudFetchConcurrentDownloads; - }); - - afterEach(() => { - globalConfig.cloudFetchConcurrentDownloads = savedConcurrentDownloads; - }); - it('should fetch data', async () => { - globalConfig.cloudFetchConcurrentDownloads = 5; - - const session = await openSession(); + const cloudFetchConcurrentDownloads = 5; + const session = await openSession({ cloudFetchConcurrentDownloads }); const queriedRowsCount = 10000000; // result has to be quite big to enable CloudFetch const operation = await session.executeStatement( @@ -76,10 +70,8 @@ describe('CloudFetch', () => { expect(await resultHandler.hasMore()).to.be.true; // expected batches minus first 5 already fetched - expect(resultHandler.pendingLinks.length).to.be.equal( - resultLinksCount - globalConfig.cloudFetchConcurrentDownloads, - ); - expect(resultHandler.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + expect(resultHandler.pendingLinks.length).to.be.equal(resultLinksCount - cloudFetchConcurrentDownloads); + expect(resultHandler.downloadedBatches.length).to.be.equal(cloudFetchConcurrentDownloads - 1); let fetchedRowCount = chunk.length; while (await operation.hasMoreRows()) { diff --git a/tests/e2e/data_types.test.js b/tests/e2e/data_types.test.js index 8308cc12..59c24856 100644 --- a/tests/e2e/data_types.test.js +++ b/tests/e2e/data_types.test.js @@ -1,12 +1,18 @@ const { expect } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async () => 
{ +async function openSession(customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -17,7 +23,7 @@ const openSession = async () => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} const execute = async (session, statement) => { const operation = await session.executeStatement(statement); @@ -39,18 +45,10 @@ function removeTrailingMetadata(columns) { } describe('Data types', () => { - beforeEach(() => { - globalConfig.arrowEnabled = false; - }); - - afterEach(() => { - globalConfig.arrowEnabled = true; - }); - it('primitive data types should presented correctly', async () => { const table = `dbsql_nodejs_sdk_e2e_primitive_types_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${table}`); await execute( @@ -201,7 +199,7 @@ describe('Data types', () => { it('interval types should be presented correctly', async () => { const table = `dbsql_nodejs_sdk_e2e_interval_types_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${table}`); await execute( @@ -246,7 +244,7 @@ describe('Data types', () => { const table = `dbsql_nodejs_sdk_e2e_complex_types_${config.tableSuffix}`; const helperTable = `dbsql_nodejs_sdk_e2e_complex_types_helper_${config.tableSuffix}`; - const session = await openSession(); + const session = await openSession({ arrowEnabled: false }); try { await execute(session, `DROP TABLE IF EXISTS ${helperTable}`); await execute(session, `DROP TABLE IF EXISTS ${table}`); diff --git a/tests/e2e/timeouts.test.js b/tests/e2e/timeouts.test.js index fdf495c3..bea96c20 100644 --- a/tests/e2e/timeouts.test.js +++ b/tests/e2e/timeouts.test.js @@ -1,11 +1,17 @@ const { expect, AssertionError } = require('chai'); +const sinon = require('sinon'); const config = require('./utils/config'); const { DBSQLClient } = require('../..'); -const globalConfig = require('../../dist/globalConfig').default; -const openSession = async (socketTimeout) => { +async function openSession(socketTimeout, customConfig) { const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns({ + ...clientConfig, + ...customConfig, + }); + const connection = await client.connect({ host: config.host, path: config.path, @@ -17,37 +23,26 @@ const openSession = async (socketTimeout) => { initialCatalog: config.database[0], initialSchema: config.database[1], }); -}; +} describe('Data fetching', () => { - const query = ` - SELECT * - FROM range(0, 100000) AS t1 - LEFT JOIN (SELECT 1) AS t2 - ORDER BY RANDOM() ASC - `; - const socketTimeout = 1; // minimum value to make sure any request will time out it('should use default socket timeout', async () => { - const savedTimeout = globalConfig.socketTimeout; - globalConfig.socketTimeout = socketTimeout; try { - await openSession(); + await openSession(undefined, { socketTimeout }); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { throw error; } expect(error.message).to.be.eq('Request timed out'); - } finally { - globalConfig.socketTimeout = savedTimeout; } }); it('should use socket timeout from 
options', async () => { try { - await await openSession(socketTimeout); + await openSession(socketTimeout); expect.fail('It should throw an error'); } catch (error) { if (error instanceof AssertionError) { diff --git a/tests/unit/DBSQLClient.test.js b/tests/unit/DBSQLClient.test.js index 55231707..b1a1f3f2 100644 --- a/tests/unit/DBSQLClient.test.js +++ b/tests/unit/DBSQLClient.test.js @@ -184,7 +184,7 @@ describe('DBSQLClient.getClient', () => { const thriftClient = {}; client.authProvider = new AuthProviderMock(); - client.connectionProvider = new HttpConnection({ ...options }); + client.connectionProvider = new HttpConnection({ ...options }, client); client.thrift = { createClient: sinon.stub().returns(thriftClient), }; @@ -199,7 +199,7 @@ describe('DBSQLClient.getClient', () => { const thriftClient = {}; - client.connectionProvider = new HttpConnection({ ...options }); + client.connectionProvider = new HttpConnection({ ...options }, client); client.thrift = { createClient: sinon.stub().returns(thriftClient), }; diff --git a/tests/unit/DBSQLSession.test.js b/tests/unit/DBSQLSession.test.js index 9ff5172b..b172b29f 100644 --- a/tests/unit/DBSQLSession.test.js +++ b/tests/unit/DBSQLSession.test.js @@ -6,6 +6,7 @@ const InfoValue = require('../../dist/dto/InfoValue').default; const Status = require('../../dist/dto/Status').default; const DBSQLOperation = require('../../dist/DBSQLOperation').default; const HiveDriver = require('../../dist/hive/HiveDriver').default; +const DBSQLClient = require('../../dist/DBSQLClient').default; // Create logger that won't emit // @@ -37,9 +38,12 @@ function createDriverMock(customMethodHandler) { function createSession(customMethodHandler) { const driver = createDriverMock(customMethodHandler); + const clientConfig = DBSQLClient.getDefaultConfig(); + return new DBSQLSession({ handle: { sessionId: 'id' }, context: { + getConfig: () => clientConfig, getLogger: () => logger, getDriver: async () => driver, }, diff --git a/tests/unit/connection/connections/HttpConnection.test.js b/tests/unit/connection/connections/HttpConnection.test.js index a9a21136..261a3af8 100644 --- a/tests/unit/connection/connections/HttpConnection.test.js +++ b/tests/unit/connection/connections/HttpConnection.test.js @@ -2,14 +2,24 @@ const http = require('http'); const { expect } = require('chai'); const HttpConnection = require('../../../../dist/connection/connections/HttpConnection').default; const ThriftHttpConnection = require('../../../../dist/connection/connections/ThriftHttpConnection').default; +const DBSQLClient = require('../../../../dist/DBSQLClient').default; describe('HttpConnection.connect', () => { it('should create Thrift connection', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -22,15 +32,24 @@ describe('HttpConnection.connect', () => { }); it('should set SSL certificates and disable rejectUnauthorized', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - https: true, - ca: 'ca', - cert: 'cert', - key: 'key', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const 
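// Worth noting in the DBSQLClient tests above: the DBSQLClient instance itself is
// passed as the context argument to HttpConnection, which suggests DBSQLClient
// implements the IClientContext interface (getConfig() and friends) directly.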
connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + https: true, + ca: 'ca', + cert: 'cert', + key: 'key', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -41,12 +60,21 @@ describe('HttpConnection.connect', () => { }); it('should initialize http agents', async () => { - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - https: false, - path: '/hive', - }); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + https: false, + path: '/hive', + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); @@ -54,17 +82,26 @@ describe('HttpConnection.connect', () => { }); it('should update headers (case 1: Thrift connection not initialized)', async () => { + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const initialHeaders = { a: 'test header A', b: 'test header B', }; - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - headers: initialHeaders, - }); + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + headers: initialHeaders, + }, + context, + ); const extraHeaders = { b: 'new header B', @@ -82,17 +119,26 @@ describe('HttpConnection.connect', () => { }); it('should update headers (case 2: Thrift connection initialized)', async () => { + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const initialHeaders = { a: 'test header A', b: 'test header B', }; - const connection = new HttpConnection({ - host: 'localhost', - port: 10001, - path: '/hive', - headers: initialHeaders, - }); + const connection = new HttpConnection( + { + host: 'localhost', + port: 10001, + path: '/hive', + headers: initialHeaders, + }, + context, + ); const thriftConnection = await connection.getThriftConnection(); diff --git a/tests/unit/hive/commands/BaseCommand.test.js b/tests/unit/hive/commands/BaseCommand.test.js index 1a20c303..d6b286cf 100644 --- a/tests/unit/hive/commands/BaseCommand.test.js +++ b/tests/unit/hive/commands/BaseCommand.test.js @@ -2,9 +2,7 @@ const { expect, AssertionError } = require('chai'); const { Thrift } = require('thrift'); const HiveDriverError = require('../../../../dist/errors/HiveDriverError').default; const BaseCommand = require('../../../../dist/hive/Commands/BaseCommand').default; -const globalConfig = require('../../../../dist/globalConfig').default; - -const savedGlobalConfig = { ...globalConfig }; +const DBSQLClient = require('../../../../dist/DBSQLClient').default; class ThriftClientMock { constructor(methodHandler) { @@ -26,18 +24,24 @@ ThriftClientMock.defaultResponse = { }; class CustomCommand extends BaseCommand { + constructor(...args) { + super(...args); + } + execute(request) { return this.executeCommand(request, this.client.CustomMethod); } } describe('BaseCommand', () => { - afterEach(() => { - Object.assign(globalConfig, savedGlobalConfig); - }); - it('should fail if trying to invoke non-existing command', async () => { - const command = new CustomCommand({}); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const command = new CustomCommand({}, context); try { await command.execute(); @@ -54,11 +58,20 @@ 
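// The BaseCommand tests below keep rebuilding the same minimal context double;
// this is the whole shape they rely on — only getConfig() is consumed here, while
// the real IClientContext exposes more members. `thriftClientMock` is a
// placeholder name for whatever client mock a given test constructs.
const clientConfig = DBSQLClient.getDefaultConfig();
const context = {
  getConfig: () => clientConfig,
};
const command = new CustomCommand(thriftClientMock, context);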
describe('BaseCommand', () => { it('should handle exceptions thrown by command', async () => { const errorMessage = 'Unexpected error'; - const command = new CustomCommand({ - CustomMethod() { - throw new Error(errorMessage); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + + const command = new CustomCommand( + { + CustomMethod() { + throw new Error(errorMessage); + }, }, - }); + context, + ); try { await command.execute(); @@ -75,10 +88,16 @@ describe('BaseCommand', () => { [429, 503].forEach((statusCode) => { describe(`HTTP ${statusCode} error`, () => { it('should fail on max retry attempts exceeded', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 3; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 3; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -88,6 +107,7 @@ describe('BaseCommand', () => { error.statusCode = statusCode; throw error; }), + context, ); try { @@ -100,15 +120,21 @@ describe('BaseCommand', () => { expect(error).to.be.instanceof(HiveDriverError); expect(error.message).to.contain(`${statusCode} when connecting to resource`); expect(error.message).to.contain('Max retry count exceeded'); - expect(methodCallCount).to.equal(globalConfig.retryMaxAttempts); + expect(methodCallCount).to.equal(clientConfig.retryMaxAttempts); } }); it('should fail on retry timeout exceeded', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 50; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 50; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -118,6 +144,7 @@ describe('BaseCommand', () => { error.statusCode = statusCode; throw error; }), + context, ); try { @@ -138,10 +165,16 @@ describe('BaseCommand', () => { }); it('should succeed after few attempts', async () => { - globalConfig.retriesTimeout = 200; // ms - globalConfig.retryDelayMin = 5; // ms - globalConfig.retryDelayMax = 20; // ms - globalConfig.retryMaxAttempts = 5; + const clientConfig = DBSQLClient.getDefaultConfig(); + + clientConfig.retriesTimeout = 200; // ms + clientConfig.retryDelayMin = 5; // ms + clientConfig.retryDelayMax = 20; // ms + clientConfig.retryMaxAttempts = 5; + + const context = { + getConfig: () => clientConfig, + }; let methodCallCount = 0; const command = new CustomCommand( @@ -154,6 +187,7 @@ describe('BaseCommand', () => { } return ThriftClientMock.defaultResponse; }), + context, ); const response = await command.execute(); @@ -166,12 +200,19 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized HTTP errors`, async () => { const errorMessage = 'Unrecognized HTTP error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { const error = new Thrift.TApplicationException(undefined, 
errorMessage); error.statusCode = 500; throw error; }), + context, ); try { @@ -189,10 +230,17 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized Thrift errors`, async () => { const errorMessage = 'Unrecognized HTTP error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { throw new Thrift.TApplicationException(undefined, errorMessage); }), + context, ); try { @@ -210,10 +258,17 @@ describe('BaseCommand', () => { it(`should re-throw unrecognized errors`, async () => { const errorMessage = 'Unrecognized error'; + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const command = new CustomCommand( new ThriftClientMock(() => { throw new Error(errorMessage); }), + context, ); try { diff --git a/tests/unit/result/CloudFetchResultHandler.test.js b/tests/unit/result/CloudFetchResultHandler.test.js index e0a48151..29367c55 100644 --- a/tests/unit/result/CloudFetchResultHandler.test.js +++ b/tests/unit/result/CloudFetchResultHandler.test.js @@ -2,8 +2,8 @@ const { expect, AssertionError } = require('chai'); const sinon = require('sinon'); const Int64 = require('node-int64'); const CloudFetchResultHandler = require('../../../dist/result/CloudFetchResultHandler').default; -const globalConfig = require('../../../dist/globalConfig').default; const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); +const DBSQLClient = require('../../../dist/DBSQLClient').default; const sampleThriftSchema = { columns: [ @@ -96,19 +96,14 @@ const sampleExpiredRowSet = { }; describe('CloudFetchResultHandler', () => { - let savedConcurrentDownloads; - - beforeEach(() => { - savedConcurrentDownloads = globalConfig.cloudFetchConcurrentDownloads; - }); - - afterEach(() => { - globalConfig.cloudFetchConcurrentDownloads = savedConcurrentDownloads; - }); - it('should report pending data if there are any', async () => { - const context = {}; const rowSetProvider = new RowSetProviderMock(); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; + const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); case1: { @@ -131,10 +126,13 @@ describe('CloudFetchResultHandler', () => { }); it('should extract links from row sets', async () => { - globalConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches + const clientConfig = DBSQLClient.getDefaultConfig(); + clientConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches - const context = {}; const rowSetProvider = new RowSetProviderMock(); + const context = { + getConfig: () => clientConfig, + }; const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); @@ -162,15 +160,18 @@ describe('CloudFetchResultHandler', () => { }); it('should download batches according to settings', async () => { - globalConfig.cloudFetchConcurrentDownloads = 2; + const clientConfig = DBSQLClient.getDefaultConfig(); + clientConfig.cloudFetchConcurrentDownloads = 2; - const context = {}; const rowSet = { startRowOffset: 0, resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks], }; const expectedLinksCount = rowSet.resultLinks.length; const rowSetProvider = new RowSetProviderMock([rowSet]); + const context = { + getConfig: () => clientConfig, + }; const result = new 
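// Worked example of the download bookkeeping asserted in this test, assuming
// cloudFetchConcurrentDownloads = 2 and 4 result links in total (the actual
// fixture counts may differ):
//   fetch #1: buffer empty -> splice 2 links, download both, consume 1
//             pendingLinks = 2, downloadedBatches = 1
//   fetch #2: buffer not empty -> no new downloads, consume 1
//             pendingLinks = 2, downloadedBatches = 0
//   fetch #3: buffer empty again -> splice the last 2 links, consume 1
//             pendingLinks = 0, downloadedBatches = 1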
CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); @@ -190,9 +191,9 @@ describe('CloudFetchResultHandler', () => { expect(items.length).to.be.gt(0); expect(await rowSetProvider.hasMore()).to.be.false; - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); - expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads); + expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads); + expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1); } secondFetch: { @@ -201,9 +202,9 @@ describe('CloudFetchResultHandler', () => { expect(items.length).to.be.gt(0); expect(await rowSetProvider.hasMore()).to.be.false; - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads); // no new fetches - expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 2); + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads); // no new fetches + expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads); + expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 2); } thirdFetch: { @@ -212,19 +213,22 @@ describe('CloudFetchResultHandler', () => { expect(items.length).to.be.gt(0); expect(await rowSetProvider.hasMore()).to.be.false; - expect(result.fetch.callCount).to.be.equal(globalConfig.cloudFetchConcurrentDownloads * 2); + expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads * 2); expect(result.pendingLinks.length).to.be.equal( - expectedLinksCount - globalConfig.cloudFetchConcurrentDownloads * 2, + expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads * 2, ); - expect(result.downloadedBatches.length).to.be.equal(globalConfig.cloudFetchConcurrentDownloads - 1); + expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1); } }); it('should handle HTTP errors', async () => { - globalConfig.cloudFetchConcurrentDownloads = 1; + const clientConfig = DBSQLClient.getDefaultConfig(); + clientConfig.cloudFetchConcurrentDownloads = 1; - const context = {}; const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); + const context = { + getConfig: () => clientConfig, + }; const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); @@ -250,8 +254,12 @@ describe('CloudFetchResultHandler', () => { }); it('should handle expired links', async () => { - const context = {}; const rowSetProvider = new RowSetProviderMock([sampleExpiredRowSet]); + const clientConfig = DBSQLClient.getDefaultConfig(); + + const context = { + getConfig: () => clientConfig, + }; const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); From 8e6098a8aad6c1c618ebe7b62a90cfa80ba6f371 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Mon, 27 Nov 2023 23:59:41 +0200 Subject: [PATCH 03/11] Add some tests for proxy support (#206) Signed-off-by: Levko Kravets --- package-lock.json | 76 ++++++++++++++++++++++++++++++++++++++ 
package.json | 1 + tests/e2e/proxy.test.js | 75 +++++++++++++++++++++++++++++++++++++ tests/e2e/timeouts.test.js | 2 +- 4 files changed, 153 insertions(+), 1 deletion(-) create mode 100644 tests/e2e/proxy.test.js diff --git a/package-lock.json b/package-lock.json index 571bc7d4..5fa9584e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -37,6 +37,7 @@ "eslint-plugin-jsx-a11y": "^6.6.1", "eslint-plugin-react": "^7.30.1", "eslint-plugin-react-hooks": "^4.6.0", + "http-proxy": "^1.18.1", "mocha": "^10.2.0", "nyc": "^15.1.0", "prettier": "^2.8.4", @@ -2644,6 +2645,12 @@ "node": ">=0.10.0" } }, + "node_modules/eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "dev": true + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -2810,6 +2817,26 @@ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" }, + "node_modules/follow-redirects": { + "version": "1.15.3", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", + "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "dev": true, + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, "node_modules/foreground-child": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-2.0.0.tgz", @@ -3195,6 +3222,20 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "node_modules/http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "dependencies": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + }, + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -5063,6 +5104,12 @@ "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==", "dev": true }, + "node_modules/requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "dev": true + }, "node_modules/resolve": { "version": "1.22.1", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.1.tgz", @@ -8007,6 +8054,12 @@ "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==" }, + "eventemitter3": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", + "integrity": 
"sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==", + "dev": true + }, "fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -8142,6 +8195,12 @@ "resolved": "https://registry.npmjs.org/fn.name/-/fn.name-1.1.0.tgz", "integrity": "sha512-GRnmB5gPyJpAhTQdSZTSp9uaPSvl09KoYcMQtsB9rQoOmzs9dH6ffeccH+Z+cv6P68Hu5bC6JjRh4Ah/mHSNRw==" }, + "follow-redirects": { + "version": "1.15.3", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.3.tgz", + "integrity": "sha512-1VzOtuEM8pC9SFU1E+8KfTjZyMztRsgEfwQl44z8A25uy13jSzTj6dyK2Df52iV0vgHCfBwLhDWevLn95w5v6Q==", + "dev": true + }, "foreground-child": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-2.0.0.tgz", @@ -8409,6 +8468,17 @@ "integrity": "sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==", "dev": true }, + "http-proxy": { + "version": "1.18.1", + "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", + "integrity": "sha512-7mz/721AbnJwIVbnaSv1Cz3Am0ZLT/UBwkC92VlxhXv/k/BBQfM2fXElQNC27BVGr0uwUpplYPQM9LnaBMR5NQ==", + "dev": true, + "requires": { + "eventemitter3": "^4.0.0", + "follow-redirects": "^1.0.0", + "requires-port": "^1.0.0" + } + }, "http-proxy-agent": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/http-proxy-agent/-/http-proxy-agent-7.0.0.tgz", @@ -9793,6 +9863,12 @@ "integrity": "sha512-NKN5kMDylKuldxYLSUfrbo5Tuzh4hd+2E8NPPX02mZtn1VuREQToYe/ZdlJy+J3uCpfaiGF05e7B8W0iXbQHmg==", "dev": true }, + "requires-port": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", + "integrity": "sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ==", + "dev": true + }, "resolve": { "version": "1.22.1", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.1.tgz", diff --git a/package.json b/package.json index 34d947bd..3fec127a 100644 --- a/package.json +++ b/package.json @@ -63,6 +63,7 @@ "eslint-plugin-jsx-a11y": "^6.6.1", "eslint-plugin-react": "^7.30.1", "eslint-plugin-react-hooks": "^4.6.0", + "http-proxy": "^1.18.1", "mocha": "^10.2.0", "nyc": "^15.1.0", "prettier": "^2.8.4", diff --git a/tests/e2e/proxy.test.js b/tests/e2e/proxy.test.js new file mode 100644 index 00000000..eab37261 --- /dev/null +++ b/tests/e2e/proxy.test.js @@ -0,0 +1,75 @@ +const { expect } = require('chai'); +const sinon = require('sinon'); +const httpProxy = require('http-proxy'); +const https = require('https'); +const config = require('./utils/config'); +const { DBSQLClient } = require('../..'); + +class HttpProxyMock { + constructor(target, port) { + this.requests = []; + + this.config = { + protocol: 'http', + host: 'localhost', + port, + }; + + this.target = `https://${config.host}`; + + this.proxy = httpProxy.createServer({ + target: this.target, + agent: new https.Agent({ + rejectUnauthorized: false, + }), + }); + + this.proxy.on('proxyRes', (proxyRes) => { + const req = proxyRes.req; + this.requests.push({ + method: req.method?.toUpperCase(), + url: `${req.protocol}//${req.host}${req.path}`, + requestHeaders: { ...req.getHeaders() }, + responseHeaders: proxyRes.headers, + }); + }); + + this.proxy.listen(port); + console.log(`Proxy listening at ${this.config.host}:${this.config.port} -> ${this.target}`); + } + + close() { + this.proxy.close(() => { + console.log(`Proxy stopped at 
${this.config.host}:${this.config.port}`); + }); + } +} + +describe('Proxy', () => { + it('should use http proxy', async () => { + const proxy = new HttpProxyMock(`https://${config.host}`, 9090); + try { + const client = new DBSQLClient(); + const clientConfig = client.getConfig(); + sinon.stub(client, 'getConfig').returns(clientConfig); + + const connection = await client.connect({ + host: config.host, + path: config.path, + token: config.token, + proxy: proxy.config, + }); + + const session = await connection.openSession({ + initialCatalog: config.database[0], + initialSchema: config.database[1], + }); + + expect(proxy.requests.length).to.be.gte(1); + expect(proxy.requests[0].method).to.be.eq('POST'); + expect(proxy.requests[0].url).to.be.eq(`https://${config.host}${config.path}`); + } finally { + proxy.close(); + } + }); +}); diff --git a/tests/e2e/timeouts.test.js b/tests/e2e/timeouts.test.js index bea96c20..c535ce6b 100644 --- a/tests/e2e/timeouts.test.js +++ b/tests/e2e/timeouts.test.js @@ -25,7 +25,7 @@ async function openSession(socketTimeout, customConfig) { }); } -describe('Data fetching', () => { +describe('Timeouts', () => { const socketTimeout = 1; // minimum value to make sure any request will time out it('should use default socket timeout', async () => { From 9b03de36206bb50aa4b8f821af098c595870c821 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Tue, 28 Nov 2023 13:53:53 +0200 Subject: [PATCH 04/11] Make sure that DBSQLOperation.fetchChunk returns chunks of requested size (#200) * Refactoring: Introduce concept of results provider; convert FetchResultsHelper into provider of TRowSet Signed-off-by: Levko Kravets * Convert Json/Arrow/CloudFetch result handlers to implement result provider interface Signed-off-by: Levko Kravets * Refine the code and update tests Signed-off-by: Levko Kravets * Make sure that DBSQLOperation.fetchChunk returns chunks of requested size Signed-off-by: Levko Kravets * Add option to disable result buffering & slicing Signed-off-by: Levko Kravets * Update existing tests Signed-off-by: Levko Kravets * Add tests for ResultSlicer Signed-off-by: Levko Kravets * Refine code Signed-off-by: Levko Kravets * Add more tests Signed-off-by: Levko Kravets --------- Signed-off-by: Levko Kravets --- lib/DBSQLOperation/index.ts | 36 +++++++--- lib/contracts/IOperation.ts | 3 + lib/result/ResultSlicer.ts | 74 +++++++++++++++++++++ tests/e2e/arrow.test.js | 13 ++-- tests/e2e/batched_fetch.test.js | 31 ++++++++- tests/e2e/cloudfetch.test.js | 22 +++--- tests/unit/DBSQLOperation.test.js | 46 +++++++------ tests/unit/result/ResultSlicer.test.js | 92 ++++++++++++++++++++++++++ 8 files changed, 272 insertions(+), 45 deletions(-) create mode 100644 lib/result/ResultSlicer.ts create mode 100644 tests/unit/result/ResultSlicer.test.js diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts index 08e32864..eaddac18 100644 --- a/lib/DBSQLOperation/index.ts +++ b/lib/DBSQLOperation/index.ts @@ -23,6 +23,7 @@ import RowSetProvider from '../result/RowSetProvider'; import JsonResultHandler from '../result/JsonResultHandler'; import ArrowResultHandler from '../result/ArrowResultHandler'; import CloudFetchResultHandler from '../result/CloudFetchResultHandler'; +import ResultSlicer from '../result/ResultSlicer'; import { definedOrError } from '../utils'; import HiveDriverError from '../errors/HiveDriverError'; import IClientContext from '../contracts/IClientContext'; @@ -68,7 +69,7 @@ export default class DBSQLOperation implements IOperation { private hasResultSet: 
boolean = false; - private resultHandler?: IResultsProvider>; + private resultHandler?: ResultSlicer; constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) { this.operationHandle = handle; @@ -107,9 +108,17 @@ export default class DBSQLOperation implements IOperation { */ public async fetchAll(options?: FetchOptions): Promise> { const data: Array> = []; + + const fetchChunkOptions = { + ...options, + // Tell slicer to return raw chunks. We're going to process all of them anyway, + // so no need to additionally buffer and slice chunks returned by server + disableBuffering: true, + }; + do { // eslint-disable-next-line no-await-in-loop - const chunk = await this.fetchChunk(options); + const chunk = await this.fetchChunk(fetchChunkOptions); data.push(chunk); } while (await this.hasMoreRows()); // eslint-disable-line no-await-in-loop this.context.getLogger().log(LogLevel.debug, `Fetched all data from operation with id: ${this.getId()}`); @@ -138,7 +147,10 @@ export default class DBSQLOperation implements IOperation { const resultHandler = await this.getResultHandler(); await this.failIfClosed(); - const result = resultHandler.fetchNext({ limit: options?.maxRows || defaultMaxRows }); + const result = resultHandler.fetchNext({ + limit: options?.maxRows || defaultMaxRows, + disableBuffering: options?.disableBuffering, + }); await this.failIfClosed(); this.context @@ -335,24 +347,28 @@ export default class DBSQLOperation implements IOperation { return this.metadata; } - private async getResultHandler(): Promise>> { + private async getResultHandler(): Promise> { const metadata = await this.fetchMetadata(); const resultFormat = definedOrError(metadata.resultFormat); if (!this.resultHandler) { + let resultSource: IResultsProvider> | undefined; + switch (resultFormat) { case TSparkRowSetType.COLUMN_BASED_SET: - this.resultHandler = new JsonResultHandler(this.context, this._data, metadata.schema); + resultSource = new JsonResultHandler(this.context, this._data, metadata.schema); break; case TSparkRowSetType.ARROW_BASED_SET: - this.resultHandler = new ArrowResultHandler(this.context, this._data, metadata.schema, metadata.arrowSchema); + resultSource = new ArrowResultHandler(this.context, this._data, metadata.schema, metadata.arrowSchema); break; case TSparkRowSetType.URL_BASED_SET: - this.resultHandler = new CloudFetchResultHandler(this.context, this._data, metadata.schema); - break; - default: - this.resultHandler = undefined; + resultSource = new CloudFetchResultHandler(this.context, this._data, metadata.schema); break; + // no default + } + + if (resultSource) { + this.resultHandler = new ResultSlicer(this.context, resultSource); } } diff --git a/lib/contracts/IOperation.ts b/lib/contracts/IOperation.ts index a9ed45fe..123d4da3 100644 --- a/lib/contracts/IOperation.ts +++ b/lib/contracts/IOperation.ts @@ -14,6 +14,9 @@ export interface FinishedOptions extends WaitUntilReadyOptions { export interface FetchOptions extends WaitUntilReadyOptions { maxRows?: number; + // Disables internal buffer used to ensure a consistent chunks size. 
+ // When set to `true`, returned chunks size may vary (and may differ from `maxRows`) + disableBuffering?: boolean; } export interface GetSchemaOptions extends WaitUntilReadyOptions { diff --git a/lib/result/ResultSlicer.ts b/lib/result/ResultSlicer.ts new file mode 100644 index 00000000..0f640a9a --- /dev/null +++ b/lib/result/ResultSlicer.ts @@ -0,0 +1,74 @@ +import IClientContext from '../contracts/IClientContext'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; + +export interface ResultSlicerFetchNextOptions extends ResultsProviderFetchNextOptions { + // Setting this to `true` will disable slicer, and it will return unprocessed chunks + // from underlying results provider + disableBuffering?: boolean; +} + +export default class ResultSlicer implements IResultsProvider> { + private readonly context: IClientContext; + + private readonly source: IResultsProvider>; + + private remainingResults: Array = []; + + constructor(context: IClientContext, source: IResultsProvider>) { + this.context = context; + this.source = source; + } + + public async hasMore(): Promise { + if (this.remainingResults.length > 0) { + return true; + } + return this.source.hasMore(); + } + + public async fetchNext(options: ResultSlicerFetchNextOptions): Promise> { + // If we're asked to not use buffer - first try to return whatever we have in buffer. + // If buffer is empty - just proxy the call to underlying results provider + if (options.disableBuffering) { + if (this.remainingResults.length > 0) { + const result = this.remainingResults; + this.remainingResults = []; + return result; + } + + return this.source.fetchNext(options); + } + + const result: Array> = []; + let resultsCount = 0; + + // First, use remaining items from the previous fetch + if (this.remainingResults.length > 0) { + result.push(this.remainingResults); + resultsCount += this.remainingResults.length; + this.remainingResults = []; + } + + // Fetch items from source results provider until we reach a requested count + while (resultsCount < options.limit) { + // eslint-disable-next-line no-await-in-loop + const chunk = await this.source.fetchNext(options); + if (chunk.length === 0) { + break; + } + + result.push(chunk); + resultsCount += chunk.length; + } + + // If we collected more results than requested, slice the excess items and store them for the next time + if (resultsCount > options.limit) { + const lastChunk = result.pop() ?? 
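// Worked example of this slicing step: with limit = 300, a buffered 100-row
// remainder from the previous fetch, and a 400-row chunk from the source:
//   result = [100-row chunk, 400-row chunk], resultsCount = 500 > limit
//   lastChunk = the 400-row chunk; neededCount = 300 - (500 - 400) = 200
//   keep its first 200 rows, buffer the remaining 200 for the next fetchNext()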
[]; + const neededCount = options.limit - (resultsCount - lastChunk.length); + result.push(lastChunk.splice(0, neededCount)); + this.remainingResults = lastChunk; + } + + return result.flat(); + } +} diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js index 08fe17d8..f513882e 100644 --- a/tests/e2e/arrow.test.js +++ b/tests/e2e/arrow.test.js @@ -4,6 +4,7 @@ const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; +const ResultSlicer = require('../../dist/result/ResultSlicer').default; const fixtures = require('../fixtures/compatibility'); const { expected: expectedColumn } = require('../fixtures/compatibility/column'); @@ -81,7 +82,8 @@ describe('Arrow support', () => { expect(result).to.deep.equal(expectedColumn); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.not.instanceof(ArrowResultHandler); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.not.instanceof(ArrowResultHandler); await operation.close(); }, @@ -100,7 +102,8 @@ describe('Arrow support', () => { expect(fixArrowResult(result)).to.deep.equal(expectedArrow); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResultHandler); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); await operation.close(); }, @@ -120,7 +123,8 @@ describe('Arrow support', () => { expect(fixArrowResult(result)).to.deep.equal(expectedArrowNativeTypes); const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResultHandler); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); await operation.close(); }, @@ -145,7 +149,8 @@ describe('Arrow support', () => { // We use some internals here to check that server returned response with multiple batches const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceof(ArrowResultHandler); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); sinon.spy(operation._data, 'fetchNext'); diff --git a/tests/e2e/batched_fetch.test.js b/tests/e2e/batched_fetch.test.js index 2e3059a9..e22e1a8e 100644 --- a/tests/e2e/batched_fetch.test.js +++ b/tests/e2e/batched_fetch.test.js @@ -38,7 +38,9 @@ describe('Data fetching', () => { try { // set `maxRows` to null to disable direct results so all the data are fetched through `driver.fetchResults` const operation = await session.executeStatement(query, { maxRows: null }); - let chunkedOp = await operation.fetchChunk({ maxRows: 10 }).catch((error) => logger(error)); + let chunkedOp = await operation + .fetchChunk({ maxRows: 10, disableBuffering: true }) + .catch((error) => logger(error)); expect(chunkedOp.length).to.be.equal(10); // we explicitly requested only one chunk expect(session.context.driver.fetchResults.callCount).to.equal(1); @@ -47,6 +49,33 @@ describe('Data fetching', () => { } }); + it('fetch chunks should respect maxRows', async () => { + const session = await openSession({ arrowEnabled: false }); + + const chunkSize = 300; + const lastChunkSize = 100; // 1000 % chunkSize + + try { + const operation = await session.executeStatement(query, { maxRows: 500 }); + + let 
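// Net effect exercised by the test below: with buffering enabled (the default),
// fetchChunk re-cuts server batches so 1000 rows arrive as 300 + 300 + 300 + 100
// regardless of how the server chunked them, while { disableBuffering: true }
// returns chunks exactly as the provider produced them.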
hasMoreRows = true; + let chunkCount = 0; + + while (hasMoreRows) { + let chunkedOp = await operation.fetchChunk({ maxRows: 300 }); + chunkCount += 1; + hasMoreRows = await operation.hasMoreRows(); + + const isLastChunk = !hasMoreRows; + expect(chunkedOp.length).to.be.equal(isLastChunk ? lastChunkSize : chunkSize); + } + + expect(chunkCount).to.be.equal(4); // 1000 = 3*300 + 1*100 + } finally { + await session.close(); + } + }); + it('fetch all should fetch all records', async () => { const session = await openSession({ arrowEnabled: false }); sinon.spy(session.context.driver, 'fetchResults'); diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js index e2b564db..5573b434 100644 --- a/tests/e2e/cloudfetch.test.js +++ b/tests/e2e/cloudfetch.test.js @@ -4,6 +4,7 @@ const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; +const ResultSlicer = require('../../dist/result/ResultSlicer').default; async function openSession(customConfig) { const client = new DBSQLClient(); @@ -51,31 +52,34 @@ describe('CloudFetch', () => { // Check if we're actually getting data via CloudFetch const resultHandler = await operation.getResultHandler(); - expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler); + expect(resultHandler).to.be.instanceof(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(CloudFetchResultHandler); + + const cfResultHandler = resultHandler.source; // Fetch first chunk and check if result handler behaves properly. // With the count of rows we queried, there should be at least one row set, // containing 8 result links. After fetching the first chunk, // result handler should download 5 of them and schedule the rest - expect(await resultHandler.hasMore()).to.be.false; - expect(resultHandler.pendingLinks.length).to.be.equal(0); - expect(resultHandler.downloadedBatches.length).to.be.equal(0); + expect(await cfResultHandler.hasMore()).to.be.false; + expect(cfResultHandler.pendingLinks.length).to.be.equal(0); + expect(cfResultHandler.downloadedBatches.length).to.be.equal(0); sinon.spy(operation._data, 'fetchNext'); - const chunk = await operation.fetchChunk({ maxRows: 100000 }); + const chunk = await operation.fetchChunk({ maxRows: 100000, disableBuffering: true }); // Count links returned from server const resultSet = await operation._data.fetchNext.firstCall.returnValue; const resultLinksCount = resultSet?.resultLinks?.length ?? 
0; - expect(await resultHandler.hasMore()).to.be.true; + expect(await cfResultHandler.hasMore()).to.be.true; // expected batches minus first 5 already fetched - expect(resultHandler.pendingLinks.length).to.be.equal(resultLinksCount - cloudFetchConcurrentDownloads); - expect(resultHandler.downloadedBatches.length).to.be.equal(cloudFetchConcurrentDownloads - 1); + expect(cfResultHandler.pendingLinks.length).to.be.equal(resultLinksCount - cloudFetchConcurrentDownloads); + expect(cfResultHandler.downloadedBatches.length).to.be.equal(cloudFetchConcurrentDownloads - 1); let fetchedRowCount = chunk.length; while (await operation.hasMoreRows()) { - const chunk = await operation.fetchChunk({ maxRows: 100000 }); + const chunk = await operation.fetchChunk({ maxRows: 100000, disableBuffering: true }); fetchedRowCount += chunk.length; } diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js index ef9c89c9..be08f527 100644 --- a/tests/unit/DBSQLOperation.test.js +++ b/tests/unit/DBSQLOperation.test.js @@ -9,6 +9,7 @@ const HiveDriverError = require('../../dist/errors/HiveDriverError').default; const JsonResultHandler = require('../../dist/result/JsonResultHandler').default; const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; +const ResultSlicer = require('../../dist/result/ResultSlicer').default; class OperationHandleMock { constructor(hasResultSet = true) { @@ -407,7 +408,7 @@ describe('DBSQLOperation', () => { expect(operation.cancelled).to.be.true; await expectFailure(() => operation.fetchAll()); - await expectFailure(() => operation.fetchChunk()); + await expectFailure(() => operation.fetchChunk({ disableBuffering: true })); await expectFailure(() => operation.status()); await expectFailure(() => operation.finished()); await expectFailure(() => operation.getSchema()); @@ -533,7 +534,7 @@ describe('DBSQLOperation', () => { expect(operation.closed).to.be.true; await expectFailure(() => operation.fetchAll()); - await expectFailure(() => operation.fetchChunk()); + await expectFailure(() => operation.fetchChunk({ disableBuffering: true })); await expectFailure(() => operation.status()); await expectFailure(() => operation.finished()); await expectFailure(() => operation.getSchema()); @@ -885,7 +886,8 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(JsonResultHandler); + expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(JsonResultHandler); } arrowHandler: { @@ -895,7 +897,8 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(ArrowResultHandler); + expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(ArrowResultHandler); } cloudFetchHandler: { @@ -905,7 +908,8 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; - expect(resultHandler).to.be.instanceOf(CloudFetchResultHandler); + 
expect(resultHandler).to.be.instanceOf(ResultSlicer); + expect(resultHandler.source).to.be.instanceOf(CloudFetchResultHandler); } }); }); @@ -921,7 +925,7 @@ describe('DBSQLOperation', () => { sinon.spy(context.driver, 'fetchResults'); const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([]); expect(context.driver.getResultSetMetadata.called).to.be.false; @@ -948,7 +952,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; expect(results).to.deep.equal([]); @@ -974,7 +978,7 @@ describe('DBSQLOperation', () => { context.driver.fetchResultsResp.results.columns = []; const operation = new DBSQLOperation({ handle, context }); - await operation.fetchChunk({ progress: true }); + await operation.fetchChunk({ progress: true, disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; const request = context.driver.getOperationStatus.getCall(0).args[0]; @@ -1005,7 +1009,7 @@ describe('DBSQLOperation', () => { const callback = sinon.stub(); - await operation.fetchChunk({ callback }); + await operation.fetchChunk({ callback, disableBuffering: true }); expect(context.driver.getOperationStatus.called).to.be.true; expect(callback.callCount).to.be.equal(attemptsUntilFinished); @@ -1023,7 +1027,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([{ test: 1 }, { test: 2 }, { test: 3 }]); expect(context.driver.getResultSetMetadata.called).to.be.true; @@ -1060,7 +1064,7 @@ describe('DBSQLOperation', () => { }, }); - const results = await operation.fetchChunk(); + const results = await operation.fetchChunk({ disableBuffering: true }); expect(results).to.deep.equal([{ test: 5 }, { test: 6 }]); expect(context.driver.getResultSetMetadata.called).to.be.true; @@ -1098,13 +1102,13 @@ describe('DBSQLOperation', () => { }, }); - const results1 = await operation.fetchChunk(); + const results1 = await operation.fetchChunk({ disableBuffering: true }); expect(results1).to.deep.equal([{ test: 5 }, { test: 6 }]); expect(context.driver.getResultSetMetadata.callCount).to.be.eq(1); expect(context.driver.fetchResults.callCount).to.be.eq(0); - const results2 = await operation.fetchChunk(); + const results2 = await operation.fetchChunk({ disableBuffering: true }); expect(results2).to.deep.equal([{ test: 1 }, { test: 2 }, { test: 3 }]); expect(context.driver.getResultSetMetadata.callCount).to.be.eq(1); @@ -1125,7 +1129,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); try { - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect.fail('It should throw a HiveDriverError'); } catch (e) { if (e instanceof AssertionError) { @@ -1180,7 +1184,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; }); @@ -1196,7 
+1200,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; await operation.close(); expect(await operation.hasMoreRows()).to.be.false; @@ -1214,7 +1218,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; await operation.cancel(); expect(await operation.hasMoreRows()).to.be.false; @@ -1232,7 +1236,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; }); @@ -1248,7 +1252,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; }); @@ -1264,7 +1268,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.true; }); @@ -1281,7 +1285,7 @@ describe('DBSQLOperation', () => { const operation = new DBSQLOperation({ handle, context }); expect(await operation.hasMoreRows()).to.be.false; - await operation.fetchChunk(); + await operation.fetchChunk({ disableBuffering: true }); expect(await operation.hasMoreRows()).to.be.false; }); }); diff --git a/tests/unit/result/ResultSlicer.test.js b/tests/unit/result/ResultSlicer.test.js new file mode 100644 index 00000000..7458b462 --- /dev/null +++ b/tests/unit/result/ResultSlicer.test.js @@ -0,0 +1,92 @@ +const { expect } = require('chai'); +const sinon = require('sinon'); +const ResultSlicer = require('../../../dist/result/ResultSlicer').default; + +class ResultsProviderMock { + constructor(chunks) { + this.chunks = chunks; + } + + async hasMore() { + return this.chunks.length > 0; + } + + async fetchNext() { + return this.chunks.shift() ?? 
[]; + } +} + +describe('ResultSlicer', () => { + it('should return chunks of requested size', async () => { + const provider = new ResultsProviderMock([ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ]); + + const slicer = new ResultSlicer({}, provider); + + const chunk1 = await slicer.fetchNext({ limit: 4 }); + expect(chunk1).to.deep.eq([10, 11, 12, 13]); + expect(await slicer.hasMore()).to.be.true; + + const chunk2 = await slicer.fetchNext({ limit: 10 }); + expect(chunk2).to.deep.eq([14, 15, 20, 21, 22, 23, 24, 25, 30, 31]); + expect(await slicer.hasMore()).to.be.true; + + const chunk3 = await slicer.fetchNext({ limit: 10 }); + expect(chunk3).to.deep.eq([32, 33, 34, 35]); + expect(await slicer.hasMore()).to.be.false; + }); + + it('should return raw chunks', async () => { + const provider = new ResultsProviderMock([ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ]); + sinon.spy(provider, 'fetchNext'); + + const slicer = new ResultSlicer({}, provider); + + const chunk1 = await slicer.fetchNext({ limit: 4, disableBuffering: true }); + expect(chunk1).to.deep.eq([10, 11, 12, 13, 14, 15]); + expect(await slicer.hasMore()).to.be.true; + expect(provider.fetchNext.callCount).to.be.equal(1); + + const chunk2 = await slicer.fetchNext({ limit: 10, disableBuffering: true }); + expect(chunk2).to.deep.eq([20, 21, 22, 23, 24, 25]); + expect(await slicer.hasMore()).to.be.true; + expect(provider.fetchNext.callCount).to.be.equal(2); + }); + + it('should switch between returning sliced and raw chunks', async () => { + const provider = new ResultsProviderMock([ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ]); + + const slicer = new ResultSlicer({}, provider); + + const chunk1 = await slicer.fetchNext({ limit: 4 }); + expect(chunk1).to.deep.eq([10, 11, 12, 13]); + expect(await slicer.hasMore()).to.be.true; + + const chunk2 = await slicer.fetchNext({ limit: 10, disableBuffering: true }); + expect(chunk2).to.deep.eq([14, 15]); + expect(await slicer.hasMore()).to.be.true; + + const chunk3 = await slicer.fetchNext({ limit: 10, disableBuffering: true }); + expect(chunk3).to.deep.eq([20, 21, 22, 23, 24, 25]); + expect(await slicer.hasMore()).to.be.true; + + const chunk4 = await slicer.fetchNext({ limit: 4 }); + expect(chunk4).to.deep.eq([30, 31, 32, 33]); + expect(await slicer.hasMore()).to.be.true; + + const chunk5 = await slicer.fetchNext({ limit: 4 }); + expect(chunk5).to.deep.eq([34, 35]); + expect(await slicer.hasMore()).to.be.false; + }); +}); From 69d88b881ce9570bd693e1a2d79833c4635e92ac Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Thu, 30 Nov 2023 22:52:54 +0200 Subject: [PATCH 05/11] Performance fixes (#207) * Fix event loop blocking Signed-off-by: Levko Kravets * Increase connection pool when using CloudFetch Signed-off-by: Levko Kravets --------- Signed-off-by: Levko Kravets --- lib/DBSQLOperation/index.ts | 18 ++++++++++++++++++ lib/connection/connections/HttpConnection.ts | 5 ++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts index eaddac18..9d35fcbc 100644 --- a/lib/DBSQLOperation/index.ts +++ b/lib/DBSQLOperation/index.ts @@ -147,6 +147,24 @@ export default class DBSQLOperation implements IOperation { const resultHandler = await this.getResultHandler(); await this.failIfClosed(); + // All the library code is Promise-based, however, since Promises are microtasks, + // enqueueing a lot of promises may 
block macrotask execution for a while.
+    // Usually not many microtasks are scheduled, but when fetching query
+    // results (especially CloudFetch ones) it is quite easy to block the event loop
+    // long enough to break a lot of things. For example, with CloudFetch, after the
+    // first set of files is downloaded and processed immediately one by one, the
+    // event loop easily gets blocked long enough to break the connection pool: `http.Agent`
+    // stops receiving socket events and marks all sockets invalid on the next attempt
+    // to use them. See these similar issues that helped to debug this particular case:
+    // https://github.com/nodejs/node/issues/47130 and https://github.com/node-fetch/node-fetch/issues/1735
+    // This simple fix lets the microtask queue drain so that Node can process
+    // macrotasks as well, allowing the rest of the code to operate normally. The
+    // fix is applied in `fetchChunk` because, unlike other methods, `fetchChunk` is
+    // a potential source of the issues described above.
+    await new Promise((resolve) => {
+      setTimeout(resolve, 0);
+    });
+
     const result = resultHandler.fetchNext({
       limit: options?.maxRows || defaultMaxRows,
       disableBuffering: options?.disableBuffering,
     });
diff --git a/lib/connection/connections/HttpConnection.ts b/lib/connection/connections/HttpConnection.ts
index 2543dbbf..9631322a 100644
--- a/lib/connection/connections/HttpConnection.ts
+++ b/lib/connection/connections/HttpConnection.ts
@@ -48,9 +48,12 @@ export default class HttpConnection implements IConnectionProvider {
   private getAgentDefaultOptions(): http.AgentOptions {
     const clientConfig = this.context.getConfig();
+
+    const cloudFetchExtraSocketsCount = clientConfig.useCloudFetch ? clientConfig.cloudFetchConcurrentDownloads : 0;
+
     return {
       keepAlive: true,
-      maxSockets: 5,
+      maxSockets: 5 + cloudFetchExtraSocketsCount,
       keepAliveMsecs: 10000,
       timeout: this.options.socketTimeout ??
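
// Editor's note: a minimal standalone sketch (not part of the patch) of the
// yield-to-event-loop trick used above. Promise chains run as microtasks and can
// starve timers and socket events; awaiting a zero-delay timer lets one round of
// macrotasks run between chunks. All names below are illustrative only.
async function processAllChunks<T>(chunks: AsyncIterable<T>, handle: (chunk: T) => Promise<void>): Promise<void> {
  for await (const chunk of chunks) {
    await handle(chunk); // microtask-heavy work, e.g. decoding a downloaded batch
    // Drain the microtask queue and give timers/sockets a chance to fire
    await new Promise<void>((resolve) => {
      setTimeout(resolve, 0);
    });
  }
}
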
clientConfig.socketTimeout, }; From 5c5b87f3f364ad2d5b673bfda5249307ded44ad1 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Mon, 4 Dec 2023 22:14:02 +0200 Subject: [PATCH 06/11] [PECO-953] Optimize CloudFetchResultHandler memory consumption (#204) * Refactoring: Introduce concept of results provider; convert FetchResultsHelper into provider of TRowSet Signed-off-by: Levko Kravets * Convert Json/Arrow/CloudFetch result handlers to implement result provider interface Signed-off-by: Levko Kravets * Refine the code and update tests Signed-off-by: Levko Kravets * Make sure that DBSQLOperation.fetchChunk returns chunks of requested size Signed-off-by: Levko Kravets * Add option to disable result buffering & slicing Signed-off-by: Levko Kravets * Update existing tests Signed-off-by: Levko Kravets * Add tests for ResultSlicer Signed-off-by: Levko Kravets * Refine code Signed-off-by: Levko Kravets * Add more tests Signed-off-by: Levko Kravets * Optimize CloudFetchResultHandler memory consumption Signed-off-by: Levko Kravets * Add and update tests Signed-off-by: Levko Kravets --------- Signed-off-by: Levko Kravets --- .gitignore | 1 + .prettierignore | 1 + lib/DBSQLOperation/index.ts | 13 +- lib/result/ArrowResultConverter.ts | 182 ++++++++++++++++++ lib/result/ArrowResultHandler.ts | 138 ++----------- lib/result/CloudFetchResultHandler.ts | 24 ++- tests/e2e/arrow.test.js | 12 +- tests/e2e/cloudfetch.test.js | 7 +- tests/unit/DBSQLOperation.test.js | 7 +- .../unit/result/ArrowResultConverter.test.js | 113 +++++++++++ tests/unit/result/ArrowResultHandler.test.js | 120 +++--------- .../result/CloudFetchResultHandler.test.js | 59 ++---- tests/unit/result/JsonResultHandler.test.js | 18 +- tests/unit/result/ResultSlicer.test.js | 54 +++--- tests/unit/result/compatibility.test.js | 21 +- .../result/fixtures/ResultsProviderMock.js | 16 ++ .../result/fixtures/RowSetProviderMock.js | 15 -- 17 files changed, 460 insertions(+), 341 deletions(-) create mode 100644 lib/result/ArrowResultConverter.ts create mode 100644 tests/unit/result/ArrowResultConverter.test.js create mode 100644 tests/unit/result/fixtures/ResultsProviderMock.js delete mode 100644 tests/unit/result/fixtures/RowSetProviderMock.js diff --git a/.gitignore b/.gitignore index 819b228b..99381ce5 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ node_modules .nyc_output coverage_e2e coverage_unit +.clinic *.code-workspace dist *.DS_Store diff --git a/.prettierignore b/.prettierignore index f90cbaaa..92e9533a 100644 --- a/.prettierignore +++ b/.prettierignore @@ -5,6 +5,7 @@ node_modules .nyc_output coverage_e2e coverage_unit +.clinic dist thrift diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts index 9d35fcbc..dc6129f6 100644 --- a/lib/DBSQLOperation/index.ts +++ b/lib/DBSQLOperation/index.ts @@ -23,6 +23,7 @@ import RowSetProvider from '../result/RowSetProvider'; import JsonResultHandler from '../result/JsonResultHandler'; import ArrowResultHandler from '../result/ArrowResultHandler'; import CloudFetchResultHandler from '../result/CloudFetchResultHandler'; +import ArrowResultConverter from '../result/ArrowResultConverter'; import ResultSlicer from '../result/ResultSlicer'; import { definedOrError } from '../utils'; import HiveDriverError from '../errors/HiveDriverError'; @@ -377,10 +378,18 @@ export default class DBSQLOperation implements IOperation { resultSource = new JsonResultHandler(this.context, this._data, metadata.schema); break; case TSparkRowSetType.ARROW_BASED_SET: - resultSource = new 
ArrowResultHandler(this.context, this._data, metadata.schema, metadata.arrowSchema); + resultSource = new ArrowResultConverter( + this.context, + new ArrowResultHandler(this.context, this._data, metadata.arrowSchema), + metadata.schema, + ); break; case TSparkRowSetType.URL_BASED_SET: - resultSource = new CloudFetchResultHandler(this.context, this._data, metadata.schema); + resultSource = new ArrowResultConverter( + this.context, + new CloudFetchResultHandler(this.context, this._data), + metadata.schema, + ); break; // no default } diff --git a/lib/result/ArrowResultConverter.ts b/lib/result/ArrowResultConverter.ts new file mode 100644 index 00000000..1235b2b9 --- /dev/null +++ b/lib/result/ArrowResultConverter.ts @@ -0,0 +1,182 @@ +import { Buffer } from 'buffer'; +import { + Table, + Schema, + Field, + TypeMap, + DataType, + Type, + StructRow, + MapRow, + Vector, + RecordBatch, + RecordBatchReader, + util as arrowUtils, +} from 'apache-arrow'; +import { TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types'; +import IClientContext from '../contracts/IClientContext'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; +import { getSchemaColumns, convertThriftValue } from './utils'; + +const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils; + +type ArrowSchema = Schema; +type ArrowSchemaField = Field>; + +export default class ArrowResultConverter implements IResultsProvider> { + protected readonly context: IClientContext; + + private readonly source: IResultsProvider>; + + private readonly schema: Array; + + private reader?: IterableIterator>; + + private pendingRecordBatch?: RecordBatch; + + constructor(context: IClientContext, source: IResultsProvider>, schema?: TTableSchema) { + this.context = context; + this.source = source; + this.schema = getSchemaColumns(schema); + } + + public async hasMore() { + if (this.schema.length === 0) { + return false; + } + if (this.pendingRecordBatch) { + return true; + } + return this.source.hasMore(); + } + + public async fetchNext(options: ResultsProviderFetchNextOptions) { + if (this.schema.length === 0) { + return []; + } + + // eslint-disable-next-line no-constant-condition + while (true) { + // It's not possible to know if iterator has more items until trying + // to get the next item. But we need to know if iterator is empty right + // after getting the next item. Therefore, after creating the iterator, + // we get one item more and store it in `pendingRecordBatch`. Next time, + // we use that stored item, and prefetch the next one. Prefetched item + // is therefore the next item we are going to return, so it can be used + // to know if we actually can return anything next time + const recordBatch = this.pendingRecordBatch; + this.pendingRecordBatch = this.prefetch(); + + if (recordBatch) { + const table = new Table(recordBatch); + return this.getRows(table.schema, table.toArray()); + } + + // eslint-disable-next-line no-await-in-loop + const batches = await this.source.fetchNext(options); + if (batches.length === 0) { + this.reader = undefined; + break; + } + + const reader = RecordBatchReader.from(batches); + this.reader = reader[Symbol.iterator](); + this.pendingRecordBatch = this.prefetch(); + } + + return []; + } + + private prefetch(): RecordBatch | undefined { + const item = this.reader?.next() ?? 
{ done: true, value: undefined }; + + if (item.done || item.value === undefined) { + this.reader = undefined; + return undefined; + } + + return item.value; + } + + private getRows(schema: ArrowSchema, rows: Array): Array { + return rows.map((row) => { + // First, convert native Arrow values to corresponding plain JS objects + const record = this.convertArrowTypes(row, undefined, schema.fields); + // Second, cast all the values to original Thrift types + return this.convertThriftTypes(record); + }); + } + + private convertArrowTypes(value: any, valueType: DataType | undefined, fields: Array = []): any { + if (value === null) { + return value; + } + + const fieldsMap: Record = {}; + for (const field of fields) { + fieldsMap[field.name] = field; + } + + // Convert structures to plain JS object and process all its fields recursively + if (value instanceof StructRow) { + const result = value.toJSON(); + for (const key of Object.keys(result)) { + const field: ArrowSchemaField | undefined = fieldsMap[key]; + result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); + } + return result; + } + if (value instanceof MapRow) { + const result = value.toJSON(); + // Map type consists of its key and value types. We need only value type here, key will be cast to string anyway + const field = fieldsMap.entries?.type.children.find((item) => item.name === 'value'); + for (const key of Object.keys(result)) { + result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); + } + return result; + } + + // Convert lists to JS array and process items recursively + if (value instanceof Vector) { + const result = value.toJSON(); + // Array type contains the only child which defines a type of each array's element + const field = fieldsMap.element; + return result.map((item) => this.convertArrowTypes(item, field?.type, field?.type.children || [])); + } + + if (DataType.isTimestamp(valueType)) { + return new Date(value); + } + + // Convert big number values to BigInt + // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float) + if (value instanceof Object && value[isArrowBigNumSymbol]) { + const result = bigNumToBigInt(value); + if (DataType.isDecimal(valueType)) { + return Number(result) / 10 ** valueType.scale; + } + return result; + } + + // Convert binary data to Buffer + if (value instanceof Uint8Array) { + return Buffer.from(value); + } + + // Return other values as is + return typeof value === 'bigint' ? Number(value) : value; + } + + private convertThriftTypes(record: Record): any { + const result: Record = {}; + + this.schema.forEach((column) => { + const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry; + const field = column.columnName; + const value = record[field]; + result[field] = value === null ? 
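
// Editor's note: a standalone sketch (not part of the patch) of the
// prefetch-one-ahead pattern described in the comment inside `fetchNext` above.
// A plain iterator cannot report emptiness without consuming an item, so we
// always hold the next item and hand out the previous one. Names are illustrative.
class PrefetchingIterator<T> {
  private pending?: T;

  constructor(private readonly inner: Iterator<T>) {
    this.pending = this.pull(); // prime the buffer with the first item
  }

  public hasMore(): boolean {
    return this.pending !== undefined;
  }

  public next(): T | undefined {
    const current = this.pending;
    this.pending = this.pull(); // prefetch the item after the one we return
    return current;
  }

  private pull(): T | undefined {
    const item = this.inner.next();
    return item.done ? undefined : item.value;
  }
}
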
null : convertThriftValue(typeDescriptor, value); + }); + + return result; + } +} diff --git a/lib/result/ArrowResultHandler.ts b/lib/result/ArrowResultHandler.ts index a1076293..6978a7d6 100644 --- a/lib/result/ArrowResultHandler.ts +++ b/lib/result/ArrowResultHandler.ts @@ -1,158 +1,46 @@ import { Buffer } from 'buffer'; -import { - tableFromIPC, - Schema, - Field, - TypeMap, - DataType, - Type, - StructRow, - MapRow, - Vector, - util as arrowUtils, -} from 'apache-arrow'; -import { TRowSet, TTableSchema, TColumnDesc } from '../../thrift/TCLIService_types'; +import { TRowSet } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; -import { getSchemaColumns, convertThriftValue } from './utils'; -const { isArrowBigNumSymbol, bigNumToBigInt } = arrowUtils; - -type ArrowSchema = Schema; -type ArrowSchemaField = Field>; - -export default class ArrowResultHandler implements IResultsProvider> { +export default class ArrowResultHandler implements IResultsProvider> { protected readonly context: IClientContext; private readonly source: IResultsProvider; - private readonly schema: Array; - private readonly arrowSchema?: Buffer; - constructor( - context: IClientContext, - source: IResultsProvider, - schema?: TTableSchema, - arrowSchema?: Buffer, - ) { + constructor(context: IClientContext, source: IResultsProvider, arrowSchema?: Buffer) { this.context = context; this.source = source; - this.schema = getSchemaColumns(schema); this.arrowSchema = arrowSchema; } public async hasMore() { + if (!this.arrowSchema) { + return false; + } return this.source.hasMore(); } public async fetchNext(options: ResultsProviderFetchNextOptions) { - if (this.schema.length === 0 || !this.arrowSchema) { - return []; - } - - const data = await this.source.fetchNext(options); - - const batches = await this.getBatches(data); - if (batches.length === 0) { + if (!this.arrowSchema) { return []; } - const table = tableFromIPC([this.arrowSchema, ...batches]); - return this.getRows(table.schema, table.toArray()); - } - - protected async getBatches(rowSet?: TRowSet): Promise> { - const result: Array = []; + const rowSet = await this.source.fetchNext(options); + const batches: Array = []; rowSet?.arrowBatches?.forEach((arrowBatch) => { if (arrowBatch.batch) { - result.push(arrowBatch.batch); + batches.push(arrowBatch.batch); } }); - return result; - } - - private getRows(schema: ArrowSchema, rows: Array): Array { - return rows.map((row) => { - // First, convert native Arrow values to corresponding plain JS objects - const record = this.convertArrowTypes(row, undefined, schema.fields); - // Second, cast all the values to original Thrift types - return this.convertThriftTypes(record); - }); - } - - private convertArrowTypes(value: any, valueType: DataType | undefined, fields: Array = []): any { - if (value === null) { - return value; - } - - const fieldsMap: Record = {}; - for (const field of fields) { - fieldsMap[field.name] = field; - } - - // Convert structures to plain JS object and process all its fields recursively - if (value instanceof StructRow) { - const result = value.toJSON(); - for (const key of Object.keys(result)) { - const field: ArrowSchemaField | undefined = fieldsMap[key]; - result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); - } - return result; - } - if (value instanceof MapRow) { - const result = value.toJSON(); - // Map type consists of its key 
and value types. We need only value type here, key will be cast to string anyway - const field = fieldsMap.entries?.type.children.find((item) => item.name === 'value'); - for (const key of Object.keys(result)) { - result[key] = this.convertArrowTypes(result[key], field?.type, field?.type.children || []); - } - return result; - } - - // Convert lists to JS array and process items recursively - if (value instanceof Vector) { - const result = value.toJSON(); - // Array type contains the only child which defines a type of each array's element - const field = fieldsMap.element; - return result.map((item) => this.convertArrowTypes(item, field?.type, field?.type.children || [])); - } - - if (DataType.isTimestamp(valueType)) { - return new Date(value); - } - - // Convert big number values to BigInt - // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float) - if (value instanceof Object && value[isArrowBigNumSymbol]) { - const result = bigNumToBigInt(value); - if (DataType.isDecimal(valueType)) { - return Number(result) / 10 ** valueType.scale; - } - return result; - } - - // Convert binary data to Buffer - if (value instanceof Uint8Array) { - return Buffer.from(value); + if (batches.length === 0) { + return []; } - // Return other values as is - return typeof value === 'bigint' ? Number(value) : value; - } - - private convertThriftTypes(record: Record): any { - const result: Record = {}; - - this.schema.forEach((column) => { - const typeDescriptor = column.typeDesc.types[0]?.primitiveEntry; - const field = column.columnName; - const value = record[field]; - result[field] = value === null ? null : convertThriftValue(typeDescriptor, value); - }); - - return result; + return [this.arrowSchema, ...batches]; } } diff --git a/lib/result/CloudFetchResultHandler.ts b/lib/result/CloudFetchResultHandler.ts index db52b95f..a8e94760 100644 --- a/lib/result/CloudFetchResultHandler.ts +++ b/lib/result/CloudFetchResultHandler.ts @@ -1,29 +1,33 @@ import { Buffer } from 'buffer'; import fetch, { RequestInfo, RequestInit } from 'node-fetch'; -import { TRowSet, TSparkArrowResultLink, TTableSchema } from '../../thrift/TCLIService_types'; +import { TRowSet, TSparkArrowResultLink } from '../../thrift/TCLIService_types'; import IClientContext from '../contracts/IClientContext'; -import IResultsProvider from './IResultsProvider'; -import ArrowResultHandler from './ArrowResultHandler'; +import IResultsProvider, { ResultsProviderFetchNextOptions } from './IResultsProvider'; + +export default class CloudFetchResultHandler implements IResultsProvider> { + protected readonly context: IClientContext; + + private readonly source: IResultsProvider; -export default class CloudFetchResultHandler extends ArrowResultHandler { private pendingLinks: Array = []; private downloadedBatches: Array = []; - constructor(context: IClientContext, source: IResultsProvider, schema?: TTableSchema) { - // Arrow schema returned in metadata is not needed for CloudFetch results: - // each batch already contains schema and could be decoded as is - super(context, source, schema, Buffer.alloc(0)); + constructor(context: IClientContext, source: IResultsProvider) { + this.context = context; + this.source = source; } public async hasMore() { if (this.pendingLinks.length > 0 || this.downloadedBatches.length > 0) { return true; } - return super.hasMore(); + return this.source.hasMore(); } - protected async getBatches(data?: TRowSet): Promise> { + public async fetchNext(options: 
ResultsProviderFetchNextOptions) { + const data = await this.source.fetchNext(options); + data?.resultLinks?.forEach((link) => { this.pendingLinks.push(link); }); diff --git a/tests/e2e/arrow.test.js b/tests/e2e/arrow.test.js index f513882e..d23c9552 100644 --- a/tests/e2e/arrow.test.js +++ b/tests/e2e/arrow.test.js @@ -4,6 +4,7 @@ const config = require('./utils/config'); const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; const ResultSlicer = require('../../dist/result/ResultSlicer').default; const fixtures = require('../fixtures/compatibility'); @@ -83,7 +84,7 @@ describe('Arrow support', () => { const resultHandler = await operation.getResultHandler(); expect(resultHandler).to.be.instanceof(ResultSlicer); - expect(resultHandler.source).to.be.not.instanceof(ArrowResultHandler); + expect(resultHandler.source).to.be.not.instanceof(ArrowResultConverter); await operation.close(); }, @@ -103,7 +104,8 @@ describe('Arrow support', () => { const resultHandler = await operation.getResultHandler(); expect(resultHandler).to.be.instanceof(ResultSlicer); - expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); await operation.close(); }, @@ -124,7 +126,8 @@ describe('Arrow support', () => { const resultHandler = await operation.getResultHandler(); expect(resultHandler).to.be.instanceof(ResultSlicer); - expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); await operation.close(); }, @@ -150,7 +153,8 @@ describe('Arrow support', () => { // We use some internals here to check that server returned response with multiple batches const resultHandler = await operation.getResultHandler(); expect(resultHandler).to.be.instanceof(ResultSlicer); - expect(resultHandler.source).to.be.instanceof(ArrowResultHandler); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceof(ArrowResultHandler); sinon.spy(operation._data, 'fetchNext'); diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js index 5573b434..4f6c11c7 100644 --- a/tests/e2e/cloudfetch.test.js +++ b/tests/e2e/cloudfetch.test.js @@ -1,9 +1,9 @@ const { expect } = require('chai'); const sinon = require('sinon'); const config = require('./utils/config'); -const logger = require('./utils/logger')(config.logger); const { DBSQLClient } = require('../..'); const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; const ResultSlicer = require('../../dist/result/ResultSlicer').default; async function openSession(customConfig) { @@ -53,9 +53,10 @@ describe('CloudFetch', () => { // Check if we're actually getting data via CloudFetch const resultHandler = await operation.getResultHandler(); expect(resultHandler).to.be.instanceof(ResultSlicer); - expect(resultHandler.source).to.be.instanceOf(CloudFetchResultHandler); + expect(resultHandler.source).to.be.instanceof(ArrowResultConverter); + 
expect(resultHandler.source.source).to.be.instanceOf(CloudFetchResultHandler); - const cfResultHandler = resultHandler.source; + const cfResultHandler = resultHandler.source.source; // Fetch first chunk and check if result handler behaves properly. // With the count of rows we queried, there should be at least one row set, diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js index be08f527..83fb35fd 100644 --- a/tests/unit/DBSQLOperation.test.js +++ b/tests/unit/DBSQLOperation.test.js @@ -7,6 +7,7 @@ const StatusError = require('../../dist/errors/StatusError').default; const OperationStateError = require('../../dist/errors/OperationStateError').default; const HiveDriverError = require('../../dist/errors/HiveDriverError').default; const JsonResultHandler = require('../../dist/result/JsonResultHandler').default; +const ArrowResultConverter = require('../../dist/result/ArrowResultConverter').default; const ArrowResultHandler = require('../../dist/result/ArrowResultHandler').default; const CloudFetchResultHandler = require('../../dist/result/CloudFetchResultHandler').default; const ResultSlicer = require('../../dist/result/ResultSlicer').default; @@ -898,7 +899,8 @@ describe('DBSQLOperation', () => { const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; expect(resultHandler).to.be.instanceOf(ResultSlicer); - expect(resultHandler.source).to.be.instanceOf(ArrowResultHandler); + expect(resultHandler.source).to.be.instanceOf(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceOf(ArrowResultHandler); } cloudFetchHandler: { @@ -909,7 +911,8 @@ describe('DBSQLOperation', () => { const resultHandler = await operation.getResultHandler(); expect(context.driver.getResultSetMetadata.called).to.be.true; expect(resultHandler).to.be.instanceOf(ResultSlicer); - expect(resultHandler.source).to.be.instanceOf(CloudFetchResultHandler); + expect(resultHandler.source).to.be.instanceOf(ArrowResultConverter); + expect(resultHandler.source.source).to.be.instanceOf(CloudFetchResultHandler); } }); }); diff --git a/tests/unit/result/ArrowResultConverter.test.js b/tests/unit/result/ArrowResultConverter.test.js new file mode 100644 index 00000000..3ff87a15 --- /dev/null +++ b/tests/unit/result/ArrowResultConverter.test.js @@ -0,0 +1,113 @@ +const { expect } = require('chai'); +const fs = require('fs'); +const path = require('path'); +const ArrowResultConverter = require('../../../dist/result/ArrowResultConverter').default; +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); + +const sampleThriftSchema = { + columns: [ + { + columnName: '1', + typeDesc: { + types: [ + { + primitiveEntry: { + type: 3, + typeQualifiers: null, + }, + }, + ], + }, + position: 1, + }, + ], +}; + +const sampleArrowSchema = Buffer.from([ + 255, 255, 255, 255, 208, 0, 0, 0, 16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 6, 0, 13, 0, 8, 0, 10, 0, 0, 0, 0, 0, 4, 0, 16, 0, + 0, 0, 0, 1, 10, 0, 12, 0, 0, 0, 8, 0, 4, 0, 10, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 24, 0, 0, 0, + 0, 0, 18, 0, 24, 0, 20, 0, 0, 0, 19, 0, 12, 0, 0, 0, 8, 0, 4, 0, 18, 0, 0, 0, 20, 0, 0, 0, 80, 0, 0, 0, 88, 0, 0, 0, + 0, 0, 0, 2, 92, 0, 0, 0, 1, 0, 0, 0, 12, 0, 0, 0, 8, 0, 12, 0, 8, 0, 4, 0, 8, 0, 0, 0, 8, 0, 0, 0, 12, 0, 0, 0, 3, 0, + 0, 0, 73, 78, 84, 0, 22, 0, 0, 0, 83, 112, 97, 114, 107, 58, 68, 97, 116, 97, 84, 121, 112, 101, 58, 83, 113, 108, 78, + 97, 109, 101, 0, 0, 0, 0, 0, 0, 8, 0, 12, 0, 8, 0, 7, 0, 8, 0, 0, 0, 0, 0, 0, 1, 
32, 0, 0, 0, 1, 0, 0, 0, 49, 0, 0, 0, + 0, 0, 0, 0, +]); + +const sampleArrowBatch = [ + sampleArrowSchema, + Buffer.from([ + 255, 255, 255, 255, 136, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 12, 0, 22, 0, 14, 0, 21, 0, 16, 0, 4, 0, 12, 0, 0, 0, 16, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 16, 0, 0, 0, 0, 3, 10, 0, 24, 0, 12, 0, 8, 0, 4, 0, 10, 0, 0, 0, 20, 0, 0, 0, 56, + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, + 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + ]), +]; + +const thriftSchemaAllNulls = JSON.parse( + fs.readFileSync(path.join(__dirname, 'fixtures/thriftSchemaAllNulls.json')).toString('utf-8'), +); + +const arrowBatchAllNulls = [ + fs.readFileSync(path.join(__dirname, 'fixtures/arrowSchemaAllNulls.arrow')), + fs.readFileSync(path.join(__dirname, 'fixtures/dataAllNulls.arrow')), +]; + +describe('ArrowResultHandler', () => { + it('should convert data', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([sampleArrowBatch]); + const result = new ArrowResultConverter(context, rowSetProvider, sampleThriftSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([{ 1: 1 }]); + }); + + it('should return empty array if no data to process', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([], []); + const result = new ArrowResultConverter(context, rowSetProvider, sampleThriftSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; + }); + + it('should return empty array if no schema available', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([sampleArrowBatch]); + const result = new ArrowResultConverter(context, rowSetProvider); + expect(await result.hasMore()).to.be.false; + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + }); + + it('should detect nulls', async () => { + const context = {}; + const rowSetProvider = new ResultsProviderMock([arrowBatchAllNulls]); + const result = new ArrowResultConverter(context, rowSetProvider, thriftSchemaAllNulls); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ + { + boolean_field: null, + + tinyint_field: null, + smallint_field: null, + int_field: null, + bigint_field: null, + + float_field: null, + double_field: null, + decimal_field: null, + + string_field: null, + char_field: null, + varchar_field: null, + + timestamp_field: null, + date_field: null, + day_interval_field: null, + month_interval_field: null, + + binary_field: null, + + struct_field: null, + array_field: null, + }, + ]); + }); +}); diff --git a/tests/unit/result/ArrowResultHandler.test.js b/tests/unit/result/ArrowResultHandler.test.js index 03cdb5e1..9c24e680 100644 --- a/tests/unit/result/ArrowResultHandler.test.js +++ b/tests/unit/result/ArrowResultHandler.test.js @@ -2,26 +2,7 @@ const { expect } = require('chai'); const fs = require('fs'); const path = require('path'); const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default; -const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); - -const sampleThriftSchema = { - columns: [ - { - columnName: '1', - typeDesc: { - types: [ - { - primitiveEntry: { - type: 3, - typeQualifiers: null, - }, - }, - ], - }, - position: 1, - }, - ], -}; +const ResultsProviderMock = 
require('./fixtures/ResultsProviderMock'); const sampleArrowSchema = Buffer.from([ 255, 255, 255, 255, 208, 0, 0, 0, 16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 6, 0, 13, 0, 8, 0, 10, 0, 0, 0, 0, 0, 4, 0, 16, 0, @@ -33,11 +14,6 @@ const sampleArrowSchema = Buffer.from([ 0, 0, 0, 0, ]); -const sampleEmptyArrowBatch = { - batch: undefined, - rowCount: 0, -}; - const sampleArrowBatch = { batch: Buffer.from([ 255, 255, 255, 255, 136, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 12, 0, 22, 0, 14, 0, 21, 0, 16, 0, 4, 0, 12, 0, 0, 0, 16, @@ -51,36 +27,25 @@ const sampleArrowBatch = { const sampleRowSet1 = { startRowOffset: 0, - arrowBatches: undefined, + arrowBatches: [sampleArrowBatch], }; const sampleRowSet2 = { startRowOffset: 0, - arrowBatches: [], + arrowBatches: undefined, }; const sampleRowSet3 = { startRowOffset: 0, - arrowBatches: [sampleEmptyArrowBatch], + arrowBatches: [], }; const sampleRowSet4 = { - startRowOffset: 0, - arrowBatches: [sampleArrowBatch], -}; - -const thriftSchemaAllNulls = JSON.parse( - fs.readFileSync(path.join(__dirname, 'fixtures/thriftSchemaAllNulls.json')).toString('utf-8'), -); - -const arrowSchemaAllNulls = fs.readFileSync(path.join(__dirname, 'fixtures/arrowSchemaAllNulls.arrow')); - -const rowSetAllNulls = { startRowOffset: 0, arrowBatches: [ { - batch: fs.readFileSync(path.join(__dirname, 'fixtures/dataAllNulls.arrow')), - rowCount: 1, + batch: undefined, + rowCount: 0, }, ], }; @@ -88,8 +53,8 @@ const rowSetAllNulls = { describe('ArrowResultHandler', () => { it('should not buffer any data', async () => { const context = {}; - const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + const rowSetProvider = new ResultsProviderMock([sampleRowSet1]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); expect(await rowSetProvider.hasMore()).to.be.true; expect(await result.hasMore()).to.be.true; @@ -98,76 +63,39 @@ describe('ArrowResultHandler', () => { expect(await result.hasMore()).to.be.false; }); - it('should convert data', async () => { + it('should return empty array if no data to process', async () => { const context = {}; - case1: { - const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + const rowSetProvider = new ResultsProviderMock(); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; } case2: { - const rowSetProvider = new RowSetProviderMock([sampleRowSet2]); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + const rowSetProvider = new ResultsProviderMock([sampleRowSet2]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; } case3: { - const rowSetProvider = new RowSetProviderMock([sampleRowSet3]); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); + const rowSetProvider = new ResultsProviderMock([sampleRowSet3]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; } case4: 
{ - const rowSetProvider = new RowSetProviderMock([sampleRowSet4]); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); - expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([{ 1: 1 }]); + const rowSetProvider = new ResultsProviderMock([sampleRowSet4]); + const result = new ArrowResultHandler(context, rowSetProvider, sampleArrowSchema); + expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); + expect(await result.hasMore()).to.be.false; } }); - it('should return empty array if no data to process', async () => { - const context = {}; - const rowSetProvider = new RowSetProviderMock(); - const result = new ArrowResultHandler(context, rowSetProvider, sampleThriftSchema, sampleArrowSchema); - expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); - }); - it('should return empty array if no schema available', async () => { const context = {}; - const rowSetProvider = new RowSetProviderMock([sampleRowSet4]); + const rowSetProvider = new ResultsProviderMock([sampleRowSet2]); const result = new ArrowResultHandler(context, rowSetProvider); expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); - }); - - it('should detect nulls', async () => { - const context = {}; - const rowSetProvider = new RowSetProviderMock([rowSetAllNulls]); - const result = new ArrowResultHandler(context, rowSetProvider, thriftSchemaAllNulls, arrowSchemaAllNulls); - expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([ - { - boolean_field: null, - - tinyint_field: null, - smallint_field: null, - int_field: null, - bigint_field: null, - - float_field: null, - double_field: null, - decimal_field: null, - - string_field: null, - char_field: null, - varchar_field: null, - - timestamp_field: null, - date_field: null, - day_interval_field: null, - month_interval_field: null, - - binary_field: null, - - struct_field: null, - array_field: null, - }, - ]); + expect(await result.hasMore()).to.be.false; }); }); diff --git a/tests/unit/result/CloudFetchResultHandler.test.js b/tests/unit/result/CloudFetchResultHandler.test.js index 29367c55..962f3906 100644 --- a/tests/unit/result/CloudFetchResultHandler.test.js +++ b/tests/unit/result/CloudFetchResultHandler.test.js @@ -2,28 +2,9 @@ const { expect, AssertionError } = require('chai'); const sinon = require('sinon'); const Int64 = require('node-int64'); const CloudFetchResultHandler = require('../../../dist/result/CloudFetchResultHandler').default; -const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); const DBSQLClient = require('../../../dist/DBSQLClient').default; -const sampleThriftSchema = { - columns: [ - { - columnName: '1', - typeDesc: { - types: [ - { - primitiveEntry: { - type: 3, - typeQualifiers: null, - }, - }, - ], - }, - position: 1, - }, - ], -}; - const sampleArrowSchema = Buffer.from([ 255, 255, 255, 255, 208, 0, 0, 0, 16, 0, 0, 0, 0, 0, 10, 0, 14, 0, 6, 0, 13, 0, 8, 0, 10, 0, 0, 0, 0, 0, 4, 0, 16, 0, 0, 0, 0, 1, 10, 0, 12, 0, 0, 0, 8, 0, 4, 0, 10, 0, 0, 0, 8, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 24, 0, 0, 0, @@ -97,14 +78,14 @@ const sampleExpiredRowSet = { describe('CloudFetchResultHandler', () => { it('should report pending data if there are any', async () => { - const rowSetProvider = new RowSetProviderMock(); + const rowSetProvider = new ResultsProviderMock(); const clientConfig = DBSQLClient.getDefaultConfig(); const context = { getConfig: () => clientConfig, 
}; - const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); case1: { result.pendingLinks = []; @@ -129,12 +110,15 @@ describe('CloudFetchResultHandler', () => { const clientConfig = DBSQLClient.getDefaultConfig(); clientConfig.cloudFetchConcurrentDownloads = 0; // this will prevent it from downloading batches - const rowSetProvider = new RowSetProviderMock(); + const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2]; + const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); + + const rowSetProvider = new ResultsProviderMock(rowSets); const context = { getConfig: () => clientConfig, }; - const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ @@ -145,18 +129,13 @@ describe('CloudFetchResultHandler', () => { }), ); - const rowSets = [sampleRowSet1, sampleEmptyRowSet, sampleRowSet2]; - const expectedLinksCount = rowSets.reduce((prev, item) => prev + (item.resultLinks?.length ?? 0), 0); + do { + await result.fetchNext({ limit: 100000 }); + } while (await rowSetProvider.hasMore()); - const batches = []; - for (const rowSet of rowSets) { - const items = await result.getBatches(rowSet); - batches.push(...items); - } - - expect(batches.length).to.be.equal(0); - expect(result.fetch.called).to.be.false; expect(result.pendingLinks.length).to.be.equal(expectedLinksCount); + expect(result.downloadedBatches.length).to.be.equal(0); + expect(result.fetch.called).to.be.false; }); it('should download batches according to settings', async () => { @@ -168,12 +147,12 @@ describe('CloudFetchResultHandler', () => { resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks], }; const expectedLinksCount = rowSet.resultLinks.length; - const rowSetProvider = new RowSetProviderMock([rowSet]); + const rowSetProvider = new ResultsProviderMock([rowSet]); const context = { getConfig: () => clientConfig, }; - const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ @@ -225,12 +204,12 @@ describe('CloudFetchResultHandler', () => { const clientConfig = DBSQLClient.getDefaultConfig(); clientConfig.cloudFetchConcurrentDownloads = 1; - const rowSetProvider = new RowSetProviderMock([sampleRowSet1]); + const rowSetProvider = new ResultsProviderMock([sampleRowSet1]); const context = { getConfig: () => clientConfig, }; - const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ @@ -254,14 +233,14 @@ describe('CloudFetchResultHandler', () => { }); it('should handle expired links', async () => { - const rowSetProvider = new RowSetProviderMock([sampleExpiredRowSet]); + const rowSetProvider = new ResultsProviderMock([sampleExpiredRowSet]); const clientConfig = DBSQLClient.getDefaultConfig(); const context = { getConfig: () => clientConfig, }; - const result = new CloudFetchResultHandler(context, rowSetProvider, sampleThriftSchema); + const result = new CloudFetchResultHandler(context, rowSetProvider); sinon.stub(result, 'fetch').returns( Promise.resolve({ diff --git 
a/tests/unit/result/JsonResultHandler.test.js b/tests/unit/result/JsonResultHandler.test.js index db6ff01d..d6c3bf09 100644 --- a/tests/unit/result/JsonResultHandler.test.js +++ b/tests/unit/result/JsonResultHandler.test.js @@ -2,7 +2,7 @@ const { expect } = require('chai'); const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default; const { TCLIService_types } = require('../../../').thrift; const Int64 = require('node-int64'); -const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); const getColumnSchema = (columnName, type, position) => { if (type === undefined) { @@ -40,7 +40,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider, schema); expect(await rowSetProvider.hasMore()).to.be.true; @@ -133,7 +133,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider, schema); @@ -206,7 +206,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider, schema); @@ -228,7 +228,7 @@ describe('JsonResultHandler', () => { it('should detect nulls', () => { const context = {}; - const rowSetProvider = new RowSetProviderMock(); + const rowSetProvider = new ResultsProviderMock(); const result = new JsonResultHandler(context, rowSetProvider, null); const buf = Buffer.from([0x55, 0xaa, 0xc3]); @@ -343,7 +343,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider, schema); @@ -375,7 +375,7 @@ describe('JsonResultHandler', () => { }; const context = {}; - const rowSetProvider = new RowSetProviderMock(); + const rowSetProvider = new ResultsProviderMock(); const result = new JsonResultHandler(context, rowSetProvider, schema); expect(await result.fetchNext({ limit: 10000 })).to.be.deep.eq([]); @@ -393,7 +393,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider); @@ -429,7 +429,7 @@ describe('JsonResultHandler', () => { ]; const context = {}; - const rowSetProvider = new RowSetProviderMock(data); + const rowSetProvider = new ResultsProviderMock(data); const result = new JsonResultHandler(context, rowSetProvider, schema); diff --git a/tests/unit/result/ResultSlicer.test.js b/tests/unit/result/ResultSlicer.test.js index 7458b462..715d250b 100644 --- a/tests/unit/result/ResultSlicer.test.js +++ b/tests/unit/result/ResultSlicer.test.js @@ -1,28 +1,18 @@ const { expect } = require('chai'); const sinon = require('sinon'); const ResultSlicer = require('../../../dist/result/ResultSlicer').default; - -class ResultsProviderMock { - constructor(chunks) { - this.chunks = chunks; - } - - async hasMore() { - return this.chunks.length > 0; - } - - async fetchNext() { - return this.chunks.shift() ?? 
[]; - } -} +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); describe('ResultSlicer', () => { it('should return chunks of requested size', async () => { - const provider = new ResultsProviderMock([ - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 25], - [30, 31, 32, 33, 34, 35], - ]); + const provider = new ResultsProviderMock( + [ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ], + [], + ); const slicer = new ResultSlicer({}, provider); @@ -40,11 +30,14 @@ describe('ResultSlicer', () => { }); it('should return raw chunks', async () => { - const provider = new ResultsProviderMock([ - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 25], - [30, 31, 32, 33, 34, 35], - ]); + const provider = new ResultsProviderMock( + [ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ], + [], + ); sinon.spy(provider, 'fetchNext'); const slicer = new ResultSlicer({}, provider); @@ -61,11 +54,14 @@ describe('ResultSlicer', () => { }); it('should switch between returning sliced and raw chunks', async () => { - const provider = new ResultsProviderMock([ - [10, 11, 12, 13, 14, 15], - [20, 21, 22, 23, 24, 25], - [30, 31, 32, 33, 34, 35], - ]); + const provider = new ResultsProviderMock( + [ + [10, 11, 12, 13, 14, 15], + [20, 21, 22, 23, 24, 25], + [30, 31, 32, 33, 34, 35], + ], + [], + ); const slicer = new ResultSlicer({}, provider); diff --git a/tests/unit/result/compatibility.test.js b/tests/unit/result/compatibility.test.js index 6d047d57..c01f6674 100644 --- a/tests/unit/result/compatibility.test.js +++ b/tests/unit/result/compatibility.test.js @@ -1,5 +1,6 @@ const { expect } = require('chai'); const ArrowResultHandler = require('../../../dist/result/ArrowResultHandler').default; +const ArrowResultConverter = require('../../../dist/result/ArrowResultConverter').default; const JsonResultHandler = require('../../../dist/result/JsonResultHandler').default; const { fixArrowResult } = require('../../fixtures/compatibility'); @@ -7,12 +8,12 @@ const fixtureColumn = require('../../fixtures/compatibility/column'); const fixtureArrow = require('../../fixtures/compatibility/arrow'); const fixtureArrowNT = require('../../fixtures/compatibility/arrow_native_types'); -const RowSetProviderMock = require('./fixtures/RowSetProviderMock'); +const ResultsProviderMock = require('./fixtures/ResultsProviderMock'); describe('Result handlers compatibility tests', () => { it('colum-based data', async () => { const context = {}; - const rowSetProvider = new RowSetProviderMock(fixtureColumn.rowSets); + const rowSetProvider = new ResultsProviderMock(fixtureColumn.rowSets); const result = new JsonResultHandler(context, rowSetProvider, fixtureColumn.schema); const rows = await result.fetchNext({ limit: 10000 }); expect(rows).to.deep.equal(fixtureColumn.expected); @@ -20,16 +21,24 @@ describe('Result handlers compatibility tests', () => { it('arrow-based data without native types', async () => { const context = {}; - const rowSetProvider = new RowSetProviderMock(fixtureArrow.rowSets); - const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrow.schema, fixtureArrow.arrowSchema); + const rowSetProvider = new ResultsProviderMock(fixtureArrow.rowSets); + const result = new ArrowResultConverter( + context, + new ArrowResultHandler(context, rowSetProvider, fixtureArrow.arrowSchema), + fixtureArrow.schema, + ); const rows = await result.fetchNext({ limit: 10000 }); 
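
// Editor's note: an illustrative (non-normative) summary of how the handlers
// exercised by these compatibility tests compose after this patch; the stack
// mirrors `getResultHandler` in DBSQLOperation, with `ResultSlicer` always on top:
//
//   JSON:       ResultSlicer -> JsonResultHandler -> RowSetProvider
//   Arrow:      ResultSlicer -> ArrowResultConverter -> ArrowResultHandler -> RowSetProvider
//   CloudFetch: ResultSlicer -> ArrowResultConverter -> CloudFetchResultHandler -> RowSetProvider
//
// Each layer implements IResultsProvider, so the layers stack freely: fetching or
// downloading, Arrow decoding, and slicing stay independently testable.
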
expect(fixArrowResult(rows)).to.deep.equal(fixtureArrow.expected); }); it('arrow-based data with native types', async () => { const context = {}; - const rowSetProvider = new RowSetProviderMock(fixtureArrowNT.rowSets); - const result = new ArrowResultHandler(context, rowSetProvider, fixtureArrowNT.schema, fixtureArrowNT.arrowSchema); + const rowSetProvider = new ResultsProviderMock(fixtureArrowNT.rowSets); + const result = new ArrowResultConverter( + context, + new ArrowResultHandler(context, rowSetProvider, fixtureArrowNT.arrowSchema), + fixtureArrowNT.schema, + ); const rows = await result.fetchNext({ limit: 10000 }); expect(fixArrowResult(rows)).to.deep.equal(fixtureArrowNT.expected); }); diff --git a/tests/unit/result/fixtures/ResultsProviderMock.js b/tests/unit/result/fixtures/ResultsProviderMock.js new file mode 100644 index 00000000..a1dba3e0 --- /dev/null +++ b/tests/unit/result/fixtures/ResultsProviderMock.js @@ -0,0 +1,16 @@ +class ResultsProviderMock { + constructor(items, emptyItem) { + this.items = Array.isArray(items) ? [...items] : []; + this.emptyItem = emptyItem; + } + + async hasMore() { + return this.items.length > 0; + } + + async fetchNext() { + return this.items.shift() ?? this.emptyItem; + } +} + +module.exports = ResultsProviderMock; diff --git a/tests/unit/result/fixtures/RowSetProviderMock.js b/tests/unit/result/fixtures/RowSetProviderMock.js deleted file mode 100644 index 1e878a01..00000000 --- a/tests/unit/result/fixtures/RowSetProviderMock.js +++ /dev/null @@ -1,15 +0,0 @@ -class RowSetProviderMock { - constructor(rowSets) { - this.rowSets = Array.isArray(rowSets) ? [...rowSets] : []; - } - - async hasMore() { - return this.rowSets.length > 0; - } - - async fetchNext() { - return this.rowSets.shift(); - } -} - -module.exports = RowSetProviderMock; From 4fbe55b7fdf200737dc27c48a7942b83af001162 Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Mon, 18 Dec 2023 21:21:20 +0200 Subject: [PATCH 07/11] Add Raymond to codeowners file (#214) Signed-off-by: Levko Kravets --- CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index 0695e203..34f987d8 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1 +1 @@ -* @arikfr @superdupershant @yunbodeng-db @kravets-levko @susodapop @nithinkdb @andrefurlan-db +* @arikfr @superdupershant @yunbodeng-db @kravets-levko @susodapop @nithinkdb @andrefurlan-db @rcypher-databricks From 47d715175067290d67a36ba195b7a095cd1ae3ce Mon Sep 17 00:00:00 2001 From: Levko Kravets Date: Mon, 18 Dec 2023 21:21:51 +0200 Subject: [PATCH 08/11] Remove protocol version check for query parameters (#213) * Remove protocol version check for query parameters Signed-off-by: Levko Kravets * Re-enable query parameters tests Signed-off-by: Levko Kravets --------- Signed-off-by: Levko Kravets --- lib/DBSQLSession.ts | 12 ------------ tests/e2e/query_parameters.test.js | 9 ++++----- 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/lib/DBSQLSession.ts b/lib/DBSQLSession.ts index cf14dcd9..aa23f7d1 100644 --- a/lib/DBSQLSession.ts +++ b/lib/DBSQLSession.ts @@ -2,7 +2,6 @@ import * as fs from 'fs'; import * as path from 'path'; import { stringify, NIL, parse } from 'uuid'; import fetch, { HeadersInit } from 'node-fetch'; -import { Thrift } from 'thrift'; import { TSessionHandle, TStatus, @@ -10,7 +9,6 @@ import { TSparkDirectResults, TSparkArrowTypes, TSparkParameter, - TProtocolVersion, } from '../thrift/TCLIService_types'; import { Int64 } from './hive/Types'; import IDBSQLSession, { @@ -98,16 +96,6 @@ 
function getQueryParameters(
     return [];
   }
 
-  if (
-    !sessionHandle.serverProtocolVersion ||
-    sessionHandle.serverProtocolVersion < TProtocolVersion.SPARK_CLI_SERVICE_PROTOCOL_V8
-  ) {
-    throw new Thrift.TProtocolException(
-      Thrift.TProtocolExceptionType.BAD_VERSION,
-      'Parameterized operations are not supported by this server. Support will begin with server version DBR 14.1',
-    );
-  }
-
   const result: Array<TSparkParameter> = [];
 
   if (namedParameters !== undefined) {
diff --git a/tests/e2e/query_parameters.test.js b/tests/e2e/query_parameters.test.js
index 6f7fdf00..6efa5160 100644
--- a/tests/e2e/query_parameters.test.js
+++ b/tests/e2e/query_parameters.test.js
@@ -19,9 +19,8 @@ const openSession = async () => {
   });
 };
 
-// TODO: Temporarily disable those tests until we figure out issues with E2E test env
 describe('Query parameters', () => {
-  it.skip('should use named parameters', async () => {
+  it('should use named parameters', async () => {
     const session = await openSession();
     const operation = await session.executeStatement(
       `
@@ -72,7 +71,7 @@ describe('Query parameters', () => {
     ]);
   });
 
-  it.skip('should accept primitives as values for named parameters', async () => {
+  it('should accept primitives as values for named parameters', async () => {
     const session = await openSession();
     const operation = await session.executeStatement(
       `
@@ -117,7 +116,7 @@ describe('Query parameters', () => {
     ]);
   });
 
-  it.skip('should use ordinal parameters', async () => {
+  it('should use ordinal parameters', async () => {
     const session = await openSession();
     const operation = await session.executeStatement(
       `
@@ -168,7 +167,7 @@ describe('Query parameters', () => {
     ]);
   });
 
-  it.skip('should accept primitives as values for ordinal parameters', async () => {
+  it('should accept primitives as values for ordinal parameters', async () => {
     const session = await openSession();
     const operation = await session.executeStatement(
       `

From bdc712699358c0189563172a5ba82e306ceaef21 Mon Sep 17 00:00:00 2001
From: Levko Kravets
Date: Mon, 18 Dec 2023 23:44:29 +0200
Subject: [PATCH 09/11] [PECO-1261] Improve CloudFetch downloading queue (#209)

[PECO-1261] Wait only for the currently needed link, download the rest in background

Signed-off-by: Levko Kravets
---
 lib/result/CloudFetchResultHandler.ts         | 18 +++++-----
 tests/e2e/cloudfetch.test.js                  |  4 +--
 .../result/CloudFetchResultHandler.test.js    | 36 +++++++++++--------
 3 files changed, 34 insertions(+), 24 deletions(-)

diff --git a/lib/result/CloudFetchResultHandler.ts b/lib/result/CloudFetchResultHandler.ts
index a8e94760..f1743628 100644
--- a/lib/result/CloudFetchResultHandler.ts
+++ b/lib/result/CloudFetchResultHandler.ts
@@ -11,7 +11,7 @@ export default class CloudFetchResultHandler implements IResultsProvider<Array<Buffer>> {
   private pendingLinks: Array<TSparkArrowResultLink> = [];
 
-  private downloadedBatches: Array<Buffer> = [];
+  private downloadTasks: Array<Promise<Buffer>> = [];
 
   constructor(context: IClientContext, source: IResultsProvider<TRowSet>) {
     this.context = context;
@@ -19,7 +19,7 @@ export default class CloudFetchResultHandler implements IResultsProvider<Array<Buffer>> {
   public async hasMore() {
-    if (this.pendingLinks.length > 0 || this.downloadedBatches.length > 0) {
+    if (this.pendingLinks.length > 0 || this.downloadTasks.length > 0) {
       return true;
     }
     return this.source.hasMore();
@@ -32,15 +32,17 @@ export default class CloudFetchResultHandler implements IResultsProvider<Array<Buffer>> {
+    const freeTaskSlotsCount = this.context.getConfig().cloudFetchConcurrentDownloads - this.downloadTasks.length;
+    if (freeTaskSlotsCount > 0) {
+      const links = this.pendingLinks.splice(0, freeTaskSlotsCount);
       const tasks = links.map((link) => this.downloadLink(link));
-      const batches = await Promise.all(tasks);
-      this.downloadedBatches.push(...batches);
+      this.downloadTasks.push(...tasks);
     }
 
-    return this.downloadedBatches.splice(0, 1);
+    const batch = await this.downloadTasks.shift();
+    return batch ? [batch] : [];
   }
 
   private async downloadLink(link: TSparkArrowResultLink): Promise<Buffer> {
diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js
index 4f6c11c7..296e88e7 100644
--- a/tests/e2e/cloudfetch.test.js
+++ b/tests/e2e/cloudfetch.test.js
@@ -64,7 +64,7 @@ describe('CloudFetch', () => {
     // result handler should download 5 of them and schedule the rest
     expect(await cfResultHandler.hasMore()).to.be.false;
     expect(cfResultHandler.pendingLinks.length).to.be.equal(0);
-    expect(cfResultHandler.downloadedBatches.length).to.be.equal(0);
+    expect(cfResultHandler.downloadTasks.length).to.be.equal(0);
 
     sinon.spy(operation._data, 'fetchNext');
 
@@ -76,7 +76,7 @@ describe('CloudFetch', () => {
     expect(await cfResultHandler.hasMore()).to.be.true;
     // expected batches minus first 5 already fetched
     expect(cfResultHandler.pendingLinks.length).to.be.equal(resultLinksCount - cloudFetchConcurrentDownloads);
-    expect(cfResultHandler.downloadedBatches.length).to.be.equal(cloudFetchConcurrentDownloads - 1);
+    expect(cfResultHandler.downloadTasks.length).to.be.equal(cloudFetchConcurrentDownloads - 1);
 
     let fetchedRowCount = chunk.length;
     while (await operation.hasMoreRows()) {
diff --git a/tests/unit/result/CloudFetchResultHandler.test.js b/tests/unit/result/CloudFetchResultHandler.test.js
index 962f3906..bbe9638f 100644
--- a/tests/unit/result/CloudFetchResultHandler.test.js
+++ b/tests/unit/result/CloudFetchResultHandler.test.js
@@ -89,19 +89,19 @@ describe('CloudFetchResultHandler', () => {
     case1: {
       result.pendingLinks = [];
-      result.downloadedBatches = [];
+      result.downloadTasks = [];
       expect(await result.hasMore()).to.be.false;
     }
 
     case2: {
       result.pendingLinks = [{}]; // just anything here
-      result.downloadedBatches = [];
+      result.downloadTasks = [];
       expect(await result.hasMore()).to.be.true;
     }
 
     case3: {
       result.pendingLinks = [];
-      result.downloadedBatches = [{}]; // just anything here
+      result.downloadTasks = [{}]; // just anything here
       expect(await result.hasMore()).to.be.true;
     }
   });
@@ -134,19 +134,19 @@ describe('CloudFetchResultHandler', () => {
     } while (await rowSetProvider.hasMore());
 
     expect(result.pendingLinks.length).to.be.equal(expectedLinksCount);
-    expect(result.downloadedBatches.length).to.be.equal(0);
+    expect(result.downloadTasks.length).to.be.equal(0);
     expect(result.fetch.called).to.be.false;
   });
 
   it('should download batches according to settings', async () => {
     const clientConfig = DBSQLClient.getDefaultConfig();
-    clientConfig.cloudFetchConcurrentDownloads = 2;
+    clientConfig.cloudFetchConcurrentDownloads = 3;
 
     const rowSet = {
       startRowOffset: 0,
       resultLinks: [...sampleRowSet1.resultLinks, ...sampleRowSet2.resultLinks],
    };
-    const expectedLinksCount = rowSet.resultLinks.length;
+    const expectedLinksCount = rowSet.resultLinks.length; // 5
     const rowSetProvider = new ResultsProviderMock([rowSet]);
     const context = {
       getConfig: () => clientConfig,
     };
 
     expect(await rowSetProvider.hasMore()).to.be.true;
 
     initialFetch: {
+      // `cloudFetchConcurrentDownloads` out of `expectedLinksCount` links should be scheduled immediately
+      // first one should be `await`-ed and returned from `fetchNext`
       const items = await result.fetchNext({ limit: 10000 });
       expect(items.length).to.be.gt(0);
       expect(await rowSetProvider.hasMore()).to.be.false;
 
       expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads);
      expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads);
-      expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
+      expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
     }
 
     secondFetch: {
-      // It should return previously fetched batch, not performing additional network requests
+      // It should return previously fetched batch, and schedule one more
       const items = await result.fetchNext({ limit: 10000 });
       expect(items.length).to.be.gt(0);
       expect(await rowSetProvider.hasMore()).to.be.false;
 
-      expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads); // no new fetches
-      expect(result.pendingLinks.length).to.be.equal(expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads);
-      expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 2);
+      expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads + 1);
+      expect(result.pendingLinks.length).to.be.equal(
+        expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads - 1,
+      );
+      expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
     }
 
     thirdFetch: {
@@ -192,11 +196,11 @@ describe('CloudFetchResultHandler', () => {
       expect(items.length).to.be.gt(0);
       expect(await rowSetProvider.hasMore()).to.be.false;
 
-      expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads * 2);
+      expect(result.fetch.callCount).to.be.equal(clientConfig.cloudFetchConcurrentDownloads + 2);
       expect(result.pendingLinks.length).to.be.equal(
-        expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads * 2,
+        expectedLinksCount - clientConfig.cloudFetchConcurrentDownloads - 2,
       );
-      expect(result.downloadedBatches.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
+      expect(result.downloadTasks.length).to.be.equal(clientConfig.cloudFetchConcurrentDownloads - 1);
     }
   });
 
@@ -251,6 +255,10 @@ describe('CloudFetchResultHandler', () => {
       }),
     );
 
+    // There are two links in the batch - the first one is valid and the second one is expired
+    // The first fetch has to be successful, and the second one should fail
+    await result.fetchNext({ limit: 10000 });
+
     try {
       await result.fetchNext({ limit: 10000 });
       expect.fail('It should throw an error');

From 5030d0fa2203f63a82cd62eebb18cf43e2689d71 Mon Sep 17 00:00:00 2001
From: Levko Kravets
Date: Tue, 19 Dec 2023 00:03:00 +0200
Subject: [PATCH 10/11] [PECO-1258] Don't fetch results beyond the end of dataset (#205)

* Refine conditions of results availability and adjust tests

Signed-off-by: Levko Kravets

* [PECO-1258] Don't fetch results beyond the end of dataset

Signed-off-by: Levko Kravets

* Remove irrelevant changes

Signed-off-by: Levko Kravets

---------

Signed-off-by: Levko Kravets
---
 lib/DBSQLOperation/index.ts       |  9 ++----
 lib/result/RowSetProvider.ts      | 41 +++++++++++++++++------
 tests/e2e/cloudfetch.test.js      |  4 +--
 tests/unit/DBSQLOperation.test.js | 54 +++++++++++++++++++------------
 4 files changed, 69 insertions(+), 39 deletions(-)

diff --git a/lib/DBSQLOperation/index.ts b/lib/DBSQLOperation/index.ts
index dc6129f6..8a0bf707 100644
--- a/lib/DBSQLOperation/index.ts
+++ b/lib/DBSQLOperation/index.ts
@@ -68,8 +68,6 @@ export default class DBSQLOperation implements IOperation {
   // to `getOperationStatus()` may fail with irrelevant errors, e.g. HTTP 404
   private operationStatus?: TGetOperationStatusResp;
 
-  private hasResultSet: boolean = false;
-
   private resultHandler?: ResultSlicer<any>;
 
   constructor({ handle, directResults, context }: DBSQLOperationConstructorOptions) {
@@ -78,7 +76,6 @@ export default class DBSQLOperation implements IOperation {
 
     const useOnlyPrefetchedResults = Boolean(directResults?.closeOperation);
 
-    this.hasResultSet = this.operationHandle.hasResultSet;
     if (directResults?.operationStatus) {
       this.processOperationStatusResponse(directResults.operationStatus);
     }
@@ -139,7 +136,7 @@ export default class DBSQLOperation implements IOperation {
   public async fetchChunk(options?: FetchOptions): Promise<Array<any>> {
     await this.failIfClosed();
 
-    if (!this.hasResultSet) {
+    if (!this.operationHandle.hasResultSet) {
       return [];
     }
 
@@ -271,7 +268,7 @@ export default class DBSQLOperation implements IOperation {
   public async getSchema(options?: GetSchemaOptions): Promise<TTableSchema | null> {
     await this.failIfClosed();
 
-    if (!this.hasResultSet) {
+    if (!this.operationHandle.hasResultSet) {
       return null;
     }
 
@@ -412,7 +409,7 @@ export default class DBSQLOperation implements IOperation {
     this.state = response.operationState ?? this.state;
 
     if (typeof response.hasResultSet === 'boolean') {
-      this.hasResultSet = response.hasResultSet;
+      this.operationHandle.hasResultSet = response.hasResultSet;
     }
 
     const isInProgress = [
diff --git a/lib/result/RowSetProvider.ts b/lib/result/RowSetProvider.ts
index b131e905..5661d208 100644
--- a/lib/result/RowSetProvider.ts
+++ b/lib/result/RowSetProvider.ts
@@ -47,7 +47,16 @@ export default class RowSetProvider implements IResultsProvider<TRowSet | undefined> {
+  private hasMoreRowsFlag?: boolean = undefined;
+
   private processFetchResponse(response: TFetchResultsResp): TRowSet | undefined {
     Status.assert(response.status);
     this.fetchOrientation = TFetchOrientation.FETCH_NEXT;
 
-    if (this.prefetchedResults.length > 0) {
-      this.hasMoreRows = true;
-    } else if (this.returnOnlyPrefetchedResults) {
-      this.hasMoreRows = false;
-    } else {
-      this.hasMoreRows = checkIfOperationHasMoreRows(response);
-    }
-
+    this.hasMoreRowsFlag = checkIfOperationHasMoreRows(response);
     return response.results;
   }
 
@@ -86,6 +87,16 @@ export default class RowSetProvider implements IResultsProvider<TRowSet | undefined> {
   public async hasMore() {
+    if (this.prefetchedResults.length > 0) {
+      return true;
+    }
+    // We end up here if no more prefetched results available (checked above)
+    if (this.returnOnlyPrefetchedResults) {
+      return false;
+    }
     return this.hasMoreRows;
   }
 }
diff --git a/tests/e2e/cloudfetch.test.js b/tests/e2e/cloudfetch.test.js
index 296e88e7..4dc41e43 100644
--- a/tests/e2e/cloudfetch.test.js
+++ b/tests/e2e/cloudfetch.test.js
@@ -42,7 +42,7 @@ describe('CloudFetch', () => {
         LEFT JOIN (SELECT 1) AS t2
       `,
       {
-        maxRows: 100000,
+        maxRows: null, // disable DirectResults
        useCloudFetch: true, // tell server that we would like to use CloudFetch
       },
     );
@@ -62,7 +62,7 @@ describe('CloudFetch', () => {
     // With the count of rows we queried, there should be at least one row set,
     // containing 8 result links. After fetching the first chunk,
     // result handler should download 5 of them and schedule the rest
-    expect(await cfResultHandler.hasMore()).to.be.false;
+    expect(await cfResultHandler.hasMore()).to.be.true;
     expect(cfResultHandler.pendingLinks.length).to.be.equal(0);
     expect(cfResultHandler.downloadTasks.length).to.be.equal(0);
 
     sinon.spy(operation._data, 'fetchNext');
 
diff --git a/tests/unit/DBSQLOperation.test.js b/tests/unit/DBSQLOperation.test.js
index 83fb35fd..99cc1e66 100644
--- a/tests/unit/DBSQLOperation.test.js
+++ b/tests/unit/DBSQLOperation.test.js
@@ -129,7 +129,7 @@ describe('DBSQLOperation', () => {
       const operation = new DBSQLOperation({ handle, context });
 
       expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
     });
 
     it('should pick up state from directResults', async () => {
@@ -149,7 +149,7 @@ describe('DBSQLOperation', () => {
       });
 
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
     });
 
     it('should fetch status and update internal state', async () => {
@@ -165,14 +165,14 @@ describe('DBSQLOperation', () => {
       const operation = new DBSQLOperation({ handle, context });
 
       expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE);
-      expect(operation.hasResultSet).to.be.false;
+      expect(operation.operationHandle.hasResultSet).to.be.false;
 
      const status = await operation.status();
 
       expect(context.driver.getOperationStatus.called).to.be.true;
       expect(status.operationState).to.equal(TOperationState.FINISHED_STATE);
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
     });
 
     it('should request progress', async () => {
@@ -204,7 +204,7 @@ describe('DBSQLOperation', () => {
       const operation = new DBSQLOperation({ handle, context });
 
       expect(operation.state).to.equal(TOperationState.INITIALIZED_STATE);
-      expect(operation.hasResultSet).to.be.false;
+      expect(operation.operationHandle.hasResultSet).to.be.false;
 
       // First call - should fetch data and cache
       context.driver.getOperationStatusResp = {
@@ -216,7 +216,7 @@ describe('DBSQLOperation', () => {
       expect(context.driver.getOperationStatus.callCount).to.equal(1);
       expect(status1.operationState).to.equal(TOperationState.FINISHED_STATE);
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
 
       // Second call - should return cached data
       context.driver.getOperationStatusResp = {
@@ -228,7 +228,7 @@ describe('DBSQLOperation', () => {
       expect(context.driver.getOperationStatus.callCount).to.equal(1);
       expect(status2.operationState).to.equal(TOperationState.FINISHED_STATE);
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
     });
 
     it('should fetch status if directResults status is not finished', async () => {
@@ -254,14 +254,14 @@ describe('DBSQLOperation', () => {
       });
 
       expect(operation.state).to.equal(TOperationState.RUNNING_STATE); // from directResults
-      expect(operation.hasResultSet).to.be.false;
+      expect(operation.operationHandle.hasResultSet).to.be.false;
 
       const status = await operation.status(false);
 
       expect(context.driver.getOperationStatus.called).to.be.true;
      expect(status.operationState).to.equal(TOperationState.FINISHED_STATE);
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.true;
+      expect(operation.operationHandle.hasResultSet).to.be.true;
     });
 
     it('should not fetch status if directResults status is finished', async () => {
@@ -287,14 +287,14 @@ describe('DBSQLOperation', () => {
       });
 
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE); // from directResults
-      expect(operation.hasResultSet).to.be.false;
+      expect(operation.operationHandle.hasResultSet).to.be.false;
 
       const status = await operation.status(false);
 
       expect(context.driver.getOperationStatus.called).to.be.false;
       expect(status.operationState).to.equal(TOperationState.FINISHED_STATE);
       expect(operation.state).to.equal(TOperationState.FINISHED_STATE);
-      expect(operation.hasResultSet).to.be.false;
+      expect(operation.operationHandle.hasResultSet).to.be.false;
     });
 
     it('should throw an error in case of a status error', async () => {
@@ -1025,6 +1025,7 @@ describe('DBSQLOperation', () => {
       handle.hasResultSet = true;
 
       context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE;
+      context.driver.getOperationStatusResp.hasResultSet = true;
 
       sinon.spy(context.driver, 'getResultSetMetadata');
       sinon.spy(context.driver, 'fetchResults');
@@ -1175,7 +1176,7 @@ describe('DBSQLOperation', () => {
   });
 
   describe('hasMoreRows', () => {
-    it('should return False until first chunk of data fetched', async () => {
+    it('should return initial value prior to first fetch', async () => {
       const context = new ClientContextMock();
 
       const handle = new OperationHandleMock();
@@ -1183,12 +1184,15 @@
       context.driver.getOperationStatusResp.operationState = TOperationState.FINISHED_STATE;
       context.driver.getOperationStatusResp.hasResultSet = true;
-      context.driver.fetchResultsResp.hasMoreRows = true;
+      context.driver.fetchResultsResp.hasMoreRows = false;
+      context.driver.fetchResultsResp.results = undefined;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.undefined;
       await operation.fetchChunk({ disableBuffering: true });
-      expect(await operation.hasMoreRows()).to.be.true;
+      expect(await operation.hasMoreRows()).to.be.false;
+      expect(operation._data.hasMoreRowsFlag).to.be.false;
     });
 
     it('should return False if operation was closed', async () => {
@@ -1202,7 +1206,7 @@
       context.driver.fetchResultsResp.hasMoreRows = true;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.true;
       await operation.close();
@@ -1220,7 +1224,7 @@
       context.driver.fetchResultsResp.hasMoreRows = true;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.true;
       await operation.cancel();
@@ -1238,9 +1242,11 @@
       context.driver.fetchResultsResp.hasMoreRows = true;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.undefined;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.true;
     });
 
     it('should return True if hasMoreRows flag is False but there is actual data', async () => {
@@ -1254,9 +1260,11 @@
       context.driver.fetchResultsResp.hasMoreRows = false;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.undefined;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.true;
     });
 
     it('should return True if hasMoreRows flag is unset but there is actual data', async () => {
@@ -1270,9 +1278,11 @@
       context.driver.fetchResultsResp.hasMoreRows = undefined;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.undefined;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.true;
     });
 
     it('should return False if hasMoreRows flag is False and there is no data', async () => {
@@ -1287,9 +1297,11 @@
       context.driver.fetchResultsResp.hasMoreRows = false;
       context.driver.fetchResultsResp.results = undefined;
 
       const operation = new DBSQLOperation({ handle, context });
-      expect(await operation.hasMoreRows()).to.be.false;
+      expect(await operation.hasMoreRows()).to.be.true;
+      expect(operation._data.hasMoreRowsFlag).to.be.undefined;
       await operation.fetchChunk({ disableBuffering: true });
       expect(await operation.hasMoreRows()).to.be.false;
+      expect(operation._data.hasMoreRowsFlag).to.be.false;
     });
   });
 });

From a86f828aebbcf68767e56f2a3465087065cbd11e Mon Sep 17 00:00:00 2001
From: Levko Kravets
Date: Wed, 20 Dec 2023 10:45:36 +0200
Subject: [PATCH 11/11] Prepare release 1.7.0 (#215)

Signed-off-by: Levko Kravets
---
 CHANGELOG.md      | 10 ++++++++++
 package-lock.json |  4 ++--
 package.json      |  2 +-
 3 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91c5d7b9..c103fe1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,15 @@
 # Release History
 
+## 1.7.0
+
+- Fixed behavior of the `maxRows` option of `IOperation.fetchChunk()`. Now it will return chunks
+  of the requested size (databricks/databricks-sql-nodejs#200)
+- Improved CloudFetch memory usage and overall performance (databricks/databricks-sql-nodejs#204,
+  databricks/databricks-sql-nodejs#207, databricks/databricks-sql-nodejs#209)
+- Removed the protocol version check when using query parameters (databricks/databricks-sql-nodejs#213)
+- Fixed `IOperation.hasMoreRows()` behavior to avoid fetching data beyond the end of the dataset.
+  Also, it now works properly prior to fetching the first chunk (databricks/databricks-sql-nodejs#205)
+
 ## 1.6.1
 
 - Make default logger singleton (databricks/databricks-sql-nodejs#199)
diff --git a/package-lock.json b/package-lock.json
index 5fa9584e..51ddbe11 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@databricks/sql",
-  "version": "1.6.1",
+  "version": "1.7.0",
   "lockfileVersion": 2,
   "requires": true,
   "packages": {
     "": {
       "name": "@databricks/sql",
-      "version": "1.6.1",
+      "version": "1.7.0",
       "license": "Apache 2.0",
       "dependencies": {
         "apache-arrow": "^13.0.0",
diff --git a/package.json b/package.json
index 3fec127a..61d627ca 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@databricks/sql",
-  "version": "1.6.1",
+  "version": "1.7.0",
   "description": "Driver for connection to Databricks SQL via Thrift API.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
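---

Editorial note (not part of the patch series above): the following is a minimal consumer-side sketch of how the 1.7.0 behavior shipped by patches 09-11 is observed through the driver's public API. The host, HTTP path, access token, and table name are placeholders, and the snippet assumes a reachable Databricks SQL warehouse.

```javascript
// Minimal usage sketch for @databricks/sql 1.7.0 - all connection values are placeholders.
const { DBSQLClient } = require('@databricks/sql');

async function main() {
  const client = new DBSQLClient();
  await client.connect({
    host: 'your-workspace.cloud.databricks.com', // placeholder
    path: '/sql/2.0/warehouses/your-warehouse-id', // placeholder
    token: 'your-personal-access-token', // placeholder
  });

  const session = await client.openSession();
  const operation = await session.executeStatement('SELECT * FROM some_large_table', {
    useCloudFetch: true, // with PECO-1261, result links are downloaded in the background
  });

  // With PECO-1258, `hasMoreRows()` can be used as a pre-check before the first
  // fetch and stops the loop at the end of the dataset instead of over-fetching.
  while (await operation.hasMoreRows()) {
    // With #200, a chunk honors the requested `maxRows` size.
    const chunk = await operation.fetchChunk({ maxRows: 10000 });
    console.log(`fetched ${chunk.length} rows`);
  }

  await operation.close();
  await session.close();
  await client.close();
}

main().catch((error) => {
  console.error(error);
  process.exit(1);
});
```

The loop shape above is the practical payoff of patch 10: prior to 1.7.0, `hasMoreRows()` returned `false` until the first chunk was fetched, so a `while`-style pre-check loop like this would exit immediately.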