From 14768f46f0f9b4e38dd330b7bdeb3e437e00ba2d Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 13:18:42 +0000 Subject: [PATCH 01/32] clickhouse-cpp scanner --- .gitmodules | 3 + chsql/CMakeLists.txt | 84 ++++-- chsql/src/clickhouse_scan.cpp | 369 ++++++++++++++++++++++++++ chsql/src/include/clickhouse_scan.hpp | 12 + chsql/vcpkg.json | 11 +- contribs/clickhouse-cpp | 1 + 6 files changed, 454 insertions(+), 26 deletions(-) create mode 100644 chsql/src/clickhouse_scan.cpp create mode 100644 chsql/src/include/clickhouse_scan.hpp create mode 160000 contribs/clickhouse-cpp diff --git a/.gitmodules b/.gitmodules index f37c3c4..83999ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ url = https://github.com/duckdb/extension-ci-tools branch = main update = merge +[submodule "contribs/clickhouse-cpp"] + path = contribs/clickhouse-cpp + url = https://github.com/ClickHouse/clickhouse-cpp.git diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index f2b1f46..e6bcd5e 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -1,34 +1,70 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.12) + # Set extension name here set(TARGET_NAME chsql) -# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then -# used in cmake with find_package. Feel free to remove or replace with other dependencies. -# Note that it should also be removed from vcpkg.json to prevent needlessly installing it.. + +project(${TARGET_NAME}) + +# Configure clickhouse-cpp options +set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") +set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") +set(CH_CPP_BUILD_ONLY_LIB ON CACHE BOOL "Build only library") +set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") + +# Add clickhouse-cpp +add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-cpp + EXCLUDE_FROM_ALL +) + +# Find OpenSSL package find_package(OpenSSL REQUIRED) + set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) -project(${TARGET_NAME}) include_directories( - ./src/include - ./src - ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include - ../duckdb/third_party/lz4 - ../duckdb/third_party/parquet - ../duckdb/third_party/thrift - ../duckdb/third_party/snappy - ../duckdb/third_party/zstd/include - ../duckdb/third_party/mbedtls - ../duckdb/third_party/mbedtls/include - ../duckdb/third_party/brotli/include) -set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp) + ./src/include + ./src + ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include + ../duckdb/third_party/lz4 + ../duckdb/third_party/parquet + ../duckdb/third_party/thrift + ../duckdb/third_party/snappy + ../duckdb/third_party/zstd/include + ../duckdb/third_party/mbedtls + ../duckdb/third_party/mbedtls/include + ../duckdb/third_party/brotli/include + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl +) + +# Update extension sources to include new file +set(EXTENSION_SOURCES + src/chsql_extension.cpp + src/duck_flock.cpp + src/clickhouse_scan.cpp +) + build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -# Link OpenSSL in both the static library as the loadable extension -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) + +# Link libraries using plain signature +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) + +# Install targets install( - TARGETS ${EXTENSION_NAME} - EXPORT "${DUCKDB_EXPORT_SET}" - LIBRARY DESTINATION "${INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") + TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib + EXPORT "${DUCKDB_EXPORT_SET}" + LIBRARY DESTINATION "${INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" +) + +# Make sure the export set includes clickhouse-cpp-lib +export( + TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib + NAMESPACE duckdb:: + FILE DuckDBExports.cmake +) diff --git a/chsql/src/clickhouse_scan.cpp b/chsql/src/clickhouse_scan.cpp new file mode 100644 index 0000000..bd8eabd --- /dev/null +++ b/chsql/src/clickhouse_scan.cpp @@ -0,0 +1,369 @@ +#include "clickhouse_scan.hpp" +#include "duckdb/common/exception.hpp" +#include "duckdb/main/secret/secret_manager.hpp" +#include "duckdb/common/types/chunk_collection.hpp" +#include + +namespace duckdb { + +struct ClickHouseBindData : public TableFunctionData { + string query; + string host; + string port; + string user; + string password; + string database; + bool finished; + vector types; + vector names; + + ClickHouseBindData(string query, string host, string port, string user, string password, string database) + : query(query), host(host), port(port), user(user), password(password), database(database), finished(false) {} +}; + +// Convert ClickHouse type to DuckDB LogicalType +static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) { + switch (column->Type()->GetCode()) { + // Integer types + case clickhouse::Type::Int8: + return LogicalType::TINYINT; + case clickhouse::Type::Int16: + return LogicalType::SMALLINT; + case clickhouse::Type::Int32: + return LogicalType::INTEGER; + case clickhouse::Type::Int64: + return LogicalType::BIGINT; + case clickhouse::Type::Int128: + return LogicalType::HUGEINT; + + // Unsigned integer types + case clickhouse::Type::UInt8: + return LogicalType::UTINYINT; + case clickhouse::Type::UInt16: + return LogicalType::USMALLINT; + case clickhouse::Type::UInt32: + return LogicalType::UINTEGER; + case clickhouse::Type::UInt64: + return LogicalType::UBIGINT; + + // Floating point types + case clickhouse::Type::Float32: + return LogicalType::FLOAT; + case clickhouse::Type::Float64: + return LogicalType::DOUBLE; + + // String types + case clickhouse::Type::String: + case clickhouse::Type::FixedString: + return LogicalType::VARCHAR; + + // Date and Time types + case clickhouse::Type::Date: + return LogicalType::DATE; + case clickhouse::Type::Date32: + return LogicalType::DATE; + case clickhouse::Type::DateTime: + return LogicalType::TIMESTAMP; + case clickhouse::Type::DateTime64: + return LogicalType::TIMESTAMP; + + // Boolean type + case clickhouse::Type::Nothing: + return LogicalType::BOOLEAN; + + // Decimal types + case clickhouse::Type::Decimal: + case clickhouse::Type::Decimal32: + case clickhouse::Type::Decimal64: + case clickhouse::Type::Decimal128: + // Get precision and scale from the type + auto decimal_type = static_cast(column->Type().get()); + return LogicalType::DECIMAL(decimal_type->GetPrecision(), decimal_type->GetScale()); + } +} + +static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &bind_data = data_p.bind_data->Cast(); + + if (bind_data.finished) { + return; + } + + try { + // Initialize ClickHouse client + clickhouse::Client client(clickhouse::ClientOptions() + .SetHost(bind_data.host) + .SetPort(std::stoi(bind_data.port)) + .SetUser(bind_data.user) + .SetPassword(bind_data.password) + .SetDefaultDatabase(bind_data.database) + .SetPingBeforeQuery(true)); + + // Execute query + client.Select(bind_data.query, [&](const clickhouse::Block& block) { + idx_t row_count = block.GetRowCount(); + output.SetCardinality(row_count); + + for (idx_t col_idx = 0; col_idx < block.GetColumnCount(); col_idx++) { + auto& target = output.data[col_idx]; + auto& source = block[col_idx]; + + // Convert and copy data based on type + switch (bind_data.types[col_idx].id()) { + // String types + case LogicalTypeId::VARCHAR: { + if (source->Type()->GetCode() == clickhouse::Type::FixedString) { + auto& strings = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx)); + } + } else { + auto& strings = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx)); + } + } + break; + } + + // Integer types + case LogicalTypeId::TINYINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::SMALLINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::INTEGER: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::BIGINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::HUGEINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + // Assuming ClickHouse returns Int128 as two 64-bit integers + auto value = integers->At(row_idx); + target_vector[row_idx] = hugeint_t(value.high, value.low); + } + break; + } + + // Unsigned integer types + case LogicalTypeId::UTINYINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::USMALLINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::UINTEGER: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + case LogicalTypeId::UBIGINT: { + auto& integers = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + + // Floating point types + case LogicalTypeId::FLOAT: { + auto& floats = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = floats->At(row_idx); + } + break; + } + case LogicalTypeId::DOUBLE: { + auto& doubles = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = doubles->At(row_idx); + } + break; + } + + // Date and Time types + case LogicalTypeId::DATE: { + if (source->Type()->GetCode() == clickhouse::Type::Date32) { + auto& dates = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + // Convert from days since epoch + target_vector[row_idx] = date_t(dates->At(row_idx)); + } + } else { + auto& dates = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = date_t(dates->At(row_idx)); + } + } + break; + } + case LogicalTypeId::TIMESTAMP: { + if (source->Type()->GetCode() == clickhouse::Type::DateTime64) { + auto& timestamps = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + // Convert from microseconds since epoch + target_vector[row_idx] = timestamp_t(timestamps->At(row_idx)); + } + } else { + auto& timestamps = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + // Convert from seconds since epoch + target_vector[row_idx] = timestamp_t(timestamps->At(row_idx) * Interval::MICROS_PER_SEC); + } + } + break; + } + + // Decimal types + case LogicalTypeId::DECIMAL: { + switch (source->Type()->GetCode()) { + case clickhouse::Type::Decimal32: { + auto& decimals = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = hugeint_t(decimals->At(row_idx)); + } + break; + } + case clickhouse::Type::Decimal64: { + auto& decimals = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = hugeint_t(decimals->At(row_idx)); + } + break; + } + case clickhouse::Type::Decimal128: { + auto& decimals = source->As(); + auto& target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + auto value = decimals->At(row_idx); + target_vector[row_idx] = hugeint_t(value.high, value.low); + } + break; + } + default: + throw NotImplementedException("Uns + } + } + }); + + bind_data.finished = true; + + } catch (const std::exception& e) { + throw IOException("ClickHouse error: " + string(e.what())); + } +} + +static unique_ptr ClickHouseScanBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto query = input.inputs[0].GetValue(); + + // Get ClickHouse connection details from secrets + auto &secret_manager = SecretManager::Get(context); + auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context); + auto secret_match = secret_manager.LookupSecret(transaction, "clickhouse", "clickhouse"); + + if (!secret_match.HasMatch()) { + throw InvalidInputException("No 'clickhouse' secret found. Please create a secret with CREATE SECRET first."); + } + + auto &secret = secret_match.GetSecret(); + const auto *kv_secret = dynamic_cast(&secret); + if (!kv_secret) { + throw InvalidInputException("Invalid secret format for 'clickhouse' secret"); + } + + // Extract connection parameters from secret + string host, port, user, password, database; + Value val; + + if (kv_secret->TryGetValue("host", val)) host = val.ToString(); + if (kv_secret->TryGetValue("port", val)) port = val.ToString(); + if (kv_secret->TryGetValue("user", val)) user = val.ToString(); + if (kv_secret->TryGetValue("password", val)) password = val.ToString(); + if (kv_secret->TryGetValue("database", val)) database = val.ToString(); + + // Create bind data + auto result = make_uniq(query, host, port, user, password, database); + + // Initialize client to fetch schema + try { + clickhouse::Client client(clickhouse::ClientOptions() + .SetHost(host) + .SetPort(std::stoi(port)) + .SetUser(user) + .SetPassword(password) + .SetDefaultDatabase(database)); + + // Execute query to get schema + client.Select(query, [&](const clickhouse::Block& block) { + for (size_t i = 0; i < block.GetColumnCount(); i++) { + auto column = block[i]; + return_types.push_back(ConvertClickHouseType(column)); + names.push_back(block.GetColumnName(i)); + } + }); + + result->types = return_types; + result->names = names; + + return std::move(result); + } catch (const std::exception& e) { + throw IOException("ClickHouse error during bind: " + string(e.what())); + } +} + +void RegisterClickHouseScanFunction(DatabaseInstance &instance) { + TableFunction clickhouse_scan("clickhouse_scan", {LogicalType::VARCHAR}, ClickHouseScanFunction, ClickHouseScanBind); + ExtensionUtil::RegisterFunction(instance, clickhouse_scan); +} + +} // namespace duckdb diff --git a/chsql/src/include/clickhouse_scan.hpp b/chsql/src/include/clickhouse_scan.hpp new file mode 100644 index 0000000..b4d90ec --- /dev/null +++ b/chsql/src/include/clickhouse_scan.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "duckdb.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/common/types/data_chunk.hpp" +#include "duckdb/main/client_context.hpp" + +namespace duckdb { + +void RegisterClickHouseScanFunction(DatabaseInstance &instance); + +} // namespace duckdb diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 85936bf..aa435a6 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,5 +1,12 @@ { "dependencies": [ - "openssl" + "openssl", + { + "name": "abseil", + "version>=": "20230125.3" + }, + "cityhash", + "lz4", + "zstd" ] -} \ No newline at end of file +} diff --git a/contribs/clickhouse-cpp b/contribs/clickhouse-cpp new file mode 160000 index 0000000..2a49a25 --- /dev/null +++ b/contribs/clickhouse-cpp @@ -0,0 +1 @@ +Subproject commit 2a49a25b573b1194f621070711440ea125577f50 From 2594a4f71c35713e10ce7804514a0b32e2e3a76d Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 13:23:10 +0000 Subject: [PATCH 02/32] fix vcpkg --- chsql/vcpkg.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index aa435a6..0a444c1 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,10 +1,7 @@ { "dependencies": [ "openssl", - { - "name": "abseil", - "version>=": "20230125.3" - }, + "abseil", "cityhash", "lz4", "zstd" From 76e22f79d3ec361d25a11c90fdf31fcd85641468 Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 13:32:22 +0000 Subject: [PATCH 03/32] fix cmake --- chsql/CMakeLists.txt | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index e6bcd5e..a912396 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -51,20 +51,13 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic) # Install targets install( - TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib + TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" ) - -# Make sure the export set includes clickhouse-cpp-lib -export( - TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib - NAMESPACE duckdb:: - FILE DuckDBExports.cmake -) From d15b0c011a8103c9a7f52c6f008a08dafeb1ac67 Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 13:37:13 +0000 Subject: [PATCH 04/32] fix cmake --- chsql/CMakeLists.txt | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index a912396..b616335 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -1,5 +1,7 @@ cmake_minimum_required(VERSION 3.12) +set(CMAKE_CXX_STANDARD 17) + # Set extension name here set(TARGET_NAME chsql) @@ -28,14 +30,6 @@ include_directories( ./src/include ./src ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include - ../duckdb/third_party/lz4 - ../duckdb/third_party/parquet - ../duckdb/third_party/thrift - ../duckdb/third_party/snappy - ../duckdb/third_party/zstd/include - ../duckdb/third_party/mbedtls - ../duckdb/third_party/mbedtls/include - ../duckdb/third_party/brotli/include ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl ) @@ -51,13 +45,20 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) # Install targets install( - TARGETS ${EXTENSION_NAME} + TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic EXPORT "${DUCKDB_EXPORT_SET}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" ) + +# Make sure the export set includes clickhouse-cpp-lib and its dependencies +export( + TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic + NAMESPACE duckdb:: + FILE DuckDBExports.cmake +) From 0e6f6e9df5b9ee7250eb7e9b7013e8dab77befe6 Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 13:42:32 +0000 Subject: [PATCH 05/32] fix cmake --- chsql/CMakeLists.txt | 7 ++++--- chsql/src/chsql_extension.cpp | 5 +++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index b616335..97a5e86 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -22,6 +22,7 @@ add_subdirectory( # Find OpenSSL package find_package(OpenSSL REQUIRED) +find_package(Zstd REQUIRED) set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) @@ -45,12 +46,12 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp-lib) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto Zstd::Zstd clickhouse-cpp-lib) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto Zstd::Zstd clickhouse-cpp-lib) # Install targets install( - TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic + TARGETS ${EXTENSION_NAME} EXPORT "${DUCKDB_EXPORT_SET}" LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" diff --git a/chsql/src/chsql_extension.cpp b/chsql/src/chsql_extension.cpp index 5dd8fe7..66eed7f 100644 --- a/chsql/src/chsql_extension.cpp +++ b/chsql/src/chsql_extension.cpp @@ -13,6 +13,8 @@ // OpenSSL linked through vcpkg #include #include "parquet_ordered_scan.cpp" +#include "clickhouse_scan.hpp" + namespace duckdb { // To add a new scalar SQL macro, add a new macro to this array! @@ -228,6 +230,9 @@ static void LoadInternal(DatabaseInstance &instance) { ExtensionUtil::RegisterFunction(instance, ReadParquetOrderedFunction()); // Flock ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); + // Clickhouse Scan + RegisterClickHouseScanFunction(instance); + } void ChsqlExtension::Load(DuckDB &db) { From 95d47da1ff74258a99fb56551ac5b4ef60616cf9 Mon Sep 17 00:00:00 2001 From: akvlad Date: Tue, 31 Dec 2024 16:42:11 +0200 Subject: [PATCH 06/32] use clickhouse internal zstd --- chsql/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 97a5e86..c3fc823 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -22,7 +22,6 @@ add_subdirectory( # Find OpenSSL package find_package(OpenSSL REQUIRED) -find_package(Zstd REQUIRED) set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) @@ -46,8 +45,8 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto Zstd::Zstd clickhouse-cpp-lib) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto Zstd::Zstd clickhouse-cpp-lib) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp) # Install targets install( From 5b97d41529b3521c448795d9a8cbcf964652bf80 Mon Sep 17 00:00:00 2001 From: lmangani Date: Tue, 31 Dec 2024 15:05:50 +0000 Subject: [PATCH 07/32] patch scanner --- chsql/src/clickhouse_scan.cpp | 211 ++++------------------------------ 1 file changed, 20 insertions(+), 191 deletions(-) diff --git a/chsql/src/clickhouse_scan.cpp b/chsql/src/clickhouse_scan.cpp index bd8eabd..0497f40 100644 --- a/chsql/src/clickhouse_scan.cpp +++ b/chsql/src/clickhouse_scan.cpp @@ -1,7 +1,10 @@ #include "clickhouse_scan.hpp" #include "duckdb/common/exception.hpp" #include "duckdb/main/secret/secret_manager.hpp" -#include "duckdb/common/types/chunk_collection.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/main/extension_util.hpp" #include namespace duckdb { @@ -22,7 +25,7 @@ struct ClickHouseBindData : public TableFunctionData { }; // Convert ClickHouse type to DuckDB LogicalType -static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) { +static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef column) { switch (column->Type()->GetCode()) { // Integer types case clickhouse::Type::Int8: @@ -33,8 +36,6 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) { return LogicalType::INTEGER; case clickhouse::Type::Int64: return LogicalType::BIGINT; - case clickhouse::Type::Int128: - return LogicalType::HUGEINT; // Unsigned integer types case clickhouse::Type::UInt8: @@ -59,31 +60,20 @@ static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef& column) { // Date and Time types case clickhouse::Type::Date: - return LogicalType::DATE; case clickhouse::Type::Date32: return LogicalType::DATE; case clickhouse::Type::DateTime: - return LogicalType::TIMESTAMP; case clickhouse::Type::DateTime64: return LogicalType::TIMESTAMP; - // Boolean type - case clickhouse::Type::Nothing: - return LogicalType::BOOLEAN; - - // Decimal types - case clickhouse::Type::Decimal: - case clickhouse::Type::Decimal32: - case clickhouse::Type::Decimal64: - case clickhouse::Type::Decimal128: - // Get precision and scale from the type - auto decimal_type = static_cast(column->Type().get()); - return LogicalType::DECIMAL(decimal_type->GetPrecision(), decimal_type->GetScale()); + // Default to VARCHAR for unsupported types + default: + return LogicalType::VARCHAR; } } static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { - auto &bind_data = data_p.bind_data->Cast(); + auto &bind_data = const_cast(data_p.bind_data->Cast()); if (bind_data.finished) { return; @@ -105,192 +95,31 @@ static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &d output.SetCardinality(row_count); for (idx_t col_idx = 0; col_idx < block.GetColumnCount(); col_idx++) { - auto& target = output.data[col_idx]; - auto& source = block[col_idx]; + const auto source = block[col_idx]; + auto &target = output.data[col_idx]; // Convert and copy data based on type switch (bind_data.types[col_idx].id()) { - // String types case LogicalTypeId::VARCHAR: { - if (source->Type()->GetCode() == clickhouse::Type::FixedString) { - auto& strings = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx)); - } - } else { - auto& strings = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = StringVector::AddString(target, strings->At(row_idx)); - } - } - break; - } - - // Integer types - case LogicalTypeId::TINYINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); + const auto strings = source->As(); + auto target_vector = FlatVector::GetData(target); for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::SMALLINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); + auto sv = strings->At(row_idx); + target_vector[row_idx] = StringVector::AddString(target, sv.data(), sv.size()); } break; } case LogicalTypeId::INTEGER: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::BIGINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::HUGEINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - // Assuming ClickHouse returns Int128 as two 64-bit integers - auto value = integers->At(row_idx); - target_vector[row_idx] = hugeint_t(value.high, value.low); - } - break; - } - - // Unsigned integer types - case LogicalTypeId::UTINYINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::USMALLINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::UINTEGER: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = integers->At(row_idx); - } - break; - } - case LogicalTypeId::UBIGINT: { - auto& integers = source->As(); - auto& target_vector = FlatVector::GetData(target); + const auto integers = source->As(); + auto target_vector = FlatVector::GetData(target); for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { target_vector[row_idx] = integers->At(row_idx); } break; } - - // Floating point types - case LogicalTypeId::FLOAT: { - auto& floats = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = floats->At(row_idx); - } - break; - } - case LogicalTypeId::DOUBLE: { - auto& doubles = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = doubles->At(row_idx); - } - break; - } - - // Date and Time types - case LogicalTypeId::DATE: { - if (source->Type()->GetCode() == clickhouse::Type::Date32) { - auto& dates = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - // Convert from days since epoch - target_vector[row_idx] = date_t(dates->At(row_idx)); - } - } else { - auto& dates = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = date_t(dates->At(row_idx)); - } - } - break; - } - case LogicalTypeId::TIMESTAMP: { - if (source->Type()->GetCode() == clickhouse::Type::DateTime64) { - auto& timestamps = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - // Convert from microseconds since epoch - target_vector[row_idx] = timestamp_t(timestamps->At(row_idx)); - } - } else { - auto& timestamps = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - // Convert from seconds since epoch - target_vector[row_idx] = timestamp_t(timestamps->At(row_idx) * Interval::MICROS_PER_SEC); - } - } - break; - } - - // Decimal types - case LogicalTypeId::DECIMAL: { - switch (source->Type()->GetCode()) { - case clickhouse::Type::Decimal32: { - auto& decimals = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = hugeint_t(decimals->At(row_idx)); - } - break; - } - case clickhouse::Type::Decimal64: { - auto& decimals = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - target_vector[row_idx] = hugeint_t(decimals->At(row_idx)); - } - break; - } - case clickhouse::Type::Decimal128: { - auto& decimals = source->As(); - auto& target_vector = FlatVector::GetData(target); - for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { - auto value = decimals->At(row_idx); - target_vector[row_idx] = hugeint_t(value.high, value.low); - } - break; - } - default: - throw NotImplementedException("Uns + // Add remaining type conversions here + default: + throw NotImplementedException("Type not yet implemented in scan function"); } } }); From 6826304e6b801f775c3fd4bb9785f55b8cecb391 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 00:17:02 +0200 Subject: [PATCH 08/32] use clickhouse-cpp as an external cmake project --- chsql/CMakeLists.txt | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index c3fc823..862e7bb 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,19 +7,32 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) +include(ExternalProject) +ExternalProject_Add(clickhouse_cpp_external + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + CMAKE_ARGS + -DCMAKE_CXX_FLAGS=-fPIC + INSTALL_COMMAND "" # Skip install step + BUILD_BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a +) +add_library(clickhouse_cpp_lib STATIC IMPORTED) +set_target_properties(clickhouse_cpp_lib PROPERTIES + IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a" +) +add_dependencies(clickhouse_cpp_lib clickhouse_cpp_external) + # Configure clickhouse-cpp options set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") set(CH_CPP_BUILD_ONLY_LIB ON CACHE BOOL "Build only library") set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") -# Add clickhouse-cpp -add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse-cpp - EXCLUDE_FROM_ALL -) - # Find OpenSSL package find_package(OpenSSL REQUIRED) @@ -30,8 +43,8 @@ include_directories( ./src/include ./src ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include - ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl ) # Update extension sources to include new file @@ -45,8 +58,8 @@ build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse-cpp) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse_cpp_lib) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse_cpp_lib) # Install targets install( @@ -55,10 +68,3 @@ install( LIBRARY DESTINATION "${INSTALL_LIB_DIR}" ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" ) - -# Make sure the export set includes clickhouse-cpp-lib and its dependencies -export( - TARGETS ${EXTENSION_NAME} clickhouse-cpp-lib absl_int128 cityhash lz4 zstdstatic - NAMESPACE duckdb:: - FILE DuckDBExports.cmake -) From ec264ba83316828afd4076e54f650cd6c6e36669 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 00:54:11 +0200 Subject: [PATCH 09/32] OSX fix --- chsql/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 862e7bb..239cc66 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -11,6 +11,8 @@ include(ExternalProject) ExternalProject_Add(clickhouse_cpp_external SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp CMAKE_ARGS + -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" + -DCMAKE_OSX_DEPLOYMENT_TARGET="${CMAKE_OSX_DEPLOYMENT_TARGET}" -DCMAKE_CXX_FLAGS=-fPIC INSTALL_COMMAND "" # Skip install step BUILD_BYPRODUCTS From 01b203d5c527451cfde5acde3a2191f90c9236a4 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 09:42:58 +0200 Subject: [PATCH 10/32] OSX fix debug --- chsql/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 239cc66..623d527 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -11,8 +11,8 @@ include(ExternalProject) ExternalProject_Add(clickhouse_cpp_external SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp CMAKE_ARGS - -DCMAKE_OSX_ARCHITECTURES="${CMAKE_OSX_ARCHITECTURES}" - -DCMAKE_OSX_DEPLOYMENT_TARGET="${CMAKE_OSX_DEPLOYMENT_TARGET}" + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} -DCMAKE_CXX_FLAGS=-fPIC INSTALL_COMMAND "" # Skip install step BUILD_BYPRODUCTS From 89468582b0222a2ac435b6d3affbaae323aaf4a0 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 10:09:26 +0200 Subject: [PATCH 11/32] linux fix debug --- chsql/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 623d527..f7e4d39 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -13,7 +13,8 @@ ExternalProject_Add(clickhouse_cpp_external CMAKE_ARGS -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} - -DCMAKE_CXX_FLAGS=-fPIC + -DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS} + -DCMAKE_C_FLAGS=${ARCH_FLAGS} INSTALL_COMMAND "" # Skip install step BUILD_BYPRODUCTS ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a From 37753047faf808ff4511b57155606789f97069dc Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 19:35:08 +0200 Subject: [PATCH 12/32] linux arm64 debug --- chsql/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index f7e4d39..cd56276 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,6 +7,10 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) +if(DUCKDB_EXPLICIT_PLATFORM MATCHER "linux_arm64") + set(ARCH_FLAGS "-march=armv8-a") +endif() + include(ExternalProject) ExternalProject_Add(clickhouse_cpp_external SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp From e69a946606ad4f52edff6351426f35a46b27a712 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 19:39:15 +0200 Subject: [PATCH 13/32] linux arm64 debug --- chsql/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index cd56276..845c883 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,7 +7,7 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) -if(DUCKDB_EXPLICIT_PLATFORM MATCHER "linux_arm64") +if(DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") set(ARCH_FLAGS "-march=armv8-a") endif() From e503662c437dd51ceff671505d8f42c02ab0a84d Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 22:09:46 +0200 Subject: [PATCH 14/32] linux arm64 debug --- chsql/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 845c883..3c8db31 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,7 +7,7 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) -if(DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") set(ARCH_FLAGS "-march=armv8-a") endif() @@ -17,7 +17,7 @@ ExternalProject_Add(clickhouse_cpp_external CMAKE_ARGS -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} - -DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS} + "-DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS}" -DCMAKE_C_FLAGS=${ARCH_FLAGS} INSTALL_COMMAND "" # Skip install step BUILD_BYPRODUCTS From 4f3e00ca5e803fa5239398431e7e80944455dd9b Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 22:52:58 +0200 Subject: [PATCH 15/32] linux arm64 debug --- chsql/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 3c8db31..2cf19fe 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,7 +7,7 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") +if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") set(ARCH_FLAGS "-march=armv8-a") endif() From 43183ff9a67bc5afd71170e1ebb0f74e8e6cf791 Mon Sep 17 00:00:00 2001 From: akvlad Date: Thu, 2 Jan 2025 23:26:11 +0200 Subject: [PATCH 16/32] linux arm64 debug --- chsql/CMakeLists.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 2cf19fe..1fd4839 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -7,9 +7,9 @@ set(TARGET_NAME chsql) project(${TARGET_NAME}) -if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") - set(ARCH_FLAGS "-march=armv8-a") -endif() +#if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") +# set(ARCH_FLAGS "-march=armv8-a") +#endif() include(ExternalProject) ExternalProject_Add(clickhouse_cpp_external @@ -17,6 +17,8 @@ ExternalProject_Add(clickhouse_cpp_external CMAKE_ARGS -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} "-DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS}" -DCMAKE_C_FLAGS=${ARCH_FLAGS} INSTALL_COMMAND "" # Skip install step From 36704f67bff8a0c12184dff7f8943bc492a15541 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 01:32:33 +0100 Subject: [PATCH 17/32] Use static OpenSSL for WIN --- chsql/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 1fd4839..52cd454 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -45,6 +45,10 @@ set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") # Find OpenSSL package find_package(OpenSSL REQUIRED) +if(MINGW) + set(OPENSSL_USE_STATIC_LIBS TRUE) +endif() + set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) From 28b2a6eb646aee4d31887bdbf971d3a6c72f1fbb Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 01:39:22 +0100 Subject: [PATCH 18/32] Add "clickhouse-cpp" to vcpkg.json --- chsql/vcpkg.json | 1 + 1 file changed, 1 insertion(+) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 0a444c1..6d2ac4c 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,6 +1,7 @@ { "dependencies": [ "openssl", + "clickhouse-cpp", "abseil", "cityhash", "lz4", From ceb372886fe4d127a6e12a9454546b6e13a6f9bf Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 01:56:32 +0100 Subject: [PATCH 19/32] Platform specific vcpkg.json --- chsql/vcpkg.json | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 6d2ac4c..f65a4eb 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,10 +1,16 @@ { "dependencies": [ "openssl", - "clickhouse-cpp", + { + "name": "clickhouse-cpp", + "platform": "!wasm32" + }, "abseil", - "cityhash", "lz4", - "zstd" + "zstd", + { + "name": "cityhash", + "platform": "!windows" + } ] } From bbf6f017b9ea7e6d073229d0f3e88c806bb81fb4 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 02:45:49 +0100 Subject: [PATCH 20/32] Update vcpkg.json --- chsql/vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index f65a4eb..689a30c 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -10,7 +10,7 @@ "zstd", { "name": "cityhash", - "platform": "!windows" + "platform": "!windows | !MinGW" } ] } From 982aa0141ff91f86c7e1a368f6c8b1799b5b8a54 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 02:51:34 +0100 Subject: [PATCH 21/32] Update vcpkg.json --- chsql/vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 689a30c..bec68b1 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -10,7 +10,7 @@ "zstd", { "name": "cityhash", - "platform": "!windows | !MinGW" + "platform": "!windows, !mingw" } ] } From 8bfec6b7baad9d1b0e90eda24706a12787cb6901 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 03:00:40 +0100 Subject: [PATCH 22/32] Update vcpkg.json --- chsql/vcpkg.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index bec68b1..f675c4d 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -10,7 +10,7 @@ "zstd", { "name": "cityhash", - "platform": "!windows, !mingw" + "platform": "!(windows & x86 & static) | !windows " } ] } From 9da8c544fba3987f272fbe1dc1f4e5e2876febd3 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 03:21:48 +0100 Subject: [PATCH 23/32] Selective builder --- chsql/CMakeLists.txt | 85 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 52cd454..89169e0 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -2,7 +2,6 @@ cmake_minimum_required(VERSION 3.12) set(CMAKE_CXX_STANDARD 17) -# Set extension name here set(TARGET_NAME chsql) project(${TARGET_NAME}) @@ -11,36 +10,38 @@ project(${TARGET_NAME}) # set(ARCH_FLAGS "-march=armv8-a") #endif() -include(ExternalProject) -ExternalProject_Add(clickhouse_cpp_external - SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp - CMAKE_ARGS - -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} - -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - "-DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS}" - -DCMAKE_C_FLAGS=${ARCH_FLAGS} - INSTALL_COMMAND "" # Skip install step - BUILD_BYPRODUCTS - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a - ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a -) -add_library(clickhouse_cpp_lib STATIC IMPORTED) -set_target_properties(clickhouse_cpp_lib PROPERTIES - IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a - INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a" -) -add_dependencies(clickhouse_cpp_lib clickhouse_cpp_external) +if(NOT EMSCRIPTEN OR NOT MINGW) + include(ExternalProject) + ExternalProject_Add(clickhouse_cpp_external + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + CMAKE_ARGS + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + "-DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS}" + -DCMAKE_C_FLAGS=${ARCH_FLAGS} + INSTALL_COMMAND "" # Skip install step + BUILD_BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a + ) + add_library(clickhouse_cpp_lib STATIC IMPORTED) + set_target_properties(clickhouse_cpp_lib PROPERTIES + IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse[...] + ) + add_dependencies(clickhouse_cpp_lib clickhouse_cpp_external) -# Configure clickhouse-cpp options -set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") -set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") -set(CH_CPP_BUILD_ONLY_LIB ON CACHE BOOL "Build only library") -set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") + # Configure clickhouse-cpp options + set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") + set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") + set(CH_CPP_BUILD_ONLY_LIB ON CACHE BOOL "Build only library") + set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") +endif() # Find OpenSSL package find_package(OpenSSL REQUIRED) @@ -52,27 +53,39 @@ endif() set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) -include_directories( +set(EXTENSION_INCLUDES ./src/include ./src ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include - ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp - ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl ) +if(NOT EMSCRIPTEN OR NOT MINGW) + list(APPEND EXTENSION_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl) +endif() + +include_directories(EXTENSION_INCLUDES) + # Update extension sources to include new file set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp - src/clickhouse_scan.cpp ) +if(NOT EMSCRIPTEN OR NOT MINGW) + list(APPEND EXTENSION_SOURCES src/clickhouse_scan.cpp) +endif() + build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) # Link libraries using plain signature -target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse_cpp_lib) -target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto clickhouse_cpp_lib) +target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) +target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) + +if(NOT EMSCRIPTEN OR NOT MINGW) + target_link_libraries(${EXTENSION_NAME} clickhouse_cpp_lib) + target_link_libraries(${LOADABLE_EXTENSION_NAME} clickhouse_cpp_lib) +endif() # Install targets install( From c2b0c150029ce121873d2b55579100d0f82c6875 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 03:28:22 +0100 Subject: [PATCH 24/32] Update CMakeLists.txt --- chsql/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 89169e0..31a92d5 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -32,10 +32,10 @@ if(NOT EMSCRIPTEN OR NOT MINGW) add_library(clickhouse_cpp_lib STATIC IMPORTED) set_target_properties(clickhouse_cpp_lib PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a - INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse[...] + INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a" ) + add_dependencies(clickhouse_cpp_lib clickhouse_cpp_external) - # Configure clickhouse-cpp options set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") @@ -65,12 +65,12 @@ endif() include_directories(EXTENSION_INCLUDES) -# Update extension sources to include new file set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp ) +# Include clickhouse_scan for supported platforms if(NOT EMSCRIPTEN OR NOT MINGW) list(APPEND EXTENSION_SOURCES src/clickhouse_scan.cpp) endif() From 5e035bc5771dd70e1a097a0d3485fdef8632c1a2 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 03:34:41 +0100 Subject: [PATCH 25/32] Update CMakeLists.txt --- chsql/CMakeLists.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index 31a92d5..abe443f 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -53,18 +53,22 @@ endif() set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) -set(EXTENSION_INCLUDES +if(NOT EMSCRIPTEN OR NOT MINGW) +include_directories( + ./src/include + ./src + ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl +) +else() +include_directories( ./src/include ./src ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include ) - -if(NOT EMSCRIPTEN OR NOT MINGW) - list(APPEND EXTENSION_INCLUDES ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl) endif() -include_directories(EXTENSION_INCLUDES) - set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp From 0386a1b3419e7c95556f14751e30c6aaba061c07 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 03:53:39 +0100 Subject: [PATCH 26/32] Conditional include and register --- chsql/src/chsql_extension.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/chsql/src/chsql_extension.cpp b/chsql/src/chsql_extension.cpp index 66eed7f..fb27a91 100644 --- a/chsql/src/chsql_extension.cpp +++ b/chsql/src/chsql_extension.cpp @@ -13,8 +13,11 @@ // OpenSSL linked through vcpkg #include #include "parquet_ordered_scan.cpp" -#include "clickhouse_scan.hpp" +#if !defined(EMSCRIPTEN) && !defined(MINGW) +#include "clickhouse_scan.hpp" +#endif + namespace duckdb { // To add a new scalar SQL macro, add a new macro to this array! @@ -230,9 +233,12 @@ static void LoadInternal(DatabaseInstance &instance) { ExtensionUtil::RegisterFunction(instance, ReadParquetOrderedFunction()); // Flock ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); - // Clickhouse Scan - RegisterClickHouseScanFunction(instance); +#if !defined(EMSCRIPTEN) && !defined(MINGW) + // Clickhouse Scan for supported platforms + RegisterClickHouseScanFunction(instance); +#endif + } void ChsqlExtension::Load(DuckDB &db) { From 4f4fbdde20ca074fff17d29cc1a9e6d75ecadf76 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 12:06:23 +0100 Subject: [PATCH 27/32] Remove cityhash from vcpkg.json (TEST) --- chsql/vcpkg.json | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index f675c4d..86ec48b 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -7,10 +7,6 @@ }, "abseil", "lz4", - "zstd", - { - "name": "cityhash", - "platform": "!(windows & x86 & static) | !windows " - } + "zstd" ] } From 08acb5e80b7f46b5867f2e6e340a288368b25021 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 12:19:39 +0100 Subject: [PATCH 28/32] Update chsql_extension.cpp --- chsql/src/chsql_extension.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/chsql/src/chsql_extension.cpp b/chsql/src/chsql_extension.cpp index fb27a91..85c407c 100644 --- a/chsql/src/chsql_extension.cpp +++ b/chsql/src/chsql_extension.cpp @@ -12,9 +12,9 @@ // OpenSSL linked through vcpkg #include -#include "parquet_ordered_scan.cpp" #if !defined(EMSCRIPTEN) && !defined(MINGW) +#include "parquet_ordered_scan.cpp" #include "clickhouse_scan.hpp" #endif @@ -230,13 +230,16 @@ static void LoadInternal(DatabaseInstance &instance) { auto table_info = DefaultTableFunctionGenerator::CreateTableMacroInfo(chsql_table_macros[index]); ExtensionUtil::RegisterFunction(instance, *table_info); } + +#if !defined(EMSCRIPTEN) + // Parquet Reader ExtensionUtil::RegisterFunction(instance, ReadParquetOrderedFunction()); - // Flock - ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); - -#if !defined(EMSCRIPTEN) && !defined(MINGW) - // Clickhouse Scan for supported platforms - RegisterClickHouseScanFunction(instance); + // Flock Table + ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); +#if !defined(MINGW) + // Clickhouse Scan for supported platforms + RegisterClickHouseScanFunction(instance); +#endif #endif } From 094d5daae6d28f299e2e2aadd8a0901a77764e36 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 12:29:20 +0100 Subject: [PATCH 29/32] Update vcpkg.json --- chsql/vcpkg.json | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 86ec48b..e65fb71 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -3,10 +3,19 @@ "openssl", { "name": "clickhouse-cpp", - "platform": "!wasm32" + "platform": "!wasm32,!mingw,!windows" }, - "abseil", - "lz4", - "zstd" + { + "name":"abseil", + "platform": "!wasm32,!mingw,!windows" + }, + { + "name":"lz4", + "platform": "!wasm32,!mingw,!windows" + }, + { + "name":"zstd", + "platform": "!wasm32,!mingw,!windows" + } ] } From 3272226932741928d20d309c5e3be79becd8a447 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 12:38:09 +0100 Subject: [PATCH 30/32] Try overrides in vcpkg.json --- chsql/vcpkg.json | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index e65fb71..94c4277 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,20 +1,34 @@ { "dependencies": [ "openssl", + "clickhouse-cpp", + "abseil", + "lz4", + "zstd" + ], + "overrides": [ { - "name": "clickhouse-cpp", + "name": "clickhouse-cpp", + "version": "0", + "port-version": 0, "platform": "!wasm32,!mingw,!windows" }, { - "name":"abseil", + "name": "abseil", + "version": "0", + "port-version": 0, "platform": "!wasm32,!mingw,!windows" }, { - "name":"lz4", + "name": "lz4", + "version": "0", + "port-version": 0, "platform": "!wasm32,!mingw,!windows" }, { - "name":"zstd", + "name": "zstd", + "version": "0", + "port-version": 0, "platform": "!wasm32,!mingw,!windows" } ] From ccf65b2e92f920def6701f4e59823832026c47fd Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 13:02:29 +0100 Subject: [PATCH 31/32] shrink vcpkg.json --- chsql/vcpkg.json | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 94c4277..5e85d4d 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,35 +1,9 @@ { "dependencies": [ "openssl", - "clickhouse-cpp", - "abseil", - "lz4", - "zstd" - ], - "overrides": [ { "name": "clickhouse-cpp", - "version": "0", - "port-version": 0, - "platform": "!wasm32,!mingw,!windows" - }, - { - "name": "abseil", - "version": "0", - "port-version": 0, - "platform": "!wasm32,!mingw,!windows" - }, - { - "name": "lz4", - "version": "0", - "port-version": 0, - "platform": "!wasm32,!mingw,!windows" - }, - { - "name": "zstd", - "version": "0", - "port-version": 0, - "platform": "!wasm32,!mingw,!windows" + "platform": "linux, osx" } ] } From 0d67955b7d80a1f755b52dc3d3f1e92bce924518 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Fri, 3 Jan 2025 13:12:21 +0100 Subject: [PATCH 32/32] Update CMakeLists.txt --- chsql/CMakeLists.txt | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index abe443f..ededcf9 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -65,18 +65,17 @@ else() include_directories( ./src/include ./src - ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include ) endif() set(EXTENSION_SOURCES src/chsql_extension.cpp - src/duck_flock.cpp ) # Include clickhouse_scan for supported platforms if(NOT EMSCRIPTEN OR NOT MINGW) - list(APPEND EXTENSION_SOURCES src/clickhouse_scan.cpp) + list(APPEND EXTENSION_SOURCES src/clickhouse_scan.cpp src/duck_flock.cpp +) endif() build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})