diff --git a/.gitmodules b/.gitmodules index f37c3c4..83999ad 100644 --- a/.gitmodules +++ b/.gitmodules @@ -8,3 +8,6 @@ url = https://github.com/duckdb/extension-ci-tools branch = main update = merge +[submodule "contribs/clickhouse-cpp"] + path = contribs/clickhouse-cpp + url = https://github.com/ClickHouse/clickhouse-cpp.git diff --git a/chsql/CMakeLists.txt b/chsql/CMakeLists.txt index f2b1f46..ededcf9 100644 --- a/chsql/CMakeLists.txt +++ b/chsql/CMakeLists.txt @@ -1,34 +1,99 @@ -cmake_minimum_required(VERSION 3.5) -# Set extension name here +cmake_minimum_required(VERSION 3.12) + +set(CMAKE_CXX_STANDARD 17) + set(TARGET_NAME chsql) -# DuckDB's extension distribution supports vcpkg. As such, dependencies can be added in ./vcpkg.json and then -# used in cmake with find_package. Feel free to remove or replace with other dependencies. -# Note that it should also be removed from vcpkg.json to prevent needlessly installing it.. + +project(${TARGET_NAME}) + +#if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR DUCKDB_EXPLICIT_PLATFORM MATCHES "linux_arm64") +# set(ARCH_FLAGS "-march=armv8-a") +#endif() + +if(NOT EMSCRIPTEN OR NOT MINGW) + include(ExternalProject) + ExternalProject_Add(clickhouse_cpp_external + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + CMAKE_ARGS + -DCMAKE_OSX_ARCHITECTURES=${CMAKE_OSX_ARCHITECTURES} + -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + "-DCMAKE_CXX_FLAGS=-fPIC ${ARCH_FLAGS}" + -DCMAKE_C_FLAGS=${ARCH_FLAGS} + INSTALL_COMMAND "" # Skip install step + BUILD_BYPRODUCTS + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a + ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a + ) + add_library(clickhouse_cpp_lib STATIC IMPORTED) + set_target_properties(clickhouse_cpp_lib PROPERTIES + IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/clickhouse/libclickhouse-cpp-lib.a + INTERFACE_LINK_LIBRARIES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/absl/absl/libabsl_int128.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/cityhash/cityhash/libcityhash.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/lz4/lz4/liblz4.a;${CMAKE_CURRENT_BINARY_DIR}/clickhouse_cpp_external-prefix/src/clickhouse_cpp_external-build/contrib/zstd/zstd/libzstdstatic.a" + ) + + add_dependencies(clickhouse_cpp_lib clickhouse_cpp_external) + # Configure clickhouse-cpp options + set(CH_CPP_BUILD_SHARED OFF CACHE BOOL "Build shared library") + set(CH_CPP_BUILD_STATICALY_LINKED_LIB ON CACHE BOOL "Build static library") + set(CH_CPP_BUILD_ONLY_LIB ON CACHE BOOL "Build only library") + set(CH_CPP_WITH_OPENSSL ON CACHE BOOL "Use OpenSSL") +endif() + +# Find OpenSSL package find_package(OpenSSL REQUIRED) + +if(MINGW) + set(OPENSSL_USE_STATIC_LIBS TRUE) +endif() + set(EXTENSION_NAME ${TARGET_NAME}_extension) set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) -project(${TARGET_NAME}) +if(NOT EMSCRIPTEN OR NOT MINGW) +include_directories( + ./src/include + ./src + ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../contribs/clickhouse-cpp/contrib/absl +) +else() include_directories( - ./src/include - ./src - ${CMAKE_CURRENT_SOURCE_DIR}/../duckdb/extension/parquet/include - ../duckdb/third_party/lz4 - ../duckdb/third_party/parquet - ../duckdb/third_party/thrift - ../duckdb/third_party/snappy - ../duckdb/third_party/zstd/include - ../duckdb/third_party/mbedtls - ../duckdb/third_party/mbedtls/include - ../duckdb/third_party/brotli/include) -set(EXTENSION_SOURCES src/chsql_extension.cpp src/duck_flock.cpp) + ./src/include + ./src +) +endif() + +set(EXTENSION_SOURCES + src/chsql_extension.cpp +) + +# Include clickhouse_scan for supported platforms +if(NOT EMSCRIPTEN OR NOT MINGW) + list(APPEND EXTENSION_SOURCES src/clickhouse_scan.cpp src/duck_flock.cpp +) +endif() + build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) -# Link OpenSSL in both the static library as the loadable extension + +# Link libraries using plain signature target_link_libraries(${EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) target_link_libraries(${LOADABLE_EXTENSION_NAME} OpenSSL::SSL OpenSSL::Crypto) + +if(NOT EMSCRIPTEN OR NOT MINGW) + target_link_libraries(${EXTENSION_NAME} clickhouse_cpp_lib) + target_link_libraries(${LOADABLE_EXTENSION_NAME} clickhouse_cpp_lib) +endif() + +# Install targets install( - TARGETS ${EXTENSION_NAME} - EXPORT "${DUCKDB_EXPORT_SET}" - LIBRARY DESTINATION "${INSTALL_LIB_DIR}" - ARCHIVE DESTINATION "${INSTALL_LIB_DIR}") + TARGETS ${EXTENSION_NAME} + EXPORT "${DUCKDB_EXPORT_SET}" + LIBRARY DESTINATION "${INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" +) diff --git a/chsql/src/chsql_extension.cpp b/chsql/src/chsql_extension.cpp index 5dd8fe7..85c407c 100644 --- a/chsql/src/chsql_extension.cpp +++ b/chsql/src/chsql_extension.cpp @@ -12,7 +12,12 @@ // OpenSSL linked through vcpkg #include + +#if !defined(EMSCRIPTEN) && !defined(MINGW) #include "parquet_ordered_scan.cpp" +#include "clickhouse_scan.hpp" +#endif + namespace duckdb { // To add a new scalar SQL macro, add a new macro to this array! @@ -225,9 +230,18 @@ static void LoadInternal(DatabaseInstance &instance) { auto table_info = DefaultTableFunctionGenerator::CreateTableMacroInfo(chsql_table_macros[index]); ExtensionUtil::RegisterFunction(instance, *table_info); } + +#if !defined(EMSCRIPTEN) + // Parquet Reader ExtensionUtil::RegisterFunction(instance, ReadParquetOrderedFunction()); - // Flock - ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); + // Flock Table + ExtensionUtil::RegisterFunction(instance, DuckFlockTableFunction()); +#if !defined(MINGW) + // Clickhouse Scan for supported platforms + RegisterClickHouseScanFunction(instance); +#endif +#endif + } void ChsqlExtension::Load(DuckDB &db) { diff --git a/chsql/src/clickhouse_scan.cpp b/chsql/src/clickhouse_scan.cpp new file mode 100644 index 0000000..0497f40 --- /dev/null +++ b/chsql/src/clickhouse_scan.cpp @@ -0,0 +1,198 @@ +#include "clickhouse_scan.hpp" +#include "duckdb/common/exception.hpp" +#include "duckdb/main/secret/secret_manager.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/common/string_util.hpp" +#include "duckdb/main/extension_util.hpp" +#include + +namespace duckdb { + +struct ClickHouseBindData : public TableFunctionData { + string query; + string host; + string port; + string user; + string password; + string database; + bool finished; + vector types; + vector names; + + ClickHouseBindData(string query, string host, string port, string user, string password, string database) + : query(query), host(host), port(port), user(user), password(password), database(database), finished(false) {} +}; + +// Convert ClickHouse type to DuckDB LogicalType +static LogicalType ConvertClickHouseType(const clickhouse::ColumnRef column) { + switch (column->Type()->GetCode()) { + // Integer types + case clickhouse::Type::Int8: + return LogicalType::TINYINT; + case clickhouse::Type::Int16: + return LogicalType::SMALLINT; + case clickhouse::Type::Int32: + return LogicalType::INTEGER; + case clickhouse::Type::Int64: + return LogicalType::BIGINT; + + // Unsigned integer types + case clickhouse::Type::UInt8: + return LogicalType::UTINYINT; + case clickhouse::Type::UInt16: + return LogicalType::USMALLINT; + case clickhouse::Type::UInt32: + return LogicalType::UINTEGER; + case clickhouse::Type::UInt64: + return LogicalType::UBIGINT; + + // Floating point types + case clickhouse::Type::Float32: + return LogicalType::FLOAT; + case clickhouse::Type::Float64: + return LogicalType::DOUBLE; + + // String types + case clickhouse::Type::String: + case clickhouse::Type::FixedString: + return LogicalType::VARCHAR; + + // Date and Time types + case clickhouse::Type::Date: + case clickhouse::Type::Date32: + return LogicalType::DATE; + case clickhouse::Type::DateTime: + case clickhouse::Type::DateTime64: + return LogicalType::TIMESTAMP; + + // Default to VARCHAR for unsupported types + default: + return LogicalType::VARCHAR; + } +} + +static void ClickHouseScanFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { + auto &bind_data = const_cast(data_p.bind_data->Cast()); + + if (bind_data.finished) { + return; + } + + try { + // Initialize ClickHouse client + clickhouse::Client client(clickhouse::ClientOptions() + .SetHost(bind_data.host) + .SetPort(std::stoi(bind_data.port)) + .SetUser(bind_data.user) + .SetPassword(bind_data.password) + .SetDefaultDatabase(bind_data.database) + .SetPingBeforeQuery(true)); + + // Execute query + client.Select(bind_data.query, [&](const clickhouse::Block& block) { + idx_t row_count = block.GetRowCount(); + output.SetCardinality(row_count); + + for (idx_t col_idx = 0; col_idx < block.GetColumnCount(); col_idx++) { + const auto source = block[col_idx]; + auto &target = output.data[col_idx]; + + // Convert and copy data based on type + switch (bind_data.types[col_idx].id()) { + case LogicalTypeId::VARCHAR: { + const auto strings = source->As(); + auto target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + auto sv = strings->At(row_idx); + target_vector[row_idx] = StringVector::AddString(target, sv.data(), sv.size()); + } + break; + } + case LogicalTypeId::INTEGER: { + const auto integers = source->As(); + auto target_vector = FlatVector::GetData(target); + for (idx_t row_idx = 0; row_idx < row_count; row_idx++) { + target_vector[row_idx] = integers->At(row_idx); + } + break; + } + // Add remaining type conversions here + default: + throw NotImplementedException("Type not yet implemented in scan function"); + } + } + }); + + bind_data.finished = true; + + } catch (const std::exception& e) { + throw IOException("ClickHouse error: " + string(e.what())); + } +} + +static unique_ptr ClickHouseScanBind(ClientContext &context, TableFunctionBindInput &input, + vector &return_types, vector &names) { + auto query = input.inputs[0].GetValue(); + + // Get ClickHouse connection details from secrets + auto &secret_manager = SecretManager::Get(context); + auto transaction = CatalogTransaction::GetSystemCatalogTransaction(context); + auto secret_match = secret_manager.LookupSecret(transaction, "clickhouse", "clickhouse"); + + if (!secret_match.HasMatch()) { + throw InvalidInputException("No 'clickhouse' secret found. Please create a secret with CREATE SECRET first."); + } + + auto &secret = secret_match.GetSecret(); + const auto *kv_secret = dynamic_cast(&secret); + if (!kv_secret) { + throw InvalidInputException("Invalid secret format for 'clickhouse' secret"); + } + + // Extract connection parameters from secret + string host, port, user, password, database; + Value val; + + if (kv_secret->TryGetValue("host", val)) host = val.ToString(); + if (kv_secret->TryGetValue("port", val)) port = val.ToString(); + if (kv_secret->TryGetValue("user", val)) user = val.ToString(); + if (kv_secret->TryGetValue("password", val)) password = val.ToString(); + if (kv_secret->TryGetValue("database", val)) database = val.ToString(); + + // Create bind data + auto result = make_uniq(query, host, port, user, password, database); + + // Initialize client to fetch schema + try { + clickhouse::Client client(clickhouse::ClientOptions() + .SetHost(host) + .SetPort(std::stoi(port)) + .SetUser(user) + .SetPassword(password) + .SetDefaultDatabase(database)); + + // Execute query to get schema + client.Select(query, [&](const clickhouse::Block& block) { + for (size_t i = 0; i < block.GetColumnCount(); i++) { + auto column = block[i]; + return_types.push_back(ConvertClickHouseType(column)); + names.push_back(block.GetColumnName(i)); + } + }); + + result->types = return_types; + result->names = names; + + return std::move(result); + } catch (const std::exception& e) { + throw IOException("ClickHouse error during bind: " + string(e.what())); + } +} + +void RegisterClickHouseScanFunction(DatabaseInstance &instance) { + TableFunction clickhouse_scan("clickhouse_scan", {LogicalType::VARCHAR}, ClickHouseScanFunction, ClickHouseScanBind); + ExtensionUtil::RegisterFunction(instance, clickhouse_scan); +} + +} // namespace duckdb diff --git a/chsql/src/include/clickhouse_scan.hpp b/chsql/src/include/clickhouse_scan.hpp new file mode 100644 index 0000000..b4d90ec --- /dev/null +++ b/chsql/src/include/clickhouse_scan.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "duckdb.hpp" +#include "duckdb/function/table_function.hpp" +#include "duckdb/common/types/data_chunk.hpp" +#include "duckdb/main/client_context.hpp" + +namespace duckdb { + +void RegisterClickHouseScanFunction(DatabaseInstance &instance); + +} // namespace duckdb diff --git a/chsql/vcpkg.json b/chsql/vcpkg.json index 85936bf..5e85d4d 100644 --- a/chsql/vcpkg.json +++ b/chsql/vcpkg.json @@ -1,5 +1,9 @@ { "dependencies": [ - "openssl" + "openssl", + { + "name": "clickhouse-cpp", + "platform": "linux, osx" + } ] -} \ No newline at end of file +} diff --git a/contribs/clickhouse-cpp b/contribs/clickhouse-cpp new file mode 160000 index 0000000..2a49a25 --- /dev/null +++ b/contribs/clickhouse-cpp @@ -0,0 +1 @@ +Subproject commit 2a49a25b573b1194f621070711440ea125577f50