From 5a508b02a9f07715412afd1702380277287d82f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Tue, 8 Jul 2025 21:38:38 +0200
Subject: [PATCH 1/2] improve f64 support (for convert mostly)

---
 model.cpp | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/model.cpp b/model.cpp
index 559c876c..27aeefa6 100644
--- a/model.cpp
+++ b/model.cpp
@@ -835,6 +835,12 @@ void convert_tensor(void* src,
     } else if (src_type == GGML_TYPE_F32) {
         if (dst_type == GGML_TYPE_F16) {
             ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n);
+        } else if (dst_type == GGML_TYPE_F64) {
+            double* ddst = (double*)dst;
+            float* fsrc = (float*)src;
+            for (int64_t i = 0; i < n; i++) {
+                ddst[i] = (double)(fsrc[i]);
+            }
         } else {
             std::vector<float> imatrix(n_per_row, 1.0f); // dummy importance matrix
             const float* im = imatrix.data();
@@ -843,6 +849,41 @@ void convert_tensor(void* src,
     } else if (dst_type == GGML_TYPE_F32) {
         if (src_type == GGML_TYPE_F16) {
             ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
+        } else if (src_type == GGML_TYPE_F64) {
+            float* fdst = (float*)dst;
+            double* dsrc = (double*)src;
+            for (int64_t i = 0; i < n; i++) {
+                fdst[i] = (float)(dsrc[i]);
+            }
+        } else {
+            auto qtype = ggml_get_type_traits(src_type);
+            if (qtype->to_float == NULL) {
+                throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
+                                                ggml_type_name(src_type)));
+            }
+            qtype->to_float(src, (float*)dst, n);
+        }
+    } else if (src_type == GGML_TYPE_F64) {
+        if (dst_type == GGML_TYPE_F16) {
+            // ggml_fp32_to_fp16_row((float*)src, (ggml_fp16_t*)dst, n);
+            ggml_fp16_t* fdst = (ggml_fp16_t*)dst;
+            double* dsrc = (double*)src;
+            for (int64_t i = 0; i < n; i++) {
+                fdst[i] = ggml_fp32_to_fp16((float)dsrc[i]);
+            }
+        } else {
+            std::vector<float> imatrix(n_per_row, 1.0f); // dummy importance matrix
+            const float* im = imatrix.data();
+            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
+        }
+    } else if (dst_type == GGML_TYPE_F64) {
+        if (src_type == GGML_TYPE_F16) {
+            // ggml_fp16_to_fp32_row((ggml_fp16_t*)src, (float*)dst, n);
+            double* ddst = (double*)dst;
+            ggml_fp16_t* fsrc = (ggml_fp16_t*)src;
+            for (int64_t i = 0; i < n; i++) {
+                ddst[i] = (double)ggml_fp16_to_fp32(fsrc[i]);
+            }
         } else {
             auto qtype = ggml_get_type_traits(src_type);
             if (qtype->to_float == NULL) {

From 0aa6ca7f1089be1d51a83291f472f345c882d4c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20du=20Hamel?=
Date: Tue, 8 Jul 2025 23:55:59 +0200
Subject: [PATCH 2/2] f64<->quant

---
 model.cpp | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/model.cpp b/model.cpp
index 27aeefa6..70f05a9a 100644
--- a/model.cpp
+++ b/model.cpp
@@ -874,7 +874,13 @@ void convert_tensor(void* src,
         } else {
             std::vector<float> imatrix(n_per_row, 1.0f); // dummy importance matrix
             const float* im = imatrix.data();
-            ggml_quantize_chunk(dst_type, (float*)src, dst, 0, nrows, n_per_row, im);
+            float* fsrc = (float*)malloc(n * sizeof(float));
+            double* dsrc = (double*)src;
+            for (int64_t i = 0; i < n; i++) {
+                fsrc[i] = (float)(dsrc[i]);
+            }
+            ggml_quantize_chunk(dst_type, fsrc, dst, 0, nrows, n_per_row, im);
+            free(fsrc);
         }
     } else if (dst_type == GGML_TYPE_F64) {
         if (src_type == GGML_TYPE_F16) {
@@ -890,7 +896,13 @@ void convert_tensor(void* src,
                 throw std::runtime_error(format("type %s unsupported for integer quantization: no dequantization available",
                                                 ggml_type_name(src_type)));
             }
-            qtype->to_float(src, (float*)dst, n);
+            float* fdst = (float*)malloc(n * sizeof(float));
+            qtype->to_float(src, fdst, n);
+            double* ddst = (double*)dst;
+            for (int64_t i = 0; i < n; i++) {
+                ddst[i] = (double)(fdst[i]);
+            }
+            free(fdst);
         }
     } else {
         // src_type == GGML_TYPE_F16 => dst_type is quantized
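
Review note, not part of the series: the new paths are easiest to sanity-check with a
round trip through a quantized type. Below is a minimal sketch; the convert_tensor
signature is inferred from the hunk headers and the usage above, and making it callable
from a test translation unit is an assumption, not something these patches do.

// Round-trip sketch for the new f64 paths (hypothetical test, not in the patch).
// Assumes convert_tensor from model.cpp is visible to this translation unit.
#include <cstdint>
#include <cstdio>
#include <vector>
#include "ggml.h"

void convert_tensor(void* src, ggml_type src_type,
                    void* dst, ggml_type dst_type,
                    int nrows, int n_per_row); // inferred signature

int main() {
    const int nrows = 2, n_per_row = 64; // Q8_0 blocks are 32 wide, so 64 divides evenly
    const int64_t n = (int64_t)nrows * n_per_row;

    std::vector<double> src(n);
    for (int64_t i = 0; i < n; i++) {
        src[i] = 0.01 * (double)i; // arbitrary test data
    }

    // f64 -> Q8_0: exercises the float staging buffer added in PATCH 2/2
    std::vector<uint8_t> q(ggml_row_size(GGML_TYPE_Q8_0, n_per_row) * nrows);
    convert_tensor(src.data(), GGML_TYPE_F64, q.data(), GGML_TYPE_Q8_0, nrows, n_per_row);

    // Q8_0 -> f64: exercises to_float into the staging buffer, then the widen-to-double loop
    std::vector<double> back(n);
    convert_tensor(q.data(), GGML_TYPE_Q8_0, back.data(), GGML_TYPE_F64, nrows, n_per_row);

    printf("src[5] = %f, back[5] = %f (expect close; Q8_0 is lossy)\n", src[5], back[5]);
    return 0;
}

Staging through a temporary float buffer is the natural choice here: ggml_quantize_chunk
and the to_float type trait only operate on float, so f64<->quant has to pass through f32
either way; the cost is one extra n-sized allocation per converted tensor.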