From 4a0c6d0c978653ef69afd31b2218d6e24b22ebbe Mon Sep 17 00:00:00 2001 From: Michael Simacek Date: Tue, 27 May 2025 16:49:04 +0200 Subject: [PATCH 1/2] Disable numpy resize refcheck in svmlight format --- sklearn/datasets/_svmlight_format_fast.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx index 76a595407c11b..44d2dd76288b3 100644 --- a/sklearn/datasets/_svmlight_format_fast.pyx +++ b/sklearn/datasets/_svmlight_format_fast.pyx @@ -78,7 +78,9 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, if n_features and features[0].startswith(qid_prefix): _, value = features[0].split(COLON, 1) if query_id: - query.resize(len(query) + 1) + # Disable refcheck because it doesn't work on all Python + # implementations. We know we are the sole owner at this point + query.resize(len(query) + 1, refcheck=False) query[len(query) - 1] = np.int64(value) features.pop(0) n_features -= 1 From e26bf58e837bc01f7b81de3f1957f86f8b350f76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20=C5=A0im=C3=A1=C4=8Dek?= Date: Thu, 3 Jul 2025 16:18:59 +0200 Subject: [PATCH 2/2] Use append instead of resize in svmlight format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- sklearn/datasets/_svmlight_format_fast.pyx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/sklearn/datasets/_svmlight_format_fast.pyx b/sklearn/datasets/_svmlight_format_fast.pyx index 44d2dd76288b3..0cc442495d815 100644 --- a/sklearn/datasets/_svmlight_format_fast.pyx +++ b/sklearn/datasets/_svmlight_format_fast.pyx @@ -78,10 +78,7 @@ def _load_svmlight_file(f, dtype, bint multilabel, bint zero_based, if n_features and features[0].startswith(qid_prefix): _, value = features[0].split(COLON, 1) if query_id: - # Disable refcheck because it doesn't work on all Python - # implementations. We know we are the sole owner at this point - query.resize(len(query) + 1, refcheck=False) - query[len(query) - 1] = np.int64(value) + query = np.append(query, np.int64(value)) features.pop(0) n_features -= 1