
Commit 33e21ba

Fix: Unify nn.Module device placement across all quantizers and base class
This commit addresses multiple places where torch.nn.Module objects (including nn.Linear and PreTrainedModel) were incorrectly passed to the `move_to_device` function, which is designed for tensors and could raise AttributeError when given a module. The changes:

- In AWQQuantizer, GPTQQuantizer, and GGUFQuantizer, `nn.Linear` layer instances are now moved to the target device with `layer.to(device)`.
- In BaseQuantizer, `PreTrainedModel` instances are now moved to the primary device with `model.to(device)`.

All nn.Module device placements now use the standard `.to()` method, preventing these errors and giving consistent behavior across the library. This supersedes the earlier one-off fixes for QuantizedLinear by addressing the issue at every identified call site for the various module types.
1 parent 5b434ed commit 33e21ba
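
For context on why the old calls could break: a tensor-oriented helper typically touches Tensor-only attributes such as `.device`, which `nn.Module` does not expose, while `nn.Module.to()` handles the move itself. The sketch below is for illustration only; the helper body is hypothetical and is not the library's actual `move_to_device`.

import torch
import torch.nn as nn

def move_to_device(tensor: torch.Tensor, device: torch.device) -> torch.Tensor:
    # Hypothetical tensor-only helper, sketched for illustration.
    # Tensor has a `.device` attribute; nn.Module does not, so passing
    # a module here raises AttributeError before `.to()` is reached.
    if tensor.device != device:
        return tensor.to(device)
    return tensor

layer = nn.Linear(4, 4)
device = torch.device("cpu")

try:
    move_to_device(layer, device)          # old pattern: fails for modules
except AttributeError as exc:
    print(f"module through tensor helper: {exc}")

layer = layer.to(device)                   # fixed pattern: moves params and buffers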

File tree: 4 files changed (+4, −4 lines)


quantllm/quant/awq.py

Lines changed: 1 addition & 1 deletion
@@ -195,7 +195,7 @@ def _quantize_layer(
         quantized = quantized.to(target_device)
 
         # Ensure layer parameters are on the target_device for computation
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
 
         # Copy bias if exists, ensuring it's on the target device
         if layer.bias is not None:

quantllm/quant/gguf.py

Lines changed: 1 addition & 1 deletion
@@ -203,7 +203,7 @@ def _quantize_layer(
         """Quantize a single layer to GGUF format with memory-efficient processing."""
         target_device = torch.device('cpu') if self.cpu_offload else self.device_manager.primary_device
 
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
 
         # Initialize quantized layer and move to target_device
         quantized = QuantizedLinear(

quantllm/quant/gptq.py

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ def _quantize_layer(self, layer: nn.Linear, H: torch.Tensor) -> QuantizedLinear:
         # Ensure tensors are on the correct device
         H = move_to_device(H, target_device)
         # Original layer's weights should be moved to target_device before processing
-        layer = move_to_device(layer, target_device)
+        layer = layer.to(target_device)
         W = layer.weight.data  # W is now on target_device
 
         # Initialize quantized layer

quantllm/quant/quantization_engine.py

Lines changed: 1 addition & 1 deletion
@@ -521,7 +521,7 @@ def _prepare_model_instance(self, model_to_prepare: PreTrainedModel, make_copy:
         prepared_model.eval()
         if self.device_manager.primary_device is not None:
             self.logger.log_info(f"Moving model to device: {self.device_manager.primary_device}")
-            prepared_model = move_to_device(prepared_model, self.device_manager.primary_device)
+            prepared_model = prepared_model.to(self.device_manager.primary_device)
 
         self.logger.log_info("Model preparation (copy, eval, device move) completed successfully.")
         return prepared_model
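
One semantic detail worth noting about the patched lines (standard PyTorch behavior, nothing library-specific): `Tensor.to()` is out-of-place and returns a new tensor when a conversion happens, whereas `nn.Module.to()` converts parameters and buffers in place and returns the module itself. Reassigning the result, as every `+` line above does, is therefore correct in both cases.

import torch
import torch.nn as nn

t = torch.zeros(2)
t64 = t.to(torch.float64)   # Tensor.to() returns a new tensor on conversion
print(t64 is t)             # False

m = nn.Linear(2, 2)
m64 = m.to(torch.float64)   # Module.to() converts in place and returns self
print(m64 is m)             # True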
