|
@@ -359,7 +359,7 @@ def train(
|
|
|
# Without max_memory, device_map="auto" is too conservative and
|
|
# Without max_memory, device_map="auto" is too conservative and
|
|
|
# offloads layers to CPU where bitsandbytes 4-bit cannot run.
|
|
# offloads layers to CPU where bitsandbytes 4-bit cannot run.
|
|
|
if torch.cuda.is_available():
|
|
if torch.cuda.is_available():
|
|
|
- gpu_mem = torch.cuda.get_device_properties(0).total_mem / (1024**3)
|
|
|
|
|
|
|
+ gpu_mem = torch.cuda.get_device_properties(0).total_memory / (1024**3)
|
|
|
max_memory = {0: f"{int(gpu_mem * 0.92)}GiB", "cpu": "30GiB"}
|
|
max_memory = {0: f"{int(gpu_mem * 0.92)}GiB", "cpu": "30GiB"}
|
|
|
logger.info(f"GPU memory: {gpu_mem:.1f} GiB, max_memory: {max_memory}")
|
|
logger.info(f"GPU memory: {gpu_mem:.1f} GiB, max_memory: {max_memory}")
|
|
|
else:
|
|
else:
|