Update 30B model loading requirements

This commit is contained in:
maybleMyers 2025-12-08 15:09:39 -08:00
parent 949b38564a
commit 1c1aacb42e

8
vlm.py
View File

@@ -226,9 +226,15 @@ class VLMManager:
from transformers import BitsAndBytesConfig
load_kwargs["quantization_config"] = BitsAndBytesConfig(
load_in_8bit=True,
llm_int8_enable_fp32_cpu_offload=True,
)
load_kwargs["device_map"] = "auto"
print("Using 8-bit quantization")
# Use disk offloading for very large models during quantization
offload_dir = Path(tempfile.gettempdir()) / "vlm_offload"
offload_dir.mkdir(exist_ok=True)
load_kwargs["offload_folder"] = str(offload_dir)
load_kwargs["offload_state_dict"] = True
print(f"Using 8-bit quantization (offloading to {offload_dir})")
except ImportError:
print("Warning: bitsandbytes not installed, falling back to bfloat16")
load_kwargs["torch_dtype"] = torch.bfloat16