mirror of
https://github.com/lllyasviel/stable-diffusion-webui-forge.git
synced 2025-12-27 21:26:07 +00:00
Add BF16 to GGUF (#2877)
This commit is contained in:
parent
0ced1d0cd0
commit
17a42e5877
@ -13,6 +13,7 @@ quants_mapping = {
|
||||
gguf.GGMLQuantizationType.Q5_K: gguf.Q5_K,
|
||||
gguf.GGMLQuantizationType.Q6_K: gguf.Q6_K,
|
||||
gguf.GGMLQuantizationType.Q8_0: gguf.Q8_0,
|
||||
gguf.GGMLQuantizationType.BF16: gguf.BF16,
|
||||
}
|
||||
|
||||
|
||||
|
||||
3
packages_3rdparty/gguf/quants.py
vendored
3
packages_3rdparty/gguf/quants.py
vendored
@ -268,6 +268,9 @@ class BF16(__Quant, qtype=GGMLQuantizationType.BF16):
|
||||
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
||||
return (blocks.view(np.int16).astype(np.int32) << 16).view(np.float32)
|
||||
|
||||
@classmethod
|
||||
def dequantize_blocks_pytorch(cls, blocks, block_size, type_size, parameter) -> torch.Tensor:
|
||||
return (blocks.view(torch.int16).to(torch.int32) << 16).view(torch.float32)
|
||||
|
||||
class Q4_0(__Quant, qtype=GGMLQuantizationType.Q4_0):
|
||||
@classmethod
|
||||
|
||||
Loading…
Reference in New Issue
Block a user