Showing 1 changed file with 95 additions and 37 deletions.
@@ -1,40 +1,98 @@
 import { GGMLQuantizationType } from "./types";
 
-export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, string> = {
-	[GGMLQuantizationType.F32]: "32-bit standard IEEE 754 single-precision floating-point number.", // src: https://en.wikipedia.org/wiki/Single-precision_floating-point_format
-	[GGMLQuantizationType.F16]: "16-bit standard IEEE 754 half-precision floating-point number.", // src: https://en.wikipedia.org/wiki/Half-precision_floating-point_format
-	[GGMLQuantizationType.Q4_0]:
-		"4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249
-	[GGMLQuantizationType.Q4_1]:
-		"4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290
-	[GGMLQuantizationType.Q5_0]:
-		"5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249
-	[GGMLQuantizationType.Q5_1]:
-		"5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290
-	[GGMLQuantizationType.Q8_0]:
-		"8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249
-	[GGMLQuantizationType.Q8_1]:
-		"8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).", // src: https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290
-	[GGMLQuantizationType.Q2_K]: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q3_K]: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting in 3.4375 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q4_K]: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q5_K]: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q6_K]: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q8_K]: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.IQ2_XXS]:
-		"2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.06 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ2_XS]:
-		"2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.31 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ3_XXS]:
-		"3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.06 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ1_S]:
-		"1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.56 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ4_NL]:
-		"4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix.",
-	[GGMLQuantizationType.IQ3_S]:
-		"3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.44 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ2_S]:
-		"2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.5 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
-	[GGMLQuantizationType.IQ4_XS]:
-		"4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 4.25 bits-per-weight.", // src: https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70
+export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string; src_url?: string }> = {
+	[GGMLQuantizationType.F32]: {
+		txt: "32-bit standard IEEE 754 single-precision floating-point number.",
+		src_url: "https://en.wikipedia.org/wiki/Single-precision_floating-point_format",
+	},
+	[GGMLQuantizationType.F16]: {
+		txt: "16-bit standard IEEE 754 half-precision floating-point number.",
+		src_url: "https://en.wikipedia.org/wiki/Half-precision_floating-point_format",
+	},
+	[GGMLQuantizationType.Q4_0]: {
+		txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
+	},
+	[GGMLQuantizationType.Q4_1]: {
+		txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
+	},
+	[GGMLQuantizationType.Q5_0]: {
+		txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
+	},
+	[GGMLQuantizationType.Q5_1]: {
+		txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
+	},
+	[GGMLQuantizationType.Q8_0]: {
+		txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
+	},
+	[GGMLQuantizationType.Q8_1]: {
+		txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not widely used today).",
+		src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
+	},
+	[GGMLQuantizationType.Q2_K]: {
+		txt: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.Q3_K]: {
+		txt: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting in 3.4375 bits-per-weight.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.Q4_K]: {
+		txt: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.Q5_K]: {
+		txt: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.Q6_K]: {
+		txt: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.Q8_K]: {
+		txt: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`,
+		src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
+	},
+	[GGMLQuantizationType.IQ2_XXS]: {
+		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.06 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ2_XS]: {
+		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.31 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ3_XXS]: {
+		txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.06 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ1_S]: {
+		txt: "1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.56 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ4_NL]: {
+		txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix.",
+	},
+	[GGMLQuantizationType.IQ3_S]: {
+		txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.44 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ2_S]: {
+		txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.5 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
+	[GGMLQuantizationType.IQ4_XS]: {
+		txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 4.25 bits-per-weight.",
+		src_url:
+			"https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
+	},
 };
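
In effect, each entry moves from a bare string (with the source buried in a code comment) to an object carrying the description in txt and a machine-readable src_url. A minimal sketch of how a caller might consume the new shape; the import path ./quant-descriptions is a hypothetical name for illustration, as the diff does not show the file's path — only GGMLQuantizationType and QUANT_DESCRIPTIONS come from the diff itself:

import { GGMLQuantizationType } from "./types";
import { QUANT_DESCRIPTIONS } from "./quant-descriptions"; // hypothetical path, not shown in the diff

// `txt` is always present; `src_url` is optional (e.g. the IQ4_NL entry has none).
const { txt, src_url } = QUANT_DESCRIPTIONS[GGMLQuantizationType.Q4_K];
console.log(txt); // "4-bit quantization (q). Super-blocks with 8 blocks, ..."
if (src_url) {
	console.log(`source: ${src_url}`); // link to where the description comes from
}

Making src_url optional in the type (src_url?: string) lets entries without a known source, such as IQ4_NL above, omit the field instead of storing an empty string.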