chore: align Q6_K description wording with the other K-quant entries

huggingface · mishig25 · Apr 10, 2024 · Apr 9, 2024 · Apr 10, 2024 · Apr 10, 2024
commit 7e83f0bbd202f6bea6ab6e692c577f5d570f89ad
diff --git a/packages/gguf/src/quant_descriptions.ts b/packages/gguf/src/quant_descriptions.ts
@@ -13,7 +13,7 @@ export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, string> = {
 	[GGMLQuantizationType.Q3_K]: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Block scales (d) is quantized with 6 bits, resulting. 3.4375 bits-per-weight. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
 	[GGMLQuantizationType.Q4_K]: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Block scales (d) & mins (m) are quantized with 6 bits, resulting. 4.5 bits-per-weight are obtained by w = d * q + m.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
 	[GGMLQuantizationType.Q5_K]: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Block scales (d) & mins (m) are quantized with 6 bits, resulting in 5.5 bits-per-weight. Weights are obtained by w = d * q + m.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
-	[GGMLQuantizationType.Q6_K]: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Scales (d) are quantized with 8 bits, resulting in 6.5625 bits-per-weight. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
+	[GGMLQuantizationType.Q6_K]: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Block scales (d) is quantized with 8 bits, resulting in 6.5625 bits-per-weight. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
 	[GGMLQuantizationType.Q8_K]: `8-bit quantization (q). Only used for quantizing intermediate results. The difference to the existing Q8_0 is that the block size is 256. All 2-6 bit dot products are implemented for this quantization type. Weights are obtained by w = d * q.`, // src: https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305
 	[GGMLQuantizationType.IQ2_XXS]: "", // todo: add description
 	[GGMLQuantizationType.IQ2_XS]: "", // todo: add description