
Commit e126515

[safetensors] parameters count based on quantization config
1 parent eebfb1f commit e126515
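
For context, a minimal usage sketch of what this commit enables, mirroring the new spec below (the import path assumes the published @huggingface/hub package; the logged values are the ones asserted in the spec):

import { parseSafetensorsMetadata } from "@huggingface/hub";

// With computeParametersCount enabled, the parser also fetches config.json and
// applies the model's quantization_config when counting parameters per dtype.
const parse = await parseSafetensorsMetadata({
	repo: "openai/gpt-oss-20b",
	computeParametersCount: true,
	revision: "bbf09307421df45099c1e7dcbd64e3106ce5b403",
});

// mxfp4 "_blocks" tensors stored as U8 count two 4-bit parameters per byte,
// so the total comes out to 21_511_953_984 (~21.5B) instead of undercounting.
console.log(parse.parameterCount); // per-dtype counts, e.g. { BF16: ..., U8: ... }
console.log(parse.parameterTotal);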

File tree: 2 files changed, 272 additions and 13 deletions

packages/hub/src/lib/parse-safetensors-metadata.spec.ts

Lines changed: 135 additions & 0 deletions
@@ -142,4 +142,139 @@ describe("parseSafetensorsMetadata", () => {
 		assert.strictEqual(safetensorsShardFileInfo?.shard, "00005");
 		assert.strictEqual(safetensorsShardFileInfo?.total, "00072");
 	});
+
+	it("should support sub-byte data types", async () => {
+		const newDataTypes: Array<"F4" | "F6_E2M3" | "F6_E3M2" | "E8M0"> = ["F4", "F6_E2M3", "F6_E3M2", "E8M0"];
+
+		for (const dtype of newDataTypes) {
+			const tensorInfo = {
+				dtype,
+				shape: [1, 2],
+				data_offsets: [0, 1] as [number, number],
+			};
+
+			assert.ok(typeof tensorInfo.dtype === "string");
+			assert.ok(["F4", "F6_E2M3", "F6_E3M2", "E8M0"].includes(tensorInfo.dtype));
+		}
+	});
+
+	it("should handle parameter counting with sub-byte data types", () => {
+		const mockHeader = {
+			tensor_f4: {
+				dtype: "F4" as const,
+				shape: [10, 20],
+				data_offsets: [0, 100] as [number, number],
+			},
+			tensor_f6_e2m3: {
+				dtype: "F6_E2M3" as const,
+				shape: [5, 10],
+				data_offsets: [100, 150] as [number, number],
+			},
+			tensor_f6_e3m2: {
+				dtype: "F6_E3M2" as const,
+				shape: [8, 12],
+				data_offsets: [150, 246] as [number, number],
+			},
+			tensor_e8m0: {
+				dtype: "E8M0" as const,
+				shape: [4, 6],
+				data_offsets: [246, 270] as [number, number],
+			},
+			__metadata__: { format: "pt" },
+		};
+
+		const computeNumOfParamsByDtypeSingleFile = (header: typeof mockHeader) => {
+			const counter: Partial<Record<string, number>> = {};
+			const tensors = Object.fromEntries(Object.entries(header).filter(([key]) => key !== "__metadata__"));
+
+			for (const [, v] of Object.entries(tensors) as [
+				string,
+				{ dtype: string; shape: number[]; data_offsets: [number, number] },
+			][]) {
+				if (v.shape.length === 0) {
+					continue;
+				}
+				counter[v.dtype] = (counter[v.dtype] ?? 0) + v.shape.reduce((a: number, b: number) => a * b);
+			}
+			return counter;
+		};
+
+		const parameterCount = computeNumOfParamsByDtypeSingleFile(mockHeader);
+
+		assert.strictEqual(parameterCount.F4, 200);
+		assert.strictEqual(parameterCount.F6_E2M3, 50);
+		assert.strictEqual(parameterCount.F6_E3M2, 96);
+		assert.strictEqual(parameterCount.E8M0, 24);
+	});
+
+	it("fetch info for openai/gpt-oss-20b (large sharded model)", async () => {
+		const parse = await parseSafetensorsMetadata({
+			repo: "openai/gpt-oss-20b",
+			computeParametersCount: true,
+			revision: "bbf09307421df45099c1e7dcbd64e3106ce5b403",
+		});
+
+		assert(parse.sharded);
+
+		assert.ok(Object.keys(parse.headers).length > 1);
+		assert.ok(parse.parameterCount);
+
+		const totalParams = parse.parameterTotal || sum(Object.values(parse.parameterCount));
+
+		assert.strictEqual(totalParams, 21_511_953_984); // 21.5B
+
+		assert.ok(parse.parameterCount.BF16 && parse.parameterCount.U8);
+
+		assert.strictEqual(Object.keys(parse.headers).length, 3);
+	});
+
+	it("should support FP4 and UE8 data types in type system", () => {
+		const newDataTypes: Array<"FP4" | "UE8"> = ["FP4", "UE8"];
+
+		for (const dtype of newDataTypes) {
+			const tensorInfo = {
+				dtype,
+				shape: [1, 2],
+				data_offsets: [0, 1] as [number, number],
+			};
+
+			assert.ok(typeof tensorInfo.dtype === "string");
+			assert.ok(["FP4", "UE8"].includes(tensorInfo.dtype));
+		}
+
+		const mockHeader = {
+			tensor_fp4: {
+				dtype: "FP4" as const,
+				shape: [100, 200],
+				data_offsets: [0, 5000] as [number, number],
+			},
+			tensor_ue8: {
+				dtype: "UE8" as const,
+				shape: [50, 100],
+				data_offsets: [5000, 10000] as [number, number],
+			},
+			__metadata__: { format: "pt" },
+		};
+
+		const computeNumOfParamsByDtypeSingleFile = (header: typeof mockHeader) => {
+			const counter: Partial<Record<string, number>> = {};
+			const tensors = Object.fromEntries(Object.entries(header).filter(([key]) => key !== "__metadata__"));
+
+			for (const [, v] of Object.entries(tensors) as [
+				string,
+				{ dtype: string; shape: number[]; data_offsets: [number, number] },
+			][]) {
+				if (v.shape.length === 0) {
+					continue;
+				}
+				counter[v.dtype] = (counter[v.dtype] ?? 0) + v.shape.reduce((a: number, b: number) => a * b);
+			}
+			return counter;
+		};
+
+		const parameterCount = computeNumOfParamsByDtypeSingleFile(mockHeader);
+
+		assert.strictEqual(parameterCount.FP4, 20000);
+		assert.strictEqual(parameterCount.UE8, 5000);
+	});
 });

packages/hub/src/lib/parse-safetensors-metadata.ts

Lines changed: 137 additions & 13 deletions
@@ -48,13 +48,19 @@ export type Dtype =
 	| "F16"
 	| "F8_E4M3"
 	| "F8_E5M2"
+	| "E8M0"
+	| "F6_E3M2"
+	| "F6_E2M3"
+	| "F4"
+	| "FP4"
 	| "BF16"
 	| "I64"
 	| "I32"
 	| "I16"
 	| "I8"
 	| "U16"
 	| "U8"
+	| "UE8"
 	| "BOOL";
 
 export interface TensorInfo {
@@ -92,6 +98,35 @@ export type SafetensorsParseFromRepo =
 			parameterTotal?: number;
 	  };
 
+/**
+ * Fetches and parses model config.json
+ */
+async function fetchModelConfig(
+	params: {
+		repo: RepoDesignation;
+		revision?: string;
+		hubUrl?: string;
+		fetch?: typeof fetch;
+	} & Partial<CredentialsParams>
+): Promise<ModelConfig | null> {
+	try {
+		const configBlob = await downloadFile({
+			...params,
+			path: "config.json",
+		});
+
+		if (!configBlob) {
+			return null;
+		}
+
+		const config = JSON.parse(await configBlob.text());
+		return config as ModelConfig;
+	} catch (error) {
+		// Config file might not exist or be inaccessible
+		return null;
+	}
+}
+
 async function parseSingleFile(
 	path: string,
 	params: {
@@ -252,6 +287,10 @@ export async function parseSafetensorsMetadata(
 		throw new TypeError("Only model repos should contain safetensors files.");
 	}
 
+	// Fetch model config for quantization information
+	const modelConfig = params.computeParametersCount ? await fetchModelConfig(params) : null;
+	const quantConfig = modelConfig?.quantization_config;
+
 	if (
 		(params.path && RE_SAFETENSORS_FILE.test(params.path)) ||
 		(await fileExists({ ...params, path: SAFETENSORS_FILE }))
@@ -262,17 +301,17 @@ export async function parseSafetensorsMetadata(
 			header,
 			...(params.computeParametersCount
 				? {
-						parameterCount: computeNumOfParamsByDtypeSingleFile(header),
+						parameterCount: computeNumOfParamsByDtypeSingleFile(header, quantConfig),
 						parameterTotal:
 							/// shortcut: get param count directly from metadata
 							header.__metadata__.total_parameters
 								? typeof header.__metadata__.total_parameters === "number"
 									? header.__metadata__.total_parameters
 									: typeof header.__metadata__.total_parameters === "string"
-									? parseInt(header.__metadata__.total_parameters)
-									: undefined
+										? parseInt(header.__metadata__.total_parameters)
+										: undefined
 								: undefined,
-				  }
+					}
 				: undefined),
 		};
 	} else if (
@@ -289,41 +328,126 @@ export async function parseSafetensorsMetadata(
 				headers: shardedMap,
 				...(params.computeParametersCount
 					? {
-							parameterCount: computeNumOfParamsByDtypeSharded(shardedMap),
+							parameterCount: computeNumOfParamsByDtypeSharded(shardedMap, quantConfig),
 							parameterTotal:
 								/// shortcut: get param count directly from metadata
 								index.metadata?.total_parameters
 									? typeof index.metadata.total_parameters === "number"
 										? index.metadata.total_parameters
 										: typeof index.metadata.total_parameters === "string"
-										? parseInt(index.metadata.total_parameters)
-										: undefined
+											? parseInt(index.metadata.total_parameters)
+											: undefined
 									: undefined,
-					  }
+						}
 					: undefined),
 			};
 		} else {
 			throw new Error("model id does not seem to contain safetensors weights");
 		}
 	}
 
-function computeNumOfParamsByDtypeSingleFile(header: SafetensorsFileHeader): Partial<Record<Dtype, number>> {
+export interface QuantizationConfig {
+	quant_method?: string;
+	modules_to_not_convert?: string[];
+	bits?: number;
+	load_in_4bit?: boolean;
+	load_in_8bit?: boolean;
+}
+
+export interface ModelConfig {
+	quantization_config?: QuantizationConfig;
+}
+
+/**
+ * Determines if a tensor is quantized based on quantization config and tensor name
+ */
+function isQuantizedTensor(tensorName: string, quantConfig?: QuantizationConfig): boolean {
+	if (!quantConfig || !quantConfig.modules_to_not_convert) {
+		return false;
+	}
+
+	for (const pattern of quantConfig.modules_to_not_convert) {
+		const regexPattern = pattern.replace(/\*/g, ".*");
+		const regex = new RegExp(regexPattern);
+		if (regex.test(tensorName)) {
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Gets the parameter multiplier for a quantized tensor based on quantization method
+ */
+function getQuantizationMultiplier(tensorName: string, dtype: Dtype, quantConfig?: QuantizationConfig): number {
+	if (!quantConfig || !isQuantizedTensor(tensorName, quantConfig)) {
+		return 1;
+	}
+
+	switch (quantConfig.quant_method) {
+		case "mxfp4":
+			if (dtype === "U8" && tensorName.includes("_blocks")) {
+				return 2;
+			}
+			return 1;
+
+		case "gptq":
+		case "awq":
+			if (quantConfig.bits === 4 && dtype === "U8") {
+				return 2;
+			}
+			if (quantConfig.bits === 2 && dtype === "U8") {
+				return 4;
+			}
+			return 1;
+
+		case "bitsandbytes":
+			if (quantConfig.load_in_4bit && dtype === "U8") {
+				return 2;
+			}
+			if (quantConfig.load_in_8bit) {
+				return 1;
+			}
+			return 1;
+
+		default:
+			if (quantConfig.load_in_4bit && dtype === "U8") {
+				return 2;
+			}
+			if (quantConfig.bits === 4 && dtype === "U8") {
+				return 2;
+			}
+			return 1;
+	}
+}
+
+function computeNumOfParamsByDtypeSingleFile(
+	header: SafetensorsFileHeader,
+	quantConfig?: QuantizationConfig
+): Partial<Record<Dtype, number>> {
 	const counter: Partial<Record<Dtype, number>> = {};
 	const tensors = omit(header, "__metadata__");
 
-	for (const [, v] of typedEntries(tensors)) {
+	for (const [tensorName, v] of typedEntries(tensors)) {
 		if (v.shape.length === 0) {
 			continue;
 		}
-		counter[v.dtype] = (counter[v.dtype] ?? 0) + v.shape.reduce((a, b) => a * b);
+
+		const elements = v.shape.reduce((a, b) => a * b);
+		const multiplier = quantConfig ? getQuantizationMultiplier(tensorName, v.dtype, quantConfig) : 1;
+		counter[v.dtype] = (counter[v.dtype] ?? 0) + elements * multiplier;
 	}
 	return counter;
 }
 
-function computeNumOfParamsByDtypeSharded(shardedMap: SafetensorsShardedHeaders): Partial<Record<Dtype, number>> {
+function computeNumOfParamsByDtypeSharded(
+	shardedMap: SafetensorsShardedHeaders,
+	quantConfig?: QuantizationConfig
+): Partial<Record<Dtype, number>> {
 	const counter: Partial<Record<Dtype, number>> = {};
 	for (const header of Object.values(shardedMap)) {
-		for (const [k, v] of typedEntries(computeNumOfParamsByDtypeSingleFile(header))) {
+		for (const [k, v] of typedEntries(computeNumOfParamsByDtypeSingleFile(header, quantConfig))) {
 			counter[k] = (counter[k] ?? 0) + (v ?? 0);
 		}
 	}
