sd-ggml / stable-diffusion.cpp /ggml /examples /python /example_test_all_quants.py

Upload folder using huggingface_hub

9fcf2b6 over 2 years ago

1.95 kB

	from ggml import ffi, lib
	from ggml.utils import init, numpy, copy
	import numpy as np
	from math import pi, cos, sin, ceil

	import matplotlib.pyplot as plt

	# Memory pool for every tensor created in this script. 100 MiB is far more
	# than the 256 KiB float32 source plus one quantized copy per type needs.
	ctx = init(mem_size=100 * 1024 * 1024)  # Will be auto-GC'd
	n = 256  # Side length of the square test pattern / tensors

	# Smooth 2-D test pattern: entry (i, j) is sin(2*pi*i/n) * cos(2*pi*j/n),
	# i.e. one sine period down the rows times one cosine period across the
	# columns, stored as float32.
	row_wave = np.array([sin(i * 2 * pi / n) for i in range(n)])
	col_wave = np.array([cos(j * 2 * pi / n) for j in range(n)])
	orig = np.outer(row_wave, col_wave).astype(np.float32)

	# Load the pattern into an F32 ggml tensor allocated from ctx.
	orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, n, n)
	copy(orig, orig_tensor)

	# Every type id that ggml reports as quantized, minus two that are
	# apparently not supported by this round-trip.
	unsupported = (lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K)
	quants = [
	    candidate
	    for candidate in range(lib.GGML_TYPE_COUNT)
	    if lib.ggml_is_quantized(candidate) and candidate not in unsupported
	]
	# To test a single format, use e.g.: quants = [lib.GGML_TYPE_Q2_K]

	def get_name(type):
	    """Return the ggml name of tensor type ``type`` as a str.

	    The name comes from ``lib.ggml_type_name``; when that call yields a
	    falsy result (e.g. a null pointer) the placeholder '?' is returned
	    instead of decoding it.
	    """
	    c_name = lib.ggml_type_name(type)
	    if not c_name:
	        return '?'
	    return ffi.string(c_name).decode('utf-8')

	# Order the formats alphabetically by name and put None in front; the
	# None entry is the slot used for the unquantized original when plotting.
	quants = [None] + sorted(quants, key=get_name)
	print(quants)

	# Subplot grid: fixed column count, as many rows as the entries require,
	# with the figure sized at 5 x 5 per cell.
	ncols = 4
	nrows = -(-len(quants) // ncols)  # integer ceiling division
	plt.figure(figsize=(5 * ncols, 5 * nrows), layout='tight')

	# Draw one subplot per entry of `quants`: the original pattern for the None
	# entry, and the round-tripped pattern for every quantized type.
	for i, qtype in enumerate(quants):
	    plt.subplot(nrows, ncols, i + 1)
	    try:
	        if qtype is None:
	            plt.title('Original')
	            plt.imshow(orig)
	        else:
	            # Copy the F32 tensor into a tensor of the target type, then
	            # read that tensor back as an array. Presumably copy() does
	            # the quantization and numpy() the dequantization -- confirm
	            # against ggml.utils.
	            quantized_tensor = lib.ggml_new_tensor_2d(ctx, qtype, n, n)
	            copy(orig_tensor, quantized_tensor)
	            quantized = numpy(quantized_tensor, allow_copy=True)
	            d = quantized - orig
	            results = {
	                # NOTE: if d is 2-D like orig, np.linalg.norm treats these
	                # as matrix norms: ord=2 is the largest singular value and
	                # ord=inf the maximum absolute row sum (not element-wise).
	                "l2": np.linalg.norm(d, 2),
	                "linf": np.linalg.norm(d, np.inf),
	                # Byte size of the F32 tensor relative to the quantized one.
	                "compression": round(
	                    lib.ggml_nbytes(orig_tensor)
	                    / lib.ggml_nbytes(quantized_tensor),
	                    1,
	                ),
	            }
	            name = get_name(qtype)
	            print(f'{name}: {results}')

	            plt.title(f'{name} ({results["compression"]}x smaller)')
	            plt.imshow(quantized, interpolation='nearest')
	    except Exception as e:
	        # Best effort: report the failure and move on so the remaining
	        # types are still plotted.
	        print(f'Error: {e}')

	plt.show()