| from ggml import ffi, lib |
| from ggml.utils import init, numpy, copy |
| import numpy as np |
| from math import pi, cos, sin, ceil |
|
|
| import matplotlib.pyplot as plt |
|
|
# Size handed to the ggml context: 100 * 2**20 (presumably bytes, i.e.
# 100 MiB -- confirm against ggml.utils.init).
CTX_MEM_SIZE = 100 * 1024 * 1024
ctx = init(mem_size=CTX_MEM_SIZE)
# Side length of the square test image.
n = 256

# Reference image: orig[i, j] = cos(2*pi*j/n) * sin(2*pi*i/n), stored as
# float32. Each factor depends on a single index, so the n sines and n cosines
# are evaluated once up front instead of n*n times inside the nested
# comprehension. The arithmetic per element is unchanged, so the resulting
# array is bit-for-bit the same.
row_sines = [sin(i * 2 * pi / n) for i in range(n)]
col_cosines = [cos(j * 2 * pi / n) for j in range(n)]
orig = np.array(
    [[c * s for c in col_cosines] for s in row_sines],
    np.float32,
)
# Create an n x n F32 tensor inside ctx, then hand the NumPy reference image
# and the tensor to ggml.utils.copy (presumably filling the tensor from the
# array -- confirm against ggml.utils).
tensor_shape = (n, n)
orig_tensor = lib.ggml_new_tensor_2d(ctx, lib.GGML_TYPE_F32, *tensor_shape)
copy(orig, orig_tensor)
|
|
# Quantized types deliberately left out of the comparison.
_EXCLUDED_TYPES = (lib.GGML_TYPE_Q8_1, lib.GGML_TYPE_Q8_K)

# Every type id below GGML_TYPE_COUNT that ggml reports as quantized, minus
# the exclusions above. The loop variable is not called `type`, so the
# builtin is not shadowed, and the exclusion tuple is built once rather than
# on every iteration.
quants = [
    qtype
    for qtype in range(lib.GGML_TYPE_COUNT)
    if lib.ggml_is_quantized(qtype) and qtype not in _EXCLUDED_TYPES
]
| |
|
|
def get_name(type):
    """Return the ggml name of a tensor type as a Python string.

    Looks the name up with ``lib.ggml_type_name`` and decodes the returned
    C string as UTF-8. If the lookup yields a falsy value (e.g. a NULL
    pointer), ``'?'`` is returned instead.
    """
    c_name = lib.ggml_type_name(type)
    if not c_name:
        return '?'
    return ffi.string(c_name).decode('utf-8')
|
|
# Order the quantized types by their ggml name and put a None placeholder in
# front of them. The list is updated in place through slice assignment.
quants[:] = [None, *sorted(quants, key=get_name)]
print(quants)
|
|
# Grid layout: four panels per row, with enough rows to hold every entry of
# quants.
ncols = 4
nrows = ceil(len(quants) / ncols)

plt.figure(figsize=(ncols * 5, nrows * 5), layout='tight')

for i, qtype in enumerate(quants):
    plt.subplot(nrows, ncols, i + 1)
    # Resolve the label before the try block so that a failure can be
    # attributed to a specific type. get_name() has already been called on
    # every non-None entry when quants was sorted.
    name = 'Original' if qtype is None else get_name(qtype)
    try:
        if qtype is None:
            # The None placeholder stands for the unquantized reference image.
            plt.title(name)
            plt.imshow(orig)
        else:
            # Copy the F32 tensor into a tensor of the quantized type, then
            # read it back as a NumPy array (presumably quantizing and
            # dequantizing respectively -- confirm against ggml.utils).
            quantized_tensor = lib.ggml_new_tensor_2d(ctx, qtype, n, n)
            copy(orig_tensor, quantized_tensor)
            quantized = numpy(quantized_tensor, allow_copy=True)
            d = quantized - orig
            # NOTE: for a 2-D array, np.linalg.norm with ord=2 is the
            # spectral norm (largest singular value) and ord=np.inf is the
            # maximum absolute row sum. These are matrix norms of the error,
            # not element-wise L2 / max-abs errors.
            results = {
                "l2": np.linalg.norm(d, 2),
                "linf": np.linalg.norm(d, np.inf),
                "compression":
                    round(lib.ggml_nbytes(orig_tensor) /
                          lib.ggml_nbytes(quantized_tensor), 1)
            }
            print(f'{name}: {results}')

            plt.title(f'{name} ({results["compression"]}x smaller)')
            plt.imshow(quantized, interpolation='nearest')

    except Exception as e:
        # Best effort: a type that fails is reported and its panel is left
        # as is, so the remaining types are still plotted.
        print(f'Error ({name}): {e}')

plt.show()