Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

- feat: add buffer protocol support to utils by @aisk in #175
- docs: add JupyterLite browser playground by @abetlen in #173

## [0.0.44]
Expand Down
78 changes: 65 additions & 13 deletions ggml/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,27 @@ class GGML_TYPE(enum.IntEnum):
GGML_TYPE_TO_NUMPY_DTYPE = {v: k for k, v in NUMPY_DTYPE_TO_GGML_TYPE.items()}


def to_buffer(tensor: ggml.ggml_tensor_p) -> memoryview:
    """Expose a ggml tensor's raw bytes as a writable memoryview (zero-copy).

    The view spans the tensor's full byte size as reported by
    ``ggml.ggml_nbytes`` and is handed out through the Python buffer
    protocol. No data is copied: the view aliases the tensor's memory, so it
    must not be used once the owning ggml context has been freed.

    Parameters:
        tensor: ggml tensor

    Returns:
        Writable memoryview backed by the tensor data

    Raises:
        ValueError: if the tensor has no data pointer
    """
    address = ggml.ggml_get_data(tensor)
    if address is None:
        raise ValueError("tensor data is None")
    # A ctypes char array mapped directly onto the tensor's address gives us
    # a buffer-protocol object without allocating or copying anything.
    byte_array_type = ctypes.c_char * ggml.ggml_nbytes(tensor)
    return memoryview(byte_array_type.from_address(address))


def to_numpy(
tensor: ggml.ggml_tensor_p,
shape: Optional[Tuple[int, ...]] = None,
Expand All @@ -65,23 +86,13 @@ def to_numpy(
Numpy array with a view of data from tensor
"""
ggml_type = GGML_TYPE(tensor.contents.type)
if ggml_type == GGML_TYPE.F16:
ctypes_type = ctypes.c_uint16
else:
ctypes_type = np.ctypeslib.as_ctypes_type(GGML_TYPE_TO_NUMPY_DTYPE[ggml_type])

data = ggml.ggml_get_data(tensor)
if data is None:
raise ValueError("tensor data is None")
array = (ctypes_type * ggml.ggml_nelements(tensor)).from_address(data)
dtype = GGML_TYPE_TO_NUMPY_DTYPE[ggml_type]
array = np.frombuffer(to_buffer(tensor), dtype=dtype)
n_dims = ggml.ggml_n_dims(tensor)
shape_ = tuple(reversed(tensor.contents.ne[:n_dims]))
strides = tuple(reversed(tensor.contents.nb[:n_dims]))
output = np.ctypeslib.as_array(array)
if ggml_type == GGML_TYPE.F16:
output.dtype = np.float16 # type: ignore
return np.lib.stride_tricks.as_strided(
output, shape=shape if shape is not None else shape_, strides=strides
array, shape=shape if shape is not None else shape_, strides=strides
)


Expand Down Expand Up @@ -111,6 +122,47 @@ def from_numpy(x: npt.NDArray[Any], ctx: ggml.ggml_context_p) -> ggml.ggml_tenso
return tensor


def from_buffer(
    buffer: Any,
    ctx: ggml.ggml_context_p,
    type: GGML_TYPE,
    shape: Tuple[int, ...],
) -> ggml.ggml_tensor_p:
    """Allocate a ggml tensor in ``ctx`` and fill it with a copy of ``buffer``.

    ``buffer`` may be any object implementing the buffer protocol (``bytes``,
    ``bytearray``, ``memoryview``, ``array.array``, ...); numpy is not needed,
    in contrast to :func:`from_numpy`. Its bytes are taken to be contiguous
    and arranged in row-major (C) order for the given shape and ggml type.

    Parameters:
        buffer: source buffer-protocol object
        ctx: ggml context
        type: ggml type of the tensor
        shape: shape of the tensor in row-major (C) order

    Returns:
        New ggml tensor with data copied from buffer

    Raises:
        ValueError: if the buffer's byte length differs from the tensor's
    """
    # ggml lists dimensions fastest-varying first, i.e. the reverse of a
    # row-major shape tuple.
    dims = tuple(reversed(shape))
    type_id = type.value
    n_dims = len(dims)
    dims_array = (ctypes.c_int64 * n_dims)(*dims)
    tensor = ggml.ggml_new_tensor(ctx, type_id, n_dims, dims_array)

    # Flatten the source to plain bytes so lengths are comparable regardless
    # of the source's own item format.
    source_bytes = memoryview(buffer).cast("B")
    if ggml.ggml_get_data(tensor) is None:
        # The tensor has no data pointer to write into; return it unfilled.
        return tensor

    target_bytes = to_buffer(tensor).cast("B")
    source_size = len(source_bytes)
    target_size = len(target_bytes)
    if source_size != target_size:
        raise ValueError(
            f"buffer size ({source_size} bytes) does not match tensor size "
            f"({target_size} bytes)"
        )
    target_bytes[:] = source_bytes
    return tensor


def copy_to_cpu(
ctx: ggml.ggml_context_p, tensor: ggml.ggml_tensor_p
) -> ggml.ggml_tensor_p:
Expand Down
29 changes: 29 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import array

import ggml
import ggml.utils

Expand All @@ -20,6 +22,33 @@ def test_utils():
ggml.ggml_free(ctx)


def test_from_buffer_and_to_buffer():
    """Round-trip an ``array.array`` through from_buffer and to_buffer."""
    init_params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024)
    ctx = ggml.ggml_init(init_params)
    assert ctx is not None

    # The source is a plain array.array, so numpy is not involved.
    source = array.array("f", [1, 2, 3, 4, 5, 6])
    tensor = ggml.utils.from_buffer(
        source, ctx, ggml.utils.GGML_TYPE.F32, (2, 3)
    )
    assert ggml.utils.get_shape(tensor) == (3, 2)

    # to_buffer must hand back a writable view holding the same bytes.
    view = ggml.utils.to_buffer(tensor)
    assert view.nbytes == source.itemsize * len(source)
    assert view.readonly is False
    assert bytes(view) == source.tobytes()

    ggml.ggml_free(ctx)


def test_from_buffer_size_mismatch():
    """from_buffer raises ValueError when the byte counts disagree."""
    init_params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024)
    ctx = ggml.ggml_init(init_params)
    assert ctx is not None

    # Three floats cannot fill a tensor of shape (2, 3), which holds six.
    too_short = array.array("f", [1, 2, 3])
    with pytest.raises(ValueError):
        ggml.utils.from_buffer(too_short, ctx, ggml.utils.GGML_TYPE.F32, (2, 3))

    ggml.ggml_free(ctx)


def test_numpy_arrays():
params = ggml.ggml_init_params(mem_size=16 * 1024 * 1024)
ctx = ggml.ggml_init(params)
Expand Down
Loading