import io
import pathlib
from typing import BinaryIO, Union
from ..block import TensorBlock
from ..labels import Labels
from ..tensor import TensorMap
from ._labels import _save_labels, _save_labels_buffer_raw
from ._block import _save_block, _save_block_buffer_raw
from ._tensor import _save_tensor, _save_tensor_buffer_raw
from ._labels import load_labels, load_labels_buffer # noqa: F401
from ._block import ( # noqa: F401
create_numpy_array,
create_torch_array,
load_block,
load_block_buffer,
load_block_buffer_custom_array,
load_block_custom_array,
)
from ._tensor import ( # noqa: F401
load,
load_buffer,
load_buffer_custom_array,
load_custom_array,
)
[docs]
def save(
    file: Union[str, pathlib.Path, BinaryIO],
    data: Union[TensorMap, TensorBlock, Labels],
    use_numpy=False,
):
    """
    Write ``data`` (a :py:class:`TensorMap`, a :py:class:`TensorBlock`, or a
    :py:class:`Labels`) to ``file``.

    :py:class:`TensorMap` are serialized using numpy's ``.npz`` format, i.e. a ZIP
    file without compression (storage method is ``STORED``), where each file is
    stored as a ``.npy`` array. See the C API documentation for more information on
    the format.

    :param file: where to save the data. This can be a string or a
        :py:class:`pathlib.Path` containing the path of the file to write, or a
        file-like object that should be opened in binary mode.
    :param data: data to serialize and save
    :param use_numpy: should we use numpy or the native serializer implementation?
        Numpy should be able to process more dtypes than the native implementation,
        which is limited to float64, but the native implementation is usually
        faster than going through numpy. This is ignored when saving
        :py:class:`Labels`.
    :raises TypeError: if ``data`` is not a :py:class:`Labels`,
        :py:class:`TensorBlock` or :py:class:`TensorMap`
    """
    # Dispatch on the concrete type of `data`; the checks are tried in the order
    # Labels, TensorBlock, TensorMap and the first match wins.
    if isinstance(data, Labels):
        # `use_numpy` is deliberately not forwarded for labels
        return _save_labels(file=file, labels=data)

    if isinstance(data, TensorBlock):
        return _save_block(file=file, block=data, use_numpy=use_numpy)

    if isinstance(data, TensorMap):
        return _save_tensor(file=file, tensor=data, use_numpy=use_numpy)

    # None of the supported types matched
    message = (
        "`data` must be one of 'Labels', 'TensorBlock' or 'TensorMap', "
        f"not {type(data)}"
    )
    raise TypeError(message)
[docs]
def save_buffer(
    data: Union[TensorMap, TensorBlock, Labels],
    use_numpy=False,
) -> memoryview:
    """
    Serialize ``data`` (a :py:class:`TensorMap`, a :py:class:`TensorBlock`, or a
    :py:class:`Labels`) into an in-memory buffer instead of a file.

    :param data: data to serialize and save
    :param use_numpy: should we use numpy or the native serializer implementation?
        When enabled, the data is written with :py:func:`save` into an
        :py:class:`io.BytesIO` and the buffer of that object is returned. This is
        ignored when saving :py:class:`Labels`.
    :return: a :py:class:`memoryview` over the serialized bytes
    :raises TypeError: if ``data`` is not a :py:class:`Labels`,
        :py:class:`TensorBlock` or :py:class:`TensorMap`
    """
    if isinstance(data, Labels):
        # labels only have a native serializer, `use_numpy` plays no role here
        return memoryview(_save_labels_buffer_raw(labels=data))

    if isinstance(data, TensorBlock):
        if not use_numpy:
            return memoryview(_save_block_buffer_raw(block=data))

        # numpy path: reuse the file-based writer on an in-memory stream
        stream = io.BytesIO()
        save(stream, data=data, use_numpy=use_numpy)
        return stream.getbuffer()

    if isinstance(data, TensorMap):
        if not use_numpy:
            return memoryview(_save_tensor_buffer_raw(tensor=data))

        # numpy path: reuse the file-based writer on an in-memory stream
        stream = io.BytesIO()
        save(stream, data=data, use_numpy=use_numpy)
        return stream.getbuffer()

    # None of the supported types matched
    message = (
        "`data` must be one of 'Labels', 'TensorBlock' or 'TensorMap', "
        f"not {type(data)}"
    )
    raise TypeError(message)