# Source code for metatensor.operations.block_from_array

from typing import List, Optional

import numpy as np

from . import _dispatch
from ._backend import Labels, TensorBlock, torch_jit_is_scripting, torch_jit_script


# torch is an optional dependency: when it is missing, fall back to an empty
# placeholder class so that `isinstance(..., TorchScriptClass)` checks still work
# (and simply evaluate to False).
try:
    import torch
except ImportError:

    class TorchScriptClass:
        """Placeholder standing in for ``torch.ScriptClass`` without torch."""

else:
    TorchScriptClass = torch.ScriptClass



# [docs] -- documentation back-link marker left by the HTML source view
@torch_jit_script
def block_from_array(
    array,
    sample_names: Optional[List[str]] = None,
    component_names: Optional[List[str]] = None,
    property_names: Optional[List[str]] = None,
) -> TensorBlock:
    """
    Creates a simple TensorBlock from an array.

    The metadata in the resulting :py:class:`TensorBlock` is filled with ranges of
    integers. This function should be seen as a quick way of creating a
    :py:class:`TensorBlock` from arbitrary data. However, the metadata generated in this
    way has little meaning.

    :param array: An array with two or more dimensions. This can either be a
        :py:class:`numpy.ndarray` or a :py:class:`torch.Tensor`.
    :param sample_names: A list containing ``d_samples`` names for the sample
        dimensions. The first ``d_samples`` dimensions in the array will be interpreted
        as enumerating samples. ``None`` implies a single dimension named ``"sample"``.
    :param component_names: A list containing ``d_components`` names for the component
        dimensions. The middle ``d_components`` dimensions in the array will be
        interpreted as enumerating components. ``None`` implies that all the middle
        dimensions (after removing any sample and property dimensions) will be
        considered components, named ``"component_1"``, ``"component_2"``, ...
    :param property_names: A list containing ``d_properties`` names for the property
        dimensions. The last ``d_properties`` dimensions in the array will be
        interpreted as enumerating properties. ``None`` implies a single dimension named
        ``"property"``.

    :return: A :py:class:`TensorBlock` whose values correspond to the provided
        ``array``. If no name options are provided, the metadata names are set to
        ``"sample"`` for samples; ``"component_1"``, ``"component_2"``, ... for
        components; and ``property`` for properties. The number of ``component`` labels
        is adapted to the dimensionality of the input array. If axes names are given, as
        indicated in the parameter list, the dimensions of the array will be interpreted
        accordingly, and indices also generated in a similar way. The metadata
        associated with each axis is a range of integers going from 0 to the size of the
        corresponding axis. The returned :py:class:`TensorBlock` has no gradients.

    :raises ValueError: if ``array`` has fewer than two dimensions; if it has fewer
        dimensions than the number of sample plus property names; or if the number of
        ``component_names`` differs from the number of remaining (middle) dimensions.

    >>> import numpy as np
    >>> import metatensor
    >>> # Construct a simple 4D array:
    >>> array = np.linspace(0, 10, 42).reshape((7, 3, 1, 2))
    >>> # Transform it into a TensorBlock:
    >>> tensor_block = metatensor.block_from_array(array)
    >>> print(tensor_block)
    TensorBlock
        samples (7): ['sample']
        components (3, 1): ['component_1', 'component_2']
        properties (2): ['property']
        gradients: None
    >>> # The data inside the TensorBlock will correspond to the provided array:
    >>> print(np.all(array == tensor_block.values))
    True
    >>> # High-dimensional tensor
    >>> array = np.linspace(0, 10, 60).reshape((2, 3, 5, 1, 2))
    >>> # Specify axes names:
    >>> tensor_block = metatensor.block_from_array(
    ...     array, sample_names=["a", "b"], property_names=["y"]
    ... )
    >>> print(tensor_block)
    TensorBlock
        samples (6): ['a', 'b']
        components (5, 1): ['component_1', 'component_2']
        properties (2): ['y']
        gradients: None
    """

    # `labels_array_like` is an empty array handed to the `_dispatch` helpers below
    # as their "like" argument; its type (torch vs numpy) follows the backend that
    # provides `Labels`.
    if torch_jit_is_scripting():
        # we are using metatensor-torch
        labels_array_like = torch.empty(0)
    else:
        if isinstance(Labels, TorchScriptClass):
            # we are using metatensor-torch
            labels_array_like = torch.empty(0)
        else:
            # we are using metatensor-core
            labels_array_like = np.empty(0)

    shape = array.shape
    n_dimensions = len(shape)
    if n_dimensions < 2:
        # NOTE: the message is built from separate literals; a backslash line
        # continuation inside the string would embed the source indentation in it
        raise ValueError(
            "the array provided to `block_from_array` must have at least two "
            + f"dimensions. Too few provided: {n_dimensions}"
        )

    # constructs the default label names and counts
    if sample_names is None:
        sample_names = ["sample"]
    d_samples = len(sample_names)
    if property_names is None:
        property_names = ["property"]
    d_properties = len(property_names)
    # guess number of components: every axis that is neither sample nor property
    d_components = n_dimensions - d_samples - d_properties
    if d_components < 0:
        raise ValueError(
            f"the array provided to `block_from_array` with shape {shape} "
            + "does not have enough dimensions to match the sample and property names"
        )
    if component_names is None:
        component_names = [
            f"component_{component_index + 1}"
            for component_index in range(d_components)
        ]
    if len(component_names) != d_components:
        raise ValueError(
            f"the array provided to `block_from_array` with shape {shape} "
            + "does not have enough dimensions to match the given sample, "
            + "component, and property names"
        )

    # Axis layout of `shape`: [0, d_samples) are samples, [d_samples, properties_start)
    # are components and [properties_start, n_dimensions) are properties. Explicit
    # non-negative bounds are used instead of `-d_properties`, since `-0` would select
    # the wrong axes (`shape[a:-0]` is empty and `shape[-0:]` is the whole shape).
    properties_start = d_samples + d_components

    samples = Labels(
        names=sample_names,
        values=_dispatch.indices_like(shape[0:d_samples], labels_array_like),
    )
    # one single-dimension `Labels` per component axis, holding 0..axis_size-1
    components = [
        Labels(
            names=[component_names[component_index]],
            values=_dispatch.int_array_like(
                list(range(axis_size)), labels_array_like
            ).reshape(-1, 1),
        )
        for component_index, axis_size in enumerate(shape[d_samples:properties_start])
    ]
    properties = Labels(
        names=property_names,
        values=_dispatch.indices_like(shape[properties_start:], labels_array_like),
    )

    # move all the metadata to the device of the input array
    device = _dispatch.get_device(array)
    samples = samples.to(device)
    components = [component.to(device) for component in components]
    properties = properties.to(device)

    # reshape the array if multiple axes of the input array are grouped as samples or
    # properties (i.e. if `len(sample_names) > 1` or `len(property_names) > 1`)
    if d_samples > 1 or d_properties > 1:
        block_shape = [len(samples)]
        for i in range(d_samples, properties_start):
            block_shape.append(shape[i])
        block_shape.append(len(properties))
        array = array.reshape(block_shape)

    return TensorBlock(array, samples, components, properties)