# Source code for metatensor._labels

import ctypes
import functools
import io
import operator
import pathlib
from pickle import PickleBuffer
from typing import BinaryIO, List, Optional, Sequence, Tuple, Union, overload

import numpy as np

from ._c_api import (
    c_uintptr_t,
    mts_array_t,
    mts_labels_t,
)
from ._c_lib import _get_library
from ._data import Array, create_mts_array, mts_array_to_python_array
from ._status import check_pointer


class LabelsEntry:
    """A single entry (i.e. row) in a set of :py:class:`Labels`.

    The main way to create a :py:class:`LabelsEntry` is to index a
    :py:class:`Labels` or iterate over them.

    >>> from metatensor import Labels
    >>> import numpy as np
    >>> labels = Labels(
    ...     names=["system", "atom", "center_type"],
    ...     values=np.array([(0, 1, 8), (0, 2, 1), (0, 5, 1)]),
    ... )
    >>> entry = labels[0]  # or labels.entry(0)
    >>> entry.names
    ['system', 'atom', 'center_type']
    >>> print(entry.values)
    [0 1 8]
    """

    def __init__(self, names: List[str], values: Array):
        assert isinstance(names, list)
        for n in names:
            assert isinstance(n, str)
        assert isinstance(values, Array.__supertype__)

        self._names = names

        if len(values.shape) != 1 or values.dtype != np.int32:
            raise ValueError(
                "LabelsEntry values must be a 1-dimensional array of 32-bit integers"
            )

        self._values = values

    @property
    def names(self) -> List[str]:
        """names of the dimensions for this Labels entry"""
        return self._names

    @property
    def values(self) -> Array:
        """
        values associated with each dimensions of this Labels entry, stored as
        32-bit integers.
        """
        return self._values

    @property
    def device(self) -> str:
        """
        Get the device of this labels entry.

        This exists for compatibility with the TorchScript API, and always returns
        ``"cpu"`` when called.
        """
        return "cpu"

[docs] def print(self) -> str: """ print this entry as a named tuple (i.e. ``(key_1=value_1, key_2=value_2)``) """ values = [f"{n}={v}" for n, v in zip(self.names, self.values, strict=True)] return f"({', '.join(values)})"
def __repr__(self) -> str: return "LabelsEntry" + self.print()
[docs] def __len__(self) -> int: """number of dimensions in this labels entry""" return self._values.shape[0]
[docs] def __getitem__(self, dimension: Union[str, int]) -> int: """get the value associated with the dimension in this entry""" if isinstance(dimension, int): return self._values[dimension] elif isinstance(dimension, str): try: i = self._names.index(dimension) except ValueError: raise ValueError( f"'{dimension}' not found in the dimensions of these Labels" ) return self._values[i] else: raise TypeError( f"can only index LabelsEntry with str or int, got {type(dimension)}" )
[docs] def __eq__(self, other: "LabelsEntry") -> bool: """ check if ``self`` and ``other`` are equal (same dimensions/names and same values) """ if not isinstance(other, LabelsEntry): raise TypeError( f"can only compare between LabelsEntry for equality, got {type(other)}" ) return ( self._names == other._names and self._values.shape == other._values.shape and np.all(self._values == other._values) )
[docs] def __ne__(self, other: "LabelsEntry") -> bool: """ check if ``self`` and ``other`` are not equal (different dimensions/names or different values) """ return not self.__eq__(other)
class Labels:
    """
    A set of labels carrying metadata associated with a :py:class:`TensorMap`.

    The metadata can be thought of as a list of tuples, where each value in the
    tuple also has an associated dimension name. In practice, the dimensions
    ``names`` are stored separately from the ``values``, and the values are in a
    2-dimensional array of integers with the shape ``(n_entries, n_dimensions)``.
    Each row/entry in this array is unique, and they are often (but not always)
    sorted in lexicographic order.

    >>> from metatensor import Labels
    >>> import numpy as np
    >>> labels = Labels(
    ...     names=["system", "atom", "center_type"],
    ...     values=np.array([(0, 1, 8), (0, 2, 1), (0, 5, 1)]),
    ... )
    >>> labels
    Labels(
        system  atom  center_type
          0      1         8
          0      2         1
          0      5         1
    )
    >>> labels.names
    ['system', 'atom', 'center_type']
    >>> print(labels.values)
    [[0 1 8]
     [0 2 1]
     [0 5 1]]

    One can also iterate over labels entries, or directly index the
    :py:class:`Labels` to get a specific entry

    >>> entry = labels[0]  # or labels.entry(0)
    >>> entry.names
    ['system', 'atom', 'center_type']
    >>> print(entry.values)
    [0 1 8]
    >>> for entry in labels:
    ...     print(entry)
    LabelsEntry(system=0, atom=1, center_type=8)
    LabelsEntry(system=0, atom=2, center_type=1)
    LabelsEntry(system=0, atom=5, center_type=1)

    Or get all the values associated with a given dimension/column name

    >>> print(labels.column("atom"))
    [1 2 5]
    >>> print(labels["atom"])  # alternative syntax for the above
    [1 2 5]

    Labels can be checked for equality:

    >>> labels == labels
    True

    Finally, it is possible to check if a value is inside (non-view) labels, and
    get the corresponding position:

    >>> labels.position([0, 2, 1])
    1
    >>> print(labels.position([0, 2, 4]))
    None
    >>> (0, 2, 4) in labels
    False
    >>> labels[2] in labels
    True
    """

    def __init__(
        self,
        names: Union[str, Sequence[str]],
        values: np.ndarray,
        assume_unique: bool = False,
    ):
        """
        :param names: names of the dimensions in the new labels. A single string
            is transformed into a list with one element, i.e. ``names="a"`` is
            the same as ``names=["a"]``.
        :param values: values of the labels, this needs to be a 2-dimensional
            array of integers.
        :param assume_unique: skip uniqueness checks inside metatensor. This
            should only be set to ``True`` if you can ensure that label entries
            are already unique, either by construction or because you checked.

        :raises ValueError: if ``values`` is not a numpy array, is not
            2-dimensional, or does not have one column per name
        :raises TypeError: if ``values`` can not be converted to 32-bit integers
        """
        names = _normalize_names_type(names)

        if not isinstance(values, np.ndarray):
            raise ValueError("`values` must be a numpy ndarray")

        if len(values) == 0:
            # Explicitly define empty labels
            values = np.empty((0, len(names)), dtype=np.int32)

        if len(values.shape) != 2:
            # make sure the array is 2D
            raise ValueError("`values` must be a 2D array")

        if len(names) != values.shape[1]:
            raise ValueError(
                "`names` must have an entry for each column of the `values` array"
            )

        try:
            # We need to make sure the data is C-contiguous to take a pointer to
            # it, and that it has the right type
            values = np.ascontiguousarray(
                values.astype(
                    np.int32,
                    order="C",
                    casting="same_kind",
                    subok=False,
                    copy=False,
                )
            )
        except TypeError as e:
            raise TypeError("Labels values must be convertible to integers") from e

        # values should not be writeable
        values.flags.writeable = False

        self._lib = _get_library()
        self._ptr = _create_new_labels(
            self._lib, names, values, assume_unique=assume_unique
        )
        self._names = names
        # filled on first access to `self.values`
        self._cached_values = None

    @staticmethod
    def single() -> "Labels":
        """
        Create :py:class:`Labels` to use when there is no relevant metadata and
        only one entry in the corresponding dimension (e.g. keys when a tensor
        map contains a single block).
        """
        return Labels(names=["_"], values=np.zeros(shape=(1, 1), dtype=np.int32))

    @staticmethod
    def empty(names: Union[str, Sequence[str]]) -> "Labels":
        """
        Create :py:class:`Labels` with given ``names`` but no values.

        :param names: names of the dimensions in the new labels. A single string
            is transformed into a list with one element, i.e. ``names="a"`` is
            the same as ``names=["a"]``.
        """
        # normalize first, so that the number of columns is the number of
        # dimensions (and not e.g. the number of characters of a single string)
        names = _normalize_names_type(names)
        return Labels(names=names, values=np.empty((0, len(names)), dtype=np.int32))

    @staticmethod
    def range(name: str, end: int) -> "Labels":
        """
        Create :py:class:`Labels` with a single dimension using the given
        ``name`` and values in the ``[0, end)`` range.

        :param name: name of the single dimension in the new labels.
        :param end: end of the range for labels

        >>> from metatensor import Labels
        >>> labels = Labels.range("dummy", 7)
        >>> labels.names
        ['dummy']
        >>> print(labels.values)
        [[0]
         [1]
         [2]
         [3]
         [4]
         [5]
         [6]]
        """
        return Labels(
            names=[name],
            values=np.arange(end, dtype=np.int32).reshape(-1, 1),
        )

    @classmethod
    def _from_mts_labels_t(cls, labels):
        """Create Labels from an opaque mts_labels_t pointer."""
        check_pointer(labels)

        # bypass `__init__`: the native labels already exist
        obj = cls.__new__(cls)
        obj._lib = _get_library()
        obj._ptr = labels

        names_ptr = ctypes.POINTER(ctypes.c_char_p)()
        names_count = c_uintptr_t()
        obj._lib.mts_labels_dimensions(labels, names_ptr, names_count)

        names = []
        for i in range(names_count.value):
            names.append(names_ptr[i].decode("utf8"))

        obj._names = names
        obj._cached_values = None
        return obj

    def __del__(self):
        # `__del__` can run on partially initialized objects (e.g. if
        # `__init__` raised), hence the `hasattr` checks. The native handle is
        # stored in `_ptr`; it must be released here or it is leaked.
        if hasattr(self, "_lib") and self._lib is not None:
            if hasattr(self, "_ptr") and self._ptr is not None:
                self._lib.mts_labels_free(self._ptr)

    def __deepcopy__(self, _memodict):
        ptr = self._lib.mts_labels_clone(self._ptr)
        check_pointer(ptr)
        return Labels._from_mts_labels_t(ptr)

    def __copy__(self):
        return self.__deepcopy__({})

    def __str__(self) -> str:
        printed = self.print(4, 3)
        # `_print_labels` does not indent the header line, so it is padded here
        # with the same 3 spaces used as `indent` for the rows.
        return f"Labels(\n   {printed}\n)"

    def __repr__(self) -> str:
        printed = self.print(-1, 3)
        # see `__str__` for the 3 spaces in front of the header
        return f"Labels(\n   {printed}\n)"

    def __len__(self) -> int:
        """number of entries in these labels"""
        return self.values.shape[0]

    def __iter__(self):
        for row in self.values:
            yield LabelsEntry(self._names, row)

    @overload
    def __getitem__(self, dimension: str) -> np.ndarray:
        pass

    @overload
    def __getitem__(self, index: int) -> LabelsEntry:
        pass

    def __getitem__(self, index):
        """
        When indexing with a string, get the values for the corresponding
        dimension as a 1-dimensional array (i.e. :py:func:`Labels.column`).

        When indexing with an integer, get the corresponding row/labels entry
        (i.e. :py:func:`Labels.entry`).

        :raises TypeError: if ``index`` is neither a string nor an integer
        """
        if isinstance(index, (int, np.integer)):
            return self.entry(index)

        if isinstance(index, str):
            return self.column(index)

        raise TypeError(
            "Labels can only be indexed by a single string or integer"
            f", got {type(index)}. "
            "To select multiple columns, construct a new `Labels` object."
        )

    def __contains__(
        self,
        entry: Union[LabelsEntry, Sequence[int]],
    ) -> bool:
        """check if these :py:class:`Labels` contain the given ``entry``"""
        return self.position(entry) is not None

    def __eq__(self, other: "Labels") -> bool:
        """
        check if two set of labels are equal (same dimension names and same
        values)

        :raises TypeError: if ``other`` is not a :py:class:`Labels`
        """
        if not isinstance(other, Labels):
            raise TypeError(
                f"can only compare between Labels for equality, got {type(other)}"
            )

        return (
            self._names == other._names
            and self.values.shape == other.values.shape
            and bool(np.all(self.values == other.values))
        )

    def __ne__(self, other: "Labels") -> bool:
        """
        check if two set of labels are not equal (different dimension names or
        different values)
        """
        return not self.__eq__(other)

    def _as_mts_labels_t(self):
        """Get the native pointer stored in these Labels."""
        return self._ptr

    # ===== Serialization support ===== #

    @classmethod
    def _from_pickle(cls, buffer: Union[bytes, bytearray]):
        """
        Passed to pickler to reconstruct Labels from bytes object
        """
        from .io import load_labels_buffer

        return load_labels_buffer(buffer)

    def __reduce_ex__(self, protocol: int):
        """
        Used by the Pickler to dump Labels object to bytes object. When protocol >= 5
        it supports PickleBuffer which reduces number of copies needed
        """
        from .io import _save_labels_buffer_raw

        buffer = _save_labels_buffer_raw(self)
        if protocol >= 5:
            return self._from_pickle, (PickleBuffer(buffer),)
        else:
            return self._from_pickle, (buffer.raw,)

    @staticmethod
    def load(file: Union[str, pathlib.Path, BinaryIO]) -> "Labels":
        """
        Load a serialized :py:class:`Labels` from a file, calling
        :py:func:`metatensor.load_labels`.

        :param file: file path or file object to load from
        """
        from .io import load_labels

        return load_labels(file=file)

    @staticmethod
    def load_buffer(buffer: Union[bytes, bytearray, memoryview]) -> "Labels":
        """
        Load a serialized :py:class:`Labels` from a buffer, calling
        :py:func:`metatensor.io.load_labels_buffer`.

        :param buffer: in-memory buffer containing the data
        """
        from .io import load_labels_buffer

        return load_labels_buffer(buffer=buffer)

    def save(self, file: Union[str, pathlib.Path, BinaryIO]):
        """
        Save these :py:class:`Labels` to a file, calling :py:func:`metatensor.save`.

        :param file: file path or file object to save to
        """
        from .io import save

        return save(file=file, data=self)

    def save_buffer(self) -> memoryview:
        """
        Save these :py:class:`Labels` to an in-memory buffer, calling
        :py:func:`metatensor.io.save_buffer`.
        """
        from .io import save_buffer

        return save_buffer(data=self)

    # ===== Data manipulation ===== #

    @property
    def names(self) -> List[str]:
        """names of the dimensions for these :py:class:`Labels`"""
        return self._names

    @property
    def _raw_values(self) -> mts_array_t:
        """Get the raw ``mts_array_t`` corresponding to these Labels' values"""
        data = mts_array_t()
        self._lib.mts_labels_values(self._ptr, data)
        return data

    @property
    def values(self) -> Array:
        """
        values associated with each dimensions of the :py:class:`Labels`, stored
        as 2-dimensional tensor of 32-bit integers
        """
        # the conversion is done once, and the result re-used afterwards
        if self._cached_values is None:
            self._cached_values = mts_array_to_python_array(self._raw_values, self)
        return self._cached_values

    def append(self, name: str, values: np.ndarray) -> "Labels":
        """Append a new dimension to the end of the :py:class:`Labels`.

        :param name: name of the new dimension
        :param values: 1D array of values for the new dimension

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> label = Labels("foo", np.array([[42]]))
        >>> label
        Labels(
            foo
            42
        )
        >>> label.append(name="bar", values=np.array([10]))
        Labels(
            foo  bar
            42   10
        )
        """
        # `insert` takes a position among the dimensions (columns), so the end
        # is the number of names; `len(self)` would be the number of entries.
        return self.insert(index=len(self.names), name=name, values=values)

    def insert(self, index: int, name: str, values: np.ndarray) -> "Labels":
        """Insert a new dimension before ``index`` in the :py:class:`Labels`.

        :param index: index before the new dimension is inserted
        :param name: name of the new dimension
        :param values: 1D array of values for the new dimension

        :raises ValueError: if ``values`` is not a 1-dimensional numpy array
            with one value per entry of these labels

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> label = Labels("foo", np.array([[42]]))
        >>> label
        Labels(
            foo
            42
        )
        >>> label.insert(0, name="bar", values=np.array([10]))
        Labels(
            bar  foo
            10   42
        )
        """
        new_names = self.names.copy()
        new_names.insert(index, name)

        if not isinstance(values, np.ndarray):
            raise ValueError("`values` must be a numpy ndarray")

        if len(values.shape) != 1:
            raise ValueError("`values` must be a 1D array")

        if values.shape[0] != len(self):
            raise ValueError(
                f"the new `values` contains {values.shape[0]} entries, "
                f"but the Labels contains {len(self)}"
            )

        new_values = np.insert(self.values, index, values, axis=1)

        return Labels(names=new_names, values=new_values)

    def permute(self, dimensions_indexes: List[int]) -> "Labels":
        """Permute dimensions according to ``dimensions_indexes`` in the
        :py:class:`Labels`.

        :param dimensions_indexes: desired ordering of the dimensions
        :raises ValueError: if length of ``dimensions_indexes`` does not match
            the Labels length
        :raises ValueError: if duplicate values are present in
            ``dimensions_indexes``

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> label = Labels(["foo", "bar", "baz"], np.array([[42, 10, 3]]))
        >>> label
        Labels(
            foo  bar  baz
            42   10    3
        )
        >>> label.permute([2, 0, 1])
        Labels(
            baz  foo  bar
             3    42   10
        )
        """
        if len(dimensions_indexes) != len(self.names):
            raise ValueError(
                f"the length of `dimensions_indexes` ({len(dimensions_indexes)}) does "
                f"not match the number of dimensions in the Labels ({len(self.names)})"
            )

        names = [self.names[d] for d in dimensions_indexes]
        return Labels(names=names, values=self.values[:, dimensions_indexes])

    def remove(self, name: str) -> "Labels":
        """Remove ``name`` from the dimensions of the :py:class:`Labels`.

        Removal can only be performed if the resulting :py:class:`Labels`
        instance will be unique.

        :param name: name to be removed
        :raises ValueError: if the name is not present.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> label = Labels(["foo", "bar"], np.array([[42, 10]]))
        >>> label
        Labels(
            foo  bar
            42   10
        )
        >>> label.remove(name="bar")
        Labels(
            foo
            42
        )

        If the new :py:class:`Labels` is not unique an error is raised.

        >>> from metatensor import MetatensorError
        >>> label = Labels(["foo", "bar"], np.array([[42, 10], [42, 11]]))
        >>> label
        Labels(
            foo  bar
            42   10
            42   11
        )
        >>> try:
        ...     label.remove(name="bar")
        ... except MetatensorError as e:
        ...     print(e)
        invalid parameter: can not have the same label entry multiple times: [42] is already present
        """  # noqa E501
        if name not in self.names:
            raise ValueError(f"'{name}' not found in the dimensions of these Labels")

        new_names = self.names.copy()
        new_names.remove(name)

        index = self.names.index(name)
        new_values = np.delete(self.values, index, axis=1)

        return Labels(names=new_names, values=new_values)

    def rename(self, old: str, new: str) -> "Labels":
        """Rename the ``old`` dimension to ``new`` in the :py:class:`Labels`.

        :param old: name to be replaced
        :param new: name after the replacement
        :raises ValueError: if old is not present.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> label = Labels("foo", np.array([[42]]))
        >>> label
        Labels(
            foo
            42
        )
        >>> label.rename("foo", "bar")
        Labels(
            bar
            42
        )
        """
        if old not in self.names:
            raise ValueError(f"'{old}' not found in the dimensions of these Labels")

        names = self.names.copy()
        index = names.index(old)
        names[index] = new

        return Labels(names=names, values=self.values)

    def to(self, device, non_blocking=False) -> "Labels":
        """
        Move the values for these Labels to the given ``device``.
        ``non_blocking`` is ignored.

        In the Python version of metatensor, this returns the original labels
        without change. This function is defined for compatibility with the
        TorchScript version of metatensor.
        """
        return self

    @property
    def device(self) -> str:
        """
        Get the device of these Labels.

        This exists for compatibility with the TorchScript API, and always returns
        ``"cpu"`` when called.
        """
        return "cpu"

    def position(self, entry: Union[LabelsEntry, Sequence[int]]) -> Optional[int]:
        """
        Get the position of the given ``entry`` in this set of
        :py:class:`Labels`, or ``None`` if the entry is not present in the
        labels.
        """
        result = ctypes.c_int64()
        # copy the entry in a C array of 32-bit integers to pass it to the
        # native library
        c_entry = ctypes.ARRAY(ctypes.c_int32, len(entry))()
        for i, v in enumerate(entry):
            c_entry[i] = ctypes.c_int32(v)

        self._lib.mts_labels_position(
            self._ptr,
            c_entry,
            c_entry._length_,
            result,
        )

        # negative values are reported as "not found"
        if result.value >= 0:
            return result.value
        else:
            return None

    def difference(self, other: "Labels") -> "Labels":
        """
        Take the set difference of these :py:class:`Labels` with ``other``.

        If you want to know where entries in ``self`` and ``other`` ends up in
        the difference, you can use :py:meth:`Labels.difference_and_mapping`.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(
        ...     names=["a", "b"], values=np.array([[0, 1], [1, 3], [0, 3], [2, 2]])
        ... )
        >>> second = Labels(
        ...     names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2], [2, 1]])
        ... )
        >>> first.difference(second)
        Labels(
            a  b
            0  1
            2  2
        )
        """
        output = ctypes.POINTER(mts_labels_t)()
        # no mapping requested: pass a null pointer and a zero length
        self._lib.mts_labels_difference(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            None,
            0,
        )

        return Labels._from_mts_labels_t(output)

    def difference_and_mapping(self, other: "Labels") -> Tuple["Labels", np.ndarray]:
        """
        Take the set difference of these :py:class:`Labels` with ``other``.

        This function also returns the position in the difference where each
        entry of the input :py:class:`Labels` ended up.

        :return: Tuple containing the difference, and a
            :py:class:`numpy.ndarray` containing the position in the difference
            of the entries from ``self``.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(
        ...     names=["a", "b"], values=np.array([[0, 1], [1, 3], [0, 3], [2, 2]])
        ... )
        >>> second = Labels(
        ...     names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2], [2, 1]])
        ... )
        >>> difference, mapping_1 = first.difference_and_mapping(second)
        >>> difference
        Labels(
            a  b
            0  1
            2  2
        )
        >>> print(mapping_1)
        [ 0 -1 -1  1]
        """
        output = ctypes.POINTER(mts_labels_t)()
        first_mapping = np.zeros(len(self), dtype=np.int64)

        self._lib.mts_labels_difference(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            first_mapping.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            len(first_mapping),
        )

        return Labels._from_mts_labels_t(output), first_mapping

    def union(self, other: "Labels") -> "Labels":
        """
        Take the union of these :py:class:`Labels` with ``other``.

        If you want to know where entries in ``self`` and ``other`` ends up in
        the union, you can use :py:meth:`Labels.union_and_mapping`.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(names=["a", "b"], values=np.array([[0, 1], [1, 2], [0, 3]]))
        >>> second = Labels(names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2]]))
        >>> first.union(second)
        Labels(
            a  b
            0  1
            1  2
            0  3
            1  3
        )
        """
        output = ctypes.POINTER(mts_labels_t)()
        # no mappings requested: pass null pointers and zero lengths
        self._lib.mts_labels_union(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            None,
            0,
            None,
            0,
        )

        return Labels._from_mts_labels_t(output)

    def union_and_mapping(
        self, other: "Labels"
    ) -> Tuple["Labels", np.ndarray, np.ndarray]:
        """
        Take the union of these :py:class:`Labels` with ``other``.

        This function also returns the position in the union where each entry of
        the input :py:class:`Labels` ended up.

        :return: Tuple containing the union, a :py:class:`numpy.ndarray`
            containing the position in the union of the entries from ``self``,
            and a :py:class:`numpy.ndarray` containing the position in the union
            of the entries from ``other``.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(names=["a", "b"], values=np.array([[0, 1], [1, 2], [0, 3]]))
        >>> second = Labels(names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2]]))
        >>> union, mapping_1, mapping_2 = first.union_and_mapping(second)
        >>> union
        Labels(
            a  b
            0  1
            1  2
            0  3
            1  3
        )
        >>> print(mapping_1)
        [0 1 2]
        >>> print(mapping_2)
        [2 3 1]
        """
        output = ctypes.POINTER(mts_labels_t)()
        first_mapping = np.zeros(len(self), dtype=np.int64)
        second_mapping = np.zeros(len(other), dtype=np.int64)

        self._lib.mts_labels_union(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            first_mapping.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            len(first_mapping),
            second_mapping.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            len(second_mapping),
        )

        return Labels._from_mts_labels_t(output), first_mapping, second_mapping

    def intersection(self, other: "Labels") -> "Labels":
        """
        Take the intersection of these :py:class:`Labels` with ``other``.

        If you want to know where entries in ``self`` and ``other`` ends up in
        the intersection, you can use
        :py:meth:`Labels.intersection_and_mapping`.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(names=["a", "b"], values=np.array([[0, 1], [1, 2], [0, 3]]))
        >>> second = Labels(names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2]]))
        >>> first.intersection(second)
        Labels(
            a  b
            1  2
            0  3
        )
        """
        output = ctypes.POINTER(mts_labels_t)()
        # no mappings requested: pass null pointers and zero lengths
        self._lib.mts_labels_intersection(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            None,
            0,
            None,
            0,
        )

        return Labels._from_mts_labels_t(output)

    def intersection_and_mapping(
        self, other: "Labels"
    ) -> Tuple["Labels", np.ndarray, np.ndarray]:
        """
        Take the intersection of these :py:class:`Labels` with ``other``.

        This function also returns the position in the intersection where each
        entry of the input :py:class:`Labels` ended up.

        :return: Tuple containing the intersection, a :py:class:`numpy.ndarray`
            containing the position in the intersection of the entries from
            ``self``, and a :py:class:`numpy.ndarray` containing the position in
            the intersection of the entries from ``other``. If entries in
            ``self`` or ``other`` are not used in the output, the mapping for
            them is set to ``-1``.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> first = Labels(names=["a", "b"], values=np.array([[0, 1], [1, 2], [0, 3]]))
        >>> second = Labels(names=["a", "b"], values=np.array([[0, 3], [1, 3], [1, 2]]))
        >>> intersection, mapping_1, mapping_2 = first.intersection_and_mapping(second)
        >>> intersection
        Labels(
            a  b
            1  2
            0  3
        )
        >>> print(mapping_1)
        [-1  0  1]
        >>> print(mapping_2)
        [ 1 -1  0]
        """
        output = ctypes.POINTER(mts_labels_t)()
        first_mapping = np.zeros(len(self), dtype=np.int64)
        second_mapping = np.zeros(len(other), dtype=np.int64)

        self._lib.mts_labels_intersection(
            self._as_mts_labels_t(),
            other._as_mts_labels_t(),
            ctypes.pointer(output),
            first_mapping.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            len(first_mapping),
            second_mapping.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            len(second_mapping),
        )

        return Labels._from_mts_labels_t(output), first_mapping, second_mapping

    def select(self, selection: "Labels") -> np.ndarray:
        """
        Select entries in these :py:class:`Labels` that match the ``selection``.

        The selection's names must be a subset of the names of these labels.

        All entries in these :py:class:`Labels` that match one of the entry in
        the ``selection`` for all the selection's dimension will be picked. Any
        entry in the ``selection`` but not in these :py:class:`Labels` will be
        ignored.

        >>> import numpy as np
        >>> from metatensor import Labels
        >>> labels = Labels(
        ...     names=["a", "b"],
        ...     values=np.array([[0, 1], [1, 2], [0, 3], [1, 1], [2, 4]]),
        ... )
        >>> selection = Labels(names=["a"], values=np.array([[0], [2], [5]]))
        >>> print(labels.select(selection))
        [0 2 4]

        :param selection: description of the entries to select
        :return: 1-dimensional ndarray containing the integer indices of
            selected entries
        """
        # allocate room for every entry, the array is shrunk below to the count
        # written back in `selected_count`
        selected = np.zeros((len(self)), dtype=np.int64)
        selected_count = c_uintptr_t(len(self))

        self._lib.mts_labels_select(
            self._as_mts_labels_t(),
            selection._as_mts_labels_t(),
            selected.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)),
            ctypes.pointer(selected_count),
        )

        selected.resize(selected_count.value, refcheck=False)

        return selected

    def print(self, max_entries: int, indent: int = 0) -> str:
        """print these :py:class:`Labels` to a string

        :param max_entries: how many entries to print, use ``-1`` to print
            everything
        :param indent: indent the output by ``indent`` spaces
        """
        return _print_labels(
            self._names,
            self.values,
            max_entries=max_entries,
            indent=indent,
        )

    def entry(self, index: int) -> LabelsEntry:
        """
        Get a single entry/row in these labels.

        .. seealso::

            :py:func:`Labels.__getitem__` as the main way to use this function
        """
        return LabelsEntry(self._names, self.values[index, :])

    def column(self, dimension: str) -> np.ndarray:
        """
        Get the values associated with a single dimension in these labels (i.e.
        a single column of :py:attr:`Labels.values`) as a 1-dimensional array.

        :raises TypeError: if ``dimension`` is not a string
        :raises ValueError: if ``dimension`` is not one of the names of these
            labels

        .. seealso::

            :py:func:`Labels.__getitem__` as the main way to use this function
        """
        if not isinstance(dimension, str):
            message = (
                "Labels can only be indexed by a single string or integer, "
                f"got {type(dimension)}"
            )
            raise TypeError(message)

        try:
            index = self.names.index(dimension)
        except ValueError:
            raise ValueError(
                f"'{dimension}' not found in the dimensions of these Labels"
            )

        return self.values[:, index]
def _normalize_names_type(names: Union[str, Sequence[str]]) -> List[str]:
    """
    Transform Labels names from any of the accepted types into the canonical
    representation (list of strings).

    A single non-empty string becomes a one-element list, an empty string
    becomes an empty list, and any other iterable is converted to a list.

    :raises TypeError: if ``names`` is not iterable, or if one of the names is
        not a string
    """
    if isinstance(names, str):
        if len(names) == 0:
            names = []
        else:
            names = [names]
    else:
        try:
            names = list(names)
        except TypeError as e:
            raise TypeError(
                f"Labels names must be a sequence of strings, got {type(names)} instead"
            ) from e

    for name in names:
        if not isinstance(name, str):
            raise TypeError(
                f"Labels names must be strings, got {type(name)} instead"
            )

    return names


def _create_new_labels(
    lib, names: List[str], values: np.ndarray, *, assume_unique: bool = False
):
    """
    Create new native labels from ``names`` and ``values``, and return the
    corresponding pointer after checking it with ``check_pointer``.

    :param lib: library object providing ``mts_labels`` and
        ``mts_labels_assume_unique``
    :param names: names of the dimensions
    :param values: values of the labels; 1-dimensional input is reshaped to
        ``(-1, len(names))``
    :param assume_unique: call ``mts_labels_assume_unique`` instead of
        ``mts_labels``
    """
    c_names = ctypes.ARRAY(ctypes.c_char_p, len(names))()
    for i, n in enumerate(names):
        c_names[i] = n.encode("utf8")

    # Ensure values is 2D int32 C-contiguous for the mts_array_t
    if values.ndim == 1:
        values = values.reshape(-1, len(names))
    values = np.ascontiguousarray(values, dtype=np.int32)

    array = create_mts_array(values)

    if assume_unique:
        ptr = lib.mts_labels_assume_unique(c_names, len(names), array)
    else:
        ptr = lib.mts_labels(c_names, len(names), array)

    check_pointer(ptr)
    return ptr


def _print_string_center(output, string, width, last):
    """
    Write ``string`` to ``output``, centered in a column of ``width``
    characters. When the padding can not be split evenly, the extra space goes
    after the string.

    :param output: object with a ``write`` method
    :param last: if ``True``, the padding after the string is not written
    """
    delta = width - len(string)
    n_before = delta // 2
    n_after = delta - n_before

    output.write(" " * n_before)
    output.write(string)
    if not last:
        # don't add spaces after the last element
        output.write(" " * n_after)


def _stringify_entries(entries, widths: List[int]) -> List[List[str]]:
    """
    Convert every value of every entry in ``entries`` to a string.

    ``widths`` is updated in place, so that each column is at least as wide as
    its longest value plus one space on each side.
    """
    rows = []
    for entry in entries:
        strings = [str(e) for e in entry]
        for i, s in enumerate(strings):
            widths[i] = max(widths[i], len(s) + 2)
        rows.append(strings)

    return rows


def _print_labels(
    names: List[str],
    values: np.ndarray,
    max_entries: int,
    indent: int,
) -> str:
    """
    Format labels as a table with centered columns: one header line with the
    ``names``, then one line per entry of ``values``.

    :param names: names of the dimensions, one per column of ``values``
    :param values: 2-dimensional array of values
    :param max_entries: maximal number of entries to print, negative values
        print everything. When there are more entries, only the first and last
        ones are printed (at least 2 in total), separated by a ``...`` line.
    :param indent: number of spaces added in front of every line, except the
        header line which is left for the caller to indent
    :return: the formatted table, without a final new line
    """
    # ============================================================================ #
    # Step 1: determine the width of all the columns (at least the width of the
    # names plus 2, might be wider for large values)
    # ============================================================================ #

    # the +2 is here use at least one space on each side of the name
    widths = [len(n) + 2 for n in names]

    n_elements = values.shape[0]
    if max_entries < 0 or n_elements <= max_entries:
        # everything is printed, there is nothing after the "..."
        values_first = _stringify_entries(values, widths)
        values_second = []
    else:
        if max_entries < 2:
            max_entries = 2

        n_after = max_entries // 2
        n_before = max_entries - n_after

        # values before and after the "..."; only printed values are used to
        # determine the width of the columns
        values_first = _stringify_entries(values[:n_before, :], widths)
        values_second = _stringify_entries(values[n_elements - n_after :, :], widths)

    # ============================================================================ #
    # Step 2: actually create the output string, using io.StringIO to
    # incrementally append to the output
    # ============================================================================ #

    indent_str = " " * indent
    n_dimensions = values.shape[1]
    output = io.StringIO()

    def write_row(strings):
        for i in range(n_dimensions):
            last = i == n_dimensions - 1
            _print_string_center(output, strings[i], widths[i], last)
        output.write("\n")

    # header line, not indented
    write_row(names)

    for strings in values_first:
        output.write(indent_str)
        write_row(strings)

    if len(values_second) != 0:
        half_header_widths = sum(widths) // 2
        if half_header_widths > 3:
            # 3 characters in '...'
            half_header_widths -= 3
        output.write(indent_str)
        output.write((half_header_widths + 1) * " ")
        output.write("...\n")

        for strings in values_second:
            output.write(indent_str)
            write_row(strings)

    output = output.getvalue()
    # the header always ends with a new line, which is removed here
    assert output[-1] == "\n"
    return output[:-1]


def _ptr_to_const_ndarray(ptr, shape, dtype):
    """
    Create a read-only numpy array viewing the data behind ``ptr``.

    If ``shape`` contains no element, a new empty array is returned and ``ptr``
    is not used.

    :param ptr: ctypes pointer to the data
    :param shape: shape of the resulting array
    :param dtype: dtype the resulting array is expected to have
    """
    # the initial value makes this work for an empty `shape` as well
    if functools.reduce(operator.mul, shape, 1) == 0:
        return np.empty(shape=shape, dtype=dtype)

    assert ptr is not None
    array = np.ctypeslib.as_array(ptr, shape=shape)
    assert array.dtype == dtype
    assert not array.flags["OWNDATA"]
    array.flags["WRITEABLE"] = False
    return array