Computing SOAP featuresΒΆ
This examples shows how to compute the SOAP power spectrum descriptor for each atom in each system of a provided systems file. The path to the systems file is taken from the first command line argument.
In the end, the descriptor is transformed in a way compatible with how most classic machine learning (such as PCA or linear regression) work.
The workflow is the same for every provided descriptor. Take a look at the Reference guides for a list with all descriptors and their specific parameters.
You can obtain a testing dataset from our website.
import chemfiles
from featomic import SoapPowerSpectrum
Read systems using chemfiles. You can obtain the dataset used in this
example from our website.
with chemfiles.Trajectory("dataset.xyz") as trajectory:
systems = [s for s in trajectory]
Featomic can also handles systems read by ASE using
systems = ase.io.read("dataset.xyz", ":").
We can now define hyper parameters for the calculation
HYPER_PARAMETERS = {
"cutoff": {
"radius": 5.0,
"smoothing": {"type": "ShiftedCosine", "width": 0.5},
},
"density": {
"type": "Gaussian",
"width": 0.3,
},
"basis": {
"type": "TensorProduct",
"max_angular": 4,
"radial": {"type": "Gto", "max_radial": 6},
},
}
calculator = SoapPowerSpectrum(**HYPER_PARAMETERS)
And then run the actual calculation, including gradients with respect to positions
descriptor = calculator.compute(systems, gradients=["positions"])
The descriptor is a metatensor TensorMap, containing multiple blocks. We
can transform it to a single block containing a dense representation, with one
sample for each atom-centered environment by using keys_to_samples and
keys_to_properties
print("before: ", len(descriptor.keys))
descriptor = descriptor.keys_to_samples("center_type")
descriptor = descriptor.keys_to_properties(["neighbor_1_type", "neighbor_2_type"])
print("after: ", len(descriptor.keys))
before: 40
after: 1
you can now use descriptor.block().values as the input of a machine
learning algorithm
print(descriptor.block().values.shape)
(1380, 2450)
use metatensor::Labels;
use featomic::{Calculator, System, CalculationOptions};
use chemfiles::{Trajectory, Frame};
fn read_systems_from_file(path: &str) -> Vec<Box<dyn System>> {
let mut trajectory = Trajectory::open(path, 'r').expect("could not open the trajectory");
let mut frame = Frame::new();
let mut systems = Vec::new();
for step in 0..trajectory.nsteps() {
trajectory.read_step(step, &mut frame).expect("failed to read single frame");
systems.push((&frame).into());
}
systems
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
// load the systems from command line argument
let path = std::env::args().nth(1).expect("expected a command line argument");
let mut systems = read_systems_from_file(&path);
// pass hyper-parameters as JSON
let parameters = r#"{
"cutoff": {
"radius": 5.0,
"smoothing": {
"type": "ShiftedCosine",
"width": 0.5
}
},
"density": {
"type": "Gaussian",
"width": 0.3
},
"basis": {
"type": "TensorProduct",
"max_angular": 4,
"radial": {"type": "Gto", "max_radial": 6}
}
}"#;
// create the calculator with its name and parameters
let mut calculator = Calculator::new("soap_power_spectrum", parameters.to_owned())?;
// create the options for a single calculation, here we request the
// calculation of gradients with respect to positions
let options = CalculationOptions {
gradients: &["positions"],
..Default::default()
};
// run the calculation
let descriptor = calculator.compute(&mut systems, options)?;
// Transform the descriptor to dense representation, with one sample for
// each atom-centered environment, and the neighbor atomic types part of the
// properties
let keys_to_move = Labels::empty(vec!["center_type"]);
let descriptor = descriptor.keys_to_samples(&keys_to_move, /* sort_samples */ true)?;
let keys_to_move = Labels::empty(vec!["neighbor_1_type", "neighbor_2_type"]);
let descriptor = descriptor.keys_to_properties(&keys_to_move, /* sort_samples */ true)?;
// descriptor now contains a single block, which can be used as the input
// to standard ML algorithms
let values = descriptor.block_by_id(0).values().to_array();
println!("SOAP representation shape: {:?}", values.shape());
Ok(())
}
#include <iostream>
#include <chemfiles.hpp>
#include <featomic.hpp>
static std::vector<featomic::SimpleSystem> read_systems(const std::string& path);
int main(int argc, char* argv[]) {
if (argc < 2) {
std::cout << "error: expected a command line argument" << std::endl;
return 1;
}
auto systems = read_systems(argv[1]);
// pass hyper-parameters as JSON
const char* parameters = R"({
"cutoff": {
"radius": 5.0,
"smoothing": {"type": "ShiftedCosine", "width": 0.5}
},
"density": {
"type": "Gaussian",
"width": 0.3
},
"basis": {
"type": "TensorProduct",
"max_angular": 6,
"radial": {"type": "Gto", "max_radial": 6}
}
})";
// create the calculator with its name and parameters
auto calculator = featomic::Calculator("soap_power_spectrum", parameters);
// setup the calculation options
auto options = featomic::CalculationOptions();
options.use_native_system = true;
options.gradients.push_back("positions");
// run the calculation
auto descriptor = calculator.compute(systems, options);
// The descriptor is a metatensor `TensorMap`, containing multiple blocks.
// We can transform it to a single block containing a dense representation,
// with one sample for each atom-centered environment.
descriptor.keys_to_samples("center_type");
descriptor.keys_to_properties(std::vector<std::string>{"neighbor_1_type", "neighbor_2_type"});
// extract values from the descriptor in the only remaining block
auto block = descriptor.block_by_id(0);
auto values = block.values();
// you can now use values as the input of a machine learning algorithm
return 0;
}
std::vector<featomic::SimpleSystem> read_systems(const std::string& path) {
auto trajectory = chemfiles::Trajectory(path);
auto systems = std::vector<featomic::SimpleSystem>();
for (size_t step=0; step<trajectory.size(); step++) {
auto frame = trajectory.read_at(step);
auto matrix = frame.cell().matrix();
auto system = featomic::SimpleSystem(featomic::System::CellMatrix{{
{matrix[0][0], matrix[0][1], matrix[0][2]},
{matrix[1][0], matrix[1][1], matrix[1][2]},
{matrix[2][0], matrix[2][1], matrix[2][2]},
}});
auto positions = frame.positions();
for (size_t i=0; i<frame.size(); i++) {
system.add_atom(
static_cast<int32_t>(frame[i].atomic_number().value_or(0)),
{positions[i][0], positions[i][1], positions[i][2]}
);
}
systems.push_back(system);
}
return systems;
}
#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <featomic.h>
#include <metatensor.h>
#include "common/systems.h"
static mts_tensormap_t* move_keys_to_samples(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len);
static mts_tensormap_t* move_keys_to_properties(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len);
int main(int argc, char* argv[]) {
int status = FEATOMIC_SUCCESS;
featomic_calculator_t* calculator = NULL;
featomic_system_t* systems = NULL;
uintptr_t n_systems = 0;
double* values = NULL;
const uintptr_t* shape = NULL;
uintptr_t shape_count = 0;
bool got_error = true;
const char* keys_to_samples[] = {"center_type"};
const char* keys_to_properties[] = {"neighbor_1_type", "neighbor_2_type"};
// use the default set of options, computing all samples and all features,
// and including gradients with respect to positions
featomic_calculation_options_t options = {0};
const char* gradients_list[] = {"positions"};
options.gradients = gradients_list;
options.gradients_count = 1;
options.use_native_system = true;
mts_tensormap_t* descriptor = NULL;
mts_block_t* block = NULL;
mts_array_t array = {0};
// hyper-parameters for the calculation as JSON
const char* parameters = "{\n"
"\"cutoff\": {\n"
" \"radius\": 5.0,\n"
" \"smoothing\": {\"type\": \"ShiftedCosine\", \"width\": 0.5}\n"
"},\n"
"\"density\": {\n"
" \"type\": \"Gaussian\",\n"
" \"width\": 0.3\n"
"},\n"
"\"basis\": {\n"
" \"type\": \"TensorProduct\",\n"
" \"max_angular\": 6,\n"
" \"radial\": {\"type\": \"Gto\", \"max_radial\": 6}\n"
"}\n"
"}";
// load systems from command line arguments
if (argc < 2) {
printf("error: expected a command line argument");
goto cleanup;
}
status = read_systems_example(argv[1], &systems, &n_systems);
if (status != 0) {
goto cleanup;
}
// create the calculator with its name and parameters
calculator = featomic_calculator("soap_power_spectrum", parameters);
if (calculator == NULL) {
printf("Error: %s\n", featomic_last_error());
goto cleanup;
}
// run the calculation
status = featomic_calculator_compute(
calculator, &descriptor, systems, n_systems, options
);
if (status != FEATOMIC_SUCCESS) {
printf("Error: %s\n", featomic_last_error());
goto cleanup;
}
// The descriptor is a metatensor `TensorMap`, containing multiple blocks.
// We can transform it to a single block containing a dense representation,
// with one sample for each atom-centered environment.
descriptor = move_keys_to_samples(descriptor, keys_to_samples, 1);
if (descriptor == NULL) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
descriptor = move_keys_to_properties(descriptor, keys_to_properties, 2);
if (descriptor == NULL) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
// extract the unique block and corresponding values from the descriptor
status = mts_tensormap_block_by_id(descriptor, &block, 0);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
status = mts_block_data(block, &array);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
// callback the functions on the mts_array_t to extract the shape/data pointer
status = array.shape(array.ptr, &shape, &shape_count);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
status = array.data(array.ptr, &values);
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
if (status != MTS_SUCCESS) {
printf("Error: %s\n", mts_last_error());
goto cleanup;
}
assert(shape_count == 2);
// you can now use `values` as the input of a machine learning algorithm
printf("the value array shape is %lu x %lu\n", shape[0], shape[1]);
got_error = false;
cleanup:
mts_tensormap_free(descriptor);
featomic_calculator_free(calculator);
free_systems_example(systems, n_systems);
if (got_error) {
return 1;
} else {
return 0;
}
}
mts_tensormap_t* move_keys_to_samples(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len) {
mts_labels_t keys = {0};
mts_tensormap_t* moved_descriptor = NULL;
keys.names = keys_to_move;
keys.size = keys_to_move_len;
keys.values = NULL;
keys.count = 0;
moved_descriptor = mts_tensormap_keys_to_samples(descriptor, keys, true);
mts_tensormap_free(descriptor);
return moved_descriptor;
}
mts_tensormap_t* move_keys_to_properties(mts_tensormap_t* descriptor, const char* keys_to_move[], size_t keys_to_move_len) {
mts_labels_t keys = {0};
mts_tensormap_t* moved_descriptor = NULL;
keys.names = keys_to_move;
keys.size = keys_to_move_len;
keys.values = NULL;
keys.count = 0;
moved_descriptor = mts_tensormap_keys_to_properties(descriptor, keys, true);
mts_tensormap_free(descriptor);
return moved_descriptor;
}
#include "common/systems.c"