AdsorbML tutorial#

from fairchem.core.common.relaxation.ase_utils import OCPCalculator
import ase.io
from ase.optimize import BFGS

from fairchem.data.oc.core import Adsorbate, AdsorbateSlabConfig, Bulk, Slab
import os
from glob import glob
import pandas as pd
from fairchem.data.oc.utils import DetectTrajAnomaly
from fairchem.data.oc.utils.vasp import write_vasp_input_files

# Optional - see below
import numpy as np
from dscribe.descriptors import SOAP
from scipy.spatial.distance import pdist, squareform
from x3dase.visualize import view_x3d_n

Enumerate the adsorbate-slab configurations to run relaxations on#

AdsorbML incorporates random placement, which is especially useful for more complicated adsorbates which may have many degrees of freedom. I have opted sample a few random placements and a few heuristic. Here I am using *CO on copper (1,1,1) as an example.

bulk_src_id = "mp-30"
adsorbate_smiles = "*CO"

bulk = Bulk(bulk_src_id_from_db = bulk_src_id)
adsorbate = Adsorbate(adsorbate_smiles_from_db=adsorbate_smiles)
slabs = Slab.from_bulk_get_specific_millers(bulk = bulk, specific_millers=(1,1,1))

# There may be multiple slabs with this miller index.
# For demonstrative purposes we will take the first entry.
slab = slabs[0]
Downloading src/fairchem/data/oc/databases/pkls/bulks.pkl...
# Perform heuristic placements
heuristic_adslabs = AdsorbateSlabConfig(slabs[0], adsorbate, mode="heuristic")

# Perform random placements
# (for AdsorbML we use `num_sites = 100` but we will use 4 for brevity here)
random_adslabs = AdsorbateSlabConfig(slabs[0], adsorbate, mode="random_site_heuristic_placement", num_sites = 4)

Run ML relaxations:#

There are 2 options for how to do this.

  1. Using OCPCalculator as the calculator within the ASE framework

  2. By writing objects to lmdb and relaxing them using main.py in the ocp repo

(1) is really only adequate for small stuff and it is what I will show here, but if you plan to run many relaxations, you should definitely use (2). More details about writing lmdbs has been provided here - follow the IS2RS/IS2RE instructions. And more information about running relaxations once the lmdb has been written is here.

You need to provide the calculator with a path to a model checkpoint file. That can be downloaded here

from fairchem.core.common.relaxation.ase_utils import OCPCalculator
from fairchem.core.models.model_registry import model_name_to_local_file
import os

checkpoint_path = model_name_to_local_file('EquiformerV2-31M-S2EF-OC20-All+MD', local_cache='/tmp/fairchem_checkpoints/')

os.makedirs(f"data/{bulk}_{adsorbate}", exist_ok=True)

# Define the calculator
calc = OCPCalculator(checkpoint_path=checkpoint_path) # if you have a gpu, add `cpu=False` to speed up calculations

adslabs = [*heuristic_adslabs.atoms_list, *random_adslabs.atoms_list]
# Set up the calculator
for idx, adslab in enumerate(adslabs):
    adslab.calc = calc
    opt = BFGS(adslab, trajectory=f"data/{bulk}_{adsorbate}/{idx}.traj")
    opt.run(fmax=0.05, steps=100) # For the AdsorbML results we used fmax = 0.02 and steps = 300, but we will use less strict values for brevity.
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/escn/so3.py:23: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  _Jd = torch.load(os.path.join(os.path.dirname(__file__), "Jd.pt"))
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/scn/spherical_harmonics.py:23: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  _Jd = torch.load(os.path.join(os.path.dirname(__file__), "Jd.pt"))
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/equiformer_v2/wigner.py:10: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  _Jd = torch.load(os.path.join(os.path.dirname(__file__), "Jd.pt"))
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/equiformer_v2/layer_norm.py:75: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  @torch.cuda.amp.autocast(enabled=False)
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/equiformer_v2/layer_norm.py:175: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  @torch.cuda.amp.autocast(enabled=False)
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/equiformer_v2/layer_norm.py:263: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  @torch.cuda.amp.autocast(enabled=False)
/home/runner/work/fairchem/fairchem/src/fairchem/core/models/equiformer_v2/layer_norm.py:357: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  @torch.cuda.amp.autocast(enabled=False)
/home/runner/work/fairchem/fairchem/src/fairchem/core/common/relaxation/ase_utils.py:150: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
  checkpoint = torch.load(checkpoint_path, map_location=torch.device("cpu"))
WARNING:root:Detected old config, converting to new format. Consider updating to avoid potential incompatibilities.
WARNING:root:equiformer_v2 (EquiformerV2) class is deprecated in favor of equiformer_v2_backbone_and_heads  (EquiformerV2BackboneAndHeads)
/home/runner/work/fairchem/fairchem/src/fairchem/core/modules/normalization/normalizer.py:69: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
  "mean": torch.tensor(state_dict["mean"]),
WARNING:root:No seed has been set in modelcheckpoint or OCPCalculator! Results may not be reproducible on re-run
/home/runner/work/fairchem/fairchem/src/fairchem/core/trainers/ocp_trainer.py:451: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
  with torch.cuda.amp.autocast(enabled=self.scaler is not None):
      Step     Time          Energy          fmax
BFGS:    0 21:50:54       -0.391371        1.900899
BFGS:    1 21:50:56       -0.395039        2.309924
BFGS:    2 21:50:59       -0.428412        0.814332
BFGS:    3 21:51:02       -0.454615        0.796530
BFGS:    4 21:51:04       -0.507092        1.218886
BFGS:    5 21:51:07       -0.542908        0.276724
BFGS:    6 21:51:10       -0.543536        0.181136
BFGS:    7 21:51:12       -0.544438        0.358524
BFGS:    8 21:51:15       -0.546418        0.370567
BFGS:    9 21:51:17       -0.553526        0.268939
BFGS:   10 21:51:20       -0.556836        0.113263
BFGS:   11 21:51:23       -0.558598        0.109847
BFGS:   12 21:51:26       -0.559091        0.210901
BFGS:   13 21:51:28       -0.560620        0.181438
BFGS:   14 21:51:31       -0.560235        0.278524
BFGS:   15 21:51:33       -0.561613        0.100454
BFGS:   16 21:51:36       -0.563280        0.061414
BFGS:   17 21:51:39       -0.563258        0.075300
BFGS:   18 21:51:41       -0.563859        0.105138
BFGS:   19 21:51:44       -0.565240        0.114931
BFGS:   20 21:51:47       -0.566414        0.060449
BFGS:   21 21:51:49       -0.566214        0.047732
      Step     Time          Energy          fmax
BFGS:    0 21:51:52       -0.238505        1.115626
BFGS:    1 21:51:54       -0.269474        1.488330
BFGS:    2 21:51:57       -0.367652        1.837515
BFGS:    3 21:52:00       -0.440838        1.461116
BFGS:    4 21:52:02       -0.478828        0.621207
BFGS:    5 21:52:05       -0.496601        0.283607
BFGS:    6 21:52:08       -0.502604        0.156827
BFGS:    7 21:52:10       -0.503448        0.143278
BFGS:    8 21:52:13       -0.517408        0.164732
BFGS:    9 21:52:16       -0.521539        0.173601
BFGS:   10 21:52:18       -0.523101        0.083747
BFGS:   11 21:52:21       -0.526111        0.212284
BFGS:   12 21:52:24       -0.528776        0.291233
BFGS:   13 21:52:26       -0.530236        0.228941
BFGS:   14 21:52:29       -0.529788        0.139954
BFGS:   15 21:52:32       -0.528640        0.059005
BFGS:   16 21:52:34       -0.529172        0.091502
BFGS:   17 21:52:37       -0.532773        0.161912
BFGS:   18 21:52:39       -0.534419        0.104595
BFGS:   19 21:52:42       -0.536149        0.016058
      Step     Time          Energy          fmax
BFGS:    0 21:52:45       -0.310907        1.364807
BFGS:    1 21:52:47       -0.321717        1.642053
BFGS:    2 21:52:50       -0.357307        1.127896
BFGS:    3 21:52:53       -0.489981        0.397479
BFGS:    4 21:52:55       -0.492910        0.198253
BFGS:    5 21:52:58       -0.495735        0.188074
BFGS:    6 21:53:01       -0.510065        0.234184
BFGS:    7 21:53:03       -0.511904        0.161509
BFGS:    8 21:53:06       -0.515557        0.122404
BFGS:    9 21:53:09       -0.516587        0.181690
BFGS:   10 21:53:11       -0.518766        0.176865
BFGS:   11 21:53:14       -0.520422        0.167028
BFGS:   12 21:53:17       -0.521419        0.090325
BFGS:   13 21:53:19       -0.522455        0.045961
      Step     Time          Energy          fmax
BFGS:    0 21:53:22       -0.460599        1.845325
BFGS:    1 21:53:25       -0.458817        2.265166
BFGS:    2 21:53:27       -0.488197        0.675386
BFGS:    3 21:53:30       -0.505703        0.615144
BFGS:    4 21:53:33       -0.536102        1.160958
BFGS:    5 21:53:35       -0.555528        0.726693
BFGS:    6 21:53:38       -0.560840        0.208051
BFGS:    7 21:53:41       -0.562889        0.075991
BFGS:    8 21:53:43       -0.563395        0.141898
BFGS:    9 21:53:46       -0.563854        0.153328
BFGS:   10 21:53:49       -0.564715        0.096904
BFGS:   11 21:53:51       -0.565289        0.030101
      Step     Time          Energy          fmax
BFGS:    0 21:53:54       -0.345225        1.425721
BFGS:    1 21:53:57       -0.350880        1.787260
BFGS:    2 21:53:59       -0.380526        1.087931
BFGS:    3 21:54:02       -0.487299        1.542374
BFGS:    4 21:54:04       -0.502747        0.731063
BFGS:    5 21:54:07       -0.503430        0.303712
BFGS:    6 21:54:10       -0.505877        0.422102
BFGS:    7 21:54:12       -0.514497        0.711535
BFGS:    8 21:54:15       -0.523932        0.627515
BFGS:    9 21:54:17       -0.530560        0.308552
BFGS:   10 21:54:20       -0.534902        0.235303
BFGS:   11 21:54:23       -0.537158        0.483748
BFGS:   12 21:54:25       -0.542109        0.696068
BFGS:   13 21:54:28       -0.545244        0.519801
BFGS:   14 21:54:30       -0.546094        0.234405
BFGS:   15 21:54:33       -0.546727        0.145162
BFGS:   16 21:54:36       -0.547353        0.259752
BFGS:   17 21:54:38       -0.549069        0.345136
BFGS:   18 21:54:41       -0.550898        0.290284
BFGS:   19 21:54:43       -0.554138        0.131703
BFGS:   20 21:54:46       -0.554671        0.082699
BFGS:   21 21:54:49       -0.555166        0.211035
BFGS:   22 21:54:51       -0.555538        0.288984
BFGS:   23 21:54:54       -0.556777        0.283664
BFGS:   24 21:54:56       -0.556900        0.187666
BFGS:   25 21:54:59       -0.558718        0.088404
BFGS:   26 21:55:02       -0.559720        0.097630
BFGS:   27 21:55:04       -0.560398        0.172980
BFGS:   28 21:55:07       -0.561608        0.201558
BFGS:   29 21:55:09       -0.563842        0.159479
BFGS:   30 21:55:12       -0.564775        0.045841
      Step     Time          Energy          fmax
BFGS:    0 21:55:14       -0.406964        1.750304
BFGS:    1 21:55:17       -0.406263        2.153095
BFGS:    2 21:55:20       -0.434780        0.830246
BFGS:    3 21:55:22       -0.465443        0.926775
BFGS:    4 21:55:25       -0.514382        1.425336
BFGS:    5 21:55:28       -0.539978        0.667856
BFGS:    6 21:55:30       -0.542923        0.206426
BFGS:    7 21:55:33       -0.542893        0.244127
BFGS:    8 21:55:36       -0.543767        0.365858
BFGS:    9 21:55:38       -0.548827        0.394697
BFGS:   10 21:55:41       -0.553839        0.248308
BFGS:   11 21:55:44       -0.556071        0.089981
BFGS:   12 21:55:46       -0.557083        0.237762
BFGS:   13 21:55:49       -0.558221        0.366610
BFGS:   14 21:55:51       -0.560680        0.368340
BFGS:   15 21:55:54       -0.561029        0.226291
BFGS:   16 21:55:57       -0.562043        0.069994
BFGS:   17 21:55:59       -0.562952        0.070878
BFGS:   18 21:56:02       -0.563575        0.121798
BFGS:   19 21:56:05       -0.563968        0.151496
BFGS:   20 21:56:07       -0.566332        0.137261
BFGS:   21 21:56:10       -0.565416        0.066056
BFGS:   22 21:56:13       -0.564767        0.039613
      Step     Time          Energy          fmax
BFGS:    0 21:56:15       -0.280445        1.099904
BFGS:    1 21:56:18       -0.294218        1.320595
BFGS:    2 21:56:21       -0.346874        1.484989
BFGS:    3 21:56:23       -0.320967        2.810294
BFGS:    4 21:56:26       -0.445420        0.474433
BFGS:    5 21:56:29       -0.462733        0.303569
BFGS:    6 21:56:31       -0.471744        0.366226
BFGS:    7 21:56:34       -0.475220        0.356226
BFGS:    8 21:56:37       -0.499918        0.311891
BFGS:    9 21:56:39       -0.503124        0.151150
BFGS:   10 21:56:42       -0.507992        0.369765
BFGS:   11 21:56:45       -0.513744        0.489930
BFGS:   12 21:56:47       -0.516483        0.339289
BFGS:   13 21:56:50       -0.518115        0.176953
BFGS:   14 21:56:52       -0.517795        0.125420
BFGS:   15 21:56:55       -0.517994        0.208962
BFGS:   16 21:56:58       -0.518409        0.247253
BFGS:   17 21:57:00       -0.521945        0.144050
BFGS:   18 21:57:03       -0.523472        0.058242
BFGS:   19 21:57:06       -0.523136        0.030632
      Step     Time          Energy          fmax
BFGS:    0 21:57:08       -0.304054        1.153055
BFGS:    1 21:57:11       -0.317349        1.438439
BFGS:    2 21:57:14       -0.360821        1.346647
BFGS:    3 21:57:16       -0.377894        1.888766
BFGS:    4 21:57:19       -0.465986        0.526045
BFGS:    5 21:57:22       -0.480075        0.272989
BFGS:    6 21:57:24       -0.486466        0.310522
BFGS:    7 21:57:27       -0.489288        0.319292
BFGS:    8 21:57:30       -0.504547        0.221060
BFGS:    9 21:57:32       -0.508582        0.154991
BFGS:   10 21:57:35       -0.511958        0.156013
BFGS:   11 21:57:37       -0.514305        0.176052
BFGS:   12 21:57:40       -0.519680        0.197151
BFGS:   13 21:57:43       -0.524766        0.160557
BFGS:   14 21:57:45       -0.526024        0.101224
BFGS:   15 21:57:48       -0.525868        0.111524
BFGS:   16 21:57:51       -0.526154        0.101521
BFGS:   17 21:57:53       -0.524787        0.067668
BFGS:   18 21:57:56       -0.525968        0.057480
BFGS:   19 21:57:59       -0.525583        0.039490

Parse the trajectories and post-process#

As a post-processing step we check to see if:

  1. the adsorbate desorbed

  2. the adsorbate disassociated

  3. the adsorbate intercalated

  4. the surface has changed

We check these because they effect our referencing scheme and may result in erroneous energies. For (4), the relaxed surface should really be supplied as well. It will be necessary when correcting the SP / RX energies later. Since we don’t have it here, we will ommit supplying it, and the detector will instead compare the initial and final slab from the adsorbate-slab relaxation trajectory. If a relaxed slab is provided, the detector will compare it and the slab after the adsorbate-slab relaxation. The latter is more correct! Note: for the results in the AdsorbML paper, we did not check if the adsorbate was intercalated (is_adsorbate_intercalated()) because it is a new addition.

# Iterate over trajs to extract results
results = []
for file in glob(f"data/{bulk}_{adsorbate}/*.traj"):
    rx_id = file.split("/")[-1].split(".")[0]
    traj = ase.io.read(file, ":")
    
    # Check to see if the trajectory is anomolous
    initial_atoms = traj[0]
    final_atoms = traj[-1]
    atom_tags = initial_atoms.get_tags()
    detector = DetectTrajAnomaly(initial_atoms, final_atoms, atom_tags)
    anom = (
        detector.is_adsorbate_dissociated()
        or detector.is_adsorbate_desorbed()
        or detector.has_surface_changed()
        or detector.is_adsorbate_intercalated()
    )
    rx_energy = traj[-1].get_potential_energy()
    results.append({"relaxation_idx": rx_id, "relaxed_atoms": traj[-1],
                    "relaxed_energy_ml": rx_energy, "anomolous": anom})
df = pd.DataFrame(results)
df
relaxation_idx relaxed_atoms relaxed_energy_ml anomolous
0 2 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.522455 False
1 4 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.564775 False
2 6 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.523136 False
3 0 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.566214 False
4 7 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.525583 False
5 3 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.565289 False
6 1 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.536149 False
7 5 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.564767 False
#scrap anomalies
df = df[~df.anomolous].copy().reset_index()

(Optional) Deduplicate structures#

We may have enumerated very similar structures or structures may have relaxed to the same configuration. For this reason, it is advantageous to cull systems if they are very similar. This results in marginal improvements in the recall metrics we calculated for AdsorbML, so it wasnt implemented there. It is, however, a good way to prevent wasteful VASP calculations. You can also imagine that if we would have enumerated 1000 configs per slab adsorbate combo rather than 100 for AdsorbML, it is more likely that having redundant systems would reduce performance, so its a good thing to keep in mind. This may be done by eye for a small number of systems, but with many systems it is easier to use an automated approach. Here is an example of one such approach, which uses a SOAP descriptor to find similar systems.

# Extract the configs and their energies
def deduplicate(configs_for_deduplication: list,
                adsorbate_binding_index: int,
                cosine_similarity = 1e-3,
               ):
    """
    A function that may be used to deduplicate similar structures.
    Among duplicate entries, the one with the lowest energy will be kept.
    
    Args:
        configs_for_deduplication: a list of ML relaxed adsorbate-
            surface configurations.
        cosine_similarity: The cosine simularity value above which,
            configurations are considered duplicate.
            
    Returns:
        (list): the indices of configs which should be kept as non-duplicate
    """
    
    energies_for_deduplication = np.array([atoms.get_potential_energy() for atoms in configs_for_deduplication])
    # Instantiate the soap descriptor
    soap = SOAP(
        species=np.unique(configs_for_deduplication[0].get_chemical_symbols()),
        r_cut = 2.0,
        n_max=6,
        l_max=3,
        periodic=True,
    )
    #Figure out which index cooresponds to 
    ads_len = list(configs_for_deduplication[0].get_tags()).count(2)
    position_idx = -1*(ads_len-adsorbate_binding_index)
    # Iterate over the systems to get the SOAP vectors
    soap_desc = []
    for config in configs_for_deduplication:
        soap_ex = soap.create(config, centers=[position_idx])
        soap_desc.extend(soap_ex)

    soap_descs = np.vstack(soap_desc)

    #Use euclidean distance to assess similarity
    distance = squareform(pdist(soap_descs, metric="cosine"))

    bool_matrix = np.where(distance <= cosine_similarity, 1, 0)
    # For configs that are found to be similar, just keep the lowest energy one
    idxs_to_keep = []
    pass_idxs = []
    for idx, row in enumerate(bool_matrix):
        if idx in pass_idxs:
            continue
            
        elif sum(row) == 1:
            idxs_to_keep.append(idx)
        else:
            same_idxs = [row_idx for row_idx, val in enumerate(row) if val == 1]
            pass_idxs.extend(same_idxs)
            # Pick the one with the lowest energy by ML
            min_e = min(energies_for_deduplication[same_idxs])
            idxs_to_keep.append(list(energies_for_deduplication).index(min_e))
    return idxs_to_keep
configs_for_deduplication =  df.relaxed_atoms.tolist()
idxs_to_keep = deduplicate(configs_for_deduplication, adsorbate.binding_indices[0])
# Flip through your configurations to check them out (and make sure deduplication looks good)
print(idxs_to_keep)
view_x3d_n(configs_for_deduplication[2].repeat((2,2,1)))
df = df.iloc[idxs_to_keep]
low_e_values = np.round(df.sort_values(by = "relaxed_energy_ml").relaxed_energy_ml.tolist()[0:5],3)
print(f"The lowest 5 energies are: {low_e_values}")
df
The lowest 5 energies are: [-0.566 -0.536 -0.526]
index relaxation_idx relaxed_atoms relaxed_energy_ml anomolous
4 4 7 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.525583 False
3 3 0 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.566214 False
6 6 1 (Atom('Cu', [-1.3000465215529715, 2.2517466275... -0.536149 False

Write VASP input files#

This assumes you have access to VASP pseudopotentials and the right environment variables configured for ASE. The default VASP flags (which are equivalent to those used to make OC20) are located in ocdata.utils.vasp. Alternatively, you may pass your own vasp flags to the write_vasp_input_files function as vasp_flags.

# Grab the 5 systems with the lowest energy
configs_for_dft = df.sort_values(by = "relaxed_energy_ml").relaxed_atoms.tolist()[0:5]
config_idxs = df.sort_values(by = "relaxed_energy_ml").relaxation_idx.tolist()[0:5]

# Write the inputs
for idx, config in enumerate(configs_for_dft):
    os.mkdir(f"data/{config_idxs[idx]}")
    write_vasp_input_files(config, outdir = f"data/{config_idxs[idx]}/")