core.components.benchmark

core.components.benchmark#

Copyright (c) Meta Platforms, Inc. and affiliates.

This source code is licensed under the MIT license found in the LICENSE file in the root directory of this source tree.

Submodules#

Classes#

`JsonDFReducer`	A common pandas DataFrame reducer for benchmarks
`AdsorbMLReducer`	A common pandas DataFrame reducer for benchmarks
`AdsorptionReducer`	A common pandas DataFrame reducer for benchmarks
`Kappa103Reducer`	A common pandas DataFrame reducer for benchmarks
`MaterialsDiscoveryReducer`	A common pandas DataFrame reducer for benchmarks
`NVEMDReducer`	Benchmark reducer interface class.
`OMCPolymorphReducer`	A common pandas DataFrame reducer for benchmarks
`OMolReducer`	A common pandas DataFrame reducer for benchmarks
`InferenceBenchRunner`	Represents an abstraction over things that run in a loop and can save/load state.

Package Contents#

class core.components.benchmark.JsonDFReducer(benchmark_name: str, target_data_path: str | None = None, target_data_keys: collections.abc.Sequence[str] | None = None, index_name: str | None = None)#

Bases: BenchmarkReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

index_name#

benchmark_name#

target_data#

target_data_keys#

static load_targets(path: str, index_name: str | None) → pandas.DataFrame#

Load target data from a JSON file into a pandas DataFrame.

Parameters:

path – Path to the target JSON file
index_name – Optional name of the column to use as index

Returns:

DataFrame containing the target data, sorted by index

join_results(results_dir: str, glob_pattern: str) → pandas.DataFrame#

Join results from multiple JSON files into a single DataFrame.

Parameters:

results_dir – Directory containing result files
glob_pattern – Pattern to match result files

Returns:

Combined DataFrame containing all results

save_results(results: pandas.DataFrame, results_dir: str) → None#

Save joined results to a compressed json file

Parameters:

results – Combined results from join_results
results_dir – Directory containing result files

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute mean absolute error metrics for common columns between results and targets.

Parameters:

results – DataFrame containing prediction results
run_name – Name of the current run, used as index in the metrics DataFrame

Returns:

DataFrame containing computed metrics with run_name as index

save_metrics(metrics: pandas.DataFrame, results_dir: str) → None#

Save computed metrics to a compressed JSON file.

Parameters:

metrics – DataFrame containing the computed metrics
results_dir – Directory where metrics will be saved

log_metrics(metrics: pandas.DataFrame, run_name: str) → None#

Log metrics to the configured logger if available.

Parameters:

metrics – DataFrame containing the computed metrics
run_name – Name of the current run

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:

checkpoint_location – Location to save the checkpoint
is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters: checkpoint_location – Location to load the checkpoint from, or None

class core.components.benchmark.AdsorbMLReducer(benchmark_name: str, target_data_key: str | None = None, index_name: str | None = None, threshold: float = 0.1)#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

index_name#

benchmark_name#

target_data_key#

threshold#

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute mean absolute error metrics for common columns between results and targets.

Parameters:

results – DataFrame containing prediction results
run_name – Name of the current run, used as index in the metrics DataFrame

Returns:

DataFrame containing computed metrics with run_name as index

class core.components.benchmark.AdsorptionReducer(benchmark_name: str, target_data_key: str | None = None, index_name: str | None = None)#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

index_name#

benchmark_name#

target_data_key#

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute mean absolute error metrics for common columns between results and targets.

Parameters:

results – DataFrame containing prediction results
run_name – Name of the current run, used as index in the metrics DataFrame

Returns:

DataFrame containing computed metrics with run_name as index

class core.components.benchmark.Kappa103Reducer(benchmark_name: str, target_data_path: str | None = None, target_data_keys: collections.abc.Sequence[str] | None = None, index_name: str | None = 'mp_id')#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

property runner_type: type[fairchem.core.components.calculate.KappaRunner]#: The runner type this reducer is associated with.

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute Matbench discovery metrics for relaxed energy and structure predictions.

Parameters:

results – DataFrame containing prediction results with energy values
run_name – Identifier for the current evaluation run

Returns:

DataFrame containing computed metrics for different material subsets

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:

checkpoint_location – Location to save the checkpoint
is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters: checkpoint_location – Location to load the checkpoint from, or None

class core.components.benchmark.MaterialsDiscoveryReducer(benchmark_name: str, target_data_path: str, cse_data_path: str | None = None, elemental_references_path: str | None = None, index_name: str | None = None, corrections: pymatgen.entries.compatibility.Compatibility | None = MP2020Compatibility, max_error_threshold: float = 5.0, analyze_geo_opt: bool = True, geo_symprec: float = 1e-05)#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

_corrections#

_max_error_threshold#

_elemental_references_path#

_cse_data_path#

_analyze_geo_opt#

_geo_symprec#

property runner_type: type[fairchem.core.components.calculate.RelaxationRunner]#: The runner type this reducer is associated with.

static load_targets(path: str, index_name: str | None) → pandas.DataFrame#

Load target data from a JSON file into a pandas DataFrame.

Parameters:

path – Path to the target JSON file
index_name – Optional name of the column to use as index

Returns:

DataFrame containing the target data, sorted by index

static _load_elemental_ref_energies(elemental_references_path: str) → dict[str, float]#

static _load_computed_structure_entries(cse_data_path: str, results: pandas.DataFrame) → pandas.DataFrame#

Convert prediction results to computed structure entries with updated energies and structures.

Returns: DataFrame of computed structure entries indexed by material IDs

_apply_corrections(computed_structure_entries: list[pymatgen.entries.computed_entries.ComputedStructureEntry]) → None#

Apply compatibility corrections to computed structure entries.

Parameters: computed_structure_entries – List of ComputedStructureEntry objects to apply corrections to
Raises: ValueError – If not all entries were successfully processed after applying corrections

_analyze_relaxed_geometry(pred_structures: dict[str, pymatgen.core.Structure], target_structures: dict[str, pymatgen.core.Structure]) → dict[str, float]#

Analyze the geometry of relaxed structures and calculate the RMSD with respect to the target structures.

Parameters:

pred_structures – Dictionary mapping material IDs to predicted Structure objects
target_structures – Dictionary mapping material IDs to target Structure objects

Returns:

Dictionary containing geometric analysis metrics

join_results(results_dir: str, glob_pattern: str) → pandas.DataFrame#

Join results from multiple relaxation JSON files into a single DataFrame.

Joins results for relaxed energy, applies compatibility corrections, and computes the formation energy with respect to the MP reference structures in Matbench Discovery.

Parameters:

results_dir – Directory containing result files
glob_pattern – Pattern to match result files

Returns:

Combined DataFrame containing all results

save_results(results: pandas.DataFrame, results_dir: str) → None#

Save joined results to a single file

Saves the results in two formats: (1) a CSV file containing only numerical data, and (2) a JSON file containing all data, including relaxed structures.

Parameters:

results – DataFrame containing the prediction results
results_dir – Directory path where result files will be saved

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute Matbench discovery metrics for relaxed energy and structure predictions.

Parameters:

results – DataFrame containing prediction results with energy values
run_name – Identifier for the current evaluation run

Returns:

DataFrame containing computed metrics for different material subsets

log_metrics(metrics: pandas.DataFrame, run_name: str) → None#

Log metrics to the configured logger if available.

Parameters:

metrics – DataFrame containing the computed metrics
run_name – Name of the current run

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:

checkpoint_location – Location to save the checkpoint
is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters: checkpoint_location – Location to load the checkpoint from, or None

class core.components.benchmark.NVEMDReducer(benchmark_name: str)#

Bases: fairchem.core.components.benchmark._benchmark_reducer.BenchmarkReducer

Benchmark reducer interface class.

Note

When running with the fairchemv2 cli, the job_config and runner_config attributes are set at runtime to those given in the config file.

See the Reducer interface class for implementation details.

job_config#

a managed attribute that gives access to the job config

Type: DictConfig

runner_config#

a managed attribute that gives access to the calling runner config

Type: DictConfig

benchmark_name#

property runner_type: type[fairchem.core.components.calculate.NVEMDRunner]#: The runner type this reducer is associated with.

join_results(results_dir: str, glob_pattern: str) → pandas.DataFrame#

Join results from multiple JSON files into a single DataFrame.

Parameters:

results_dir – Directory containing result files
glob_pattern – Pattern to match result files

Returns:

Combined DataFrame containing all results

save_results(results: list, results_dir: str) → None#

Save joined results to a compressed json file

Parameters:

results – Combined results from join_results
results_dir – Directory containing result files

compute_metrics(results: list, run_name: str) → pandas.DataFrame#

Compute Matbench discovery metrics for relaxed energy and structure predictions.

Parameters:

results – DataFrame containing prediction results with energy values
run_name – Identifier for the current evaluation run

Returns:

DataFrame containing computed metrics for different material subsets

save_metrics(metrics: pandas.DataFrame, results_dir: str) → None#

Save computed metrics to a compressed JSON file.

Parameters:

metrics – DataFrame containing the computed metrics
results_dir – Directory where metrics will be saved

log_metrics(metrics: pandas.DataFrame, run_name: str) → None#

Log metrics to the configured logger if available.

Parameters:

metrics – DataFrame containing the computed metrics
run_name – Name of the current run

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:

checkpoint_location – Location to save the checkpoint
is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters: checkpoint_location – Location to load the checkpoint from, or None

class core.components.benchmark.OMCPolymorphReducer(benchmark_name: str, target_data_key: str, molecule_id_key: str, calculate_structural_metrics: bool = False, index_name: str | None = None)#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

_molecule_id_key#

_calc_structural_metrics#

property runner_type: type[fairchem.core.components.calculate.SinglePointRunner | fairchem.core.components.calculate.RelaxationRunner]#: The runner type this reducer is associated with.

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute OMC polymorph metrics for single point or relaxed energy and structure predictions.

Parameters:

results – DataFrame containing prediction results with energy values
run_name – Identifier for the current evaluation run

Returns:

DataFrame containing computed metrics for different material subsets

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:

checkpoint_location – Location to save the checkpoint
is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters: checkpoint_location – Location to load the checkpoint from, or None

class core.components.benchmark.OMolReducer(benchmark_name: str, evaluator: Callable | None = None, benchmark_labels: str | None = None)#

Bases: fairchem.core.components.benchmark.JsonDFReducer

A common pandas DataFrame reducer for benchmarks

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.

benchmark_name#

benchmark_labels#

evaluator#

join_results(results_dir: str, glob_pattern: str) → pandas.DataFrame#

Join results from multiple JSON files into a single DataFrame.

Parameters:

results_dir – Directory containing result files
glob_pattern – Pattern to match result files

Returns:

Combined DataFrame containing all results

save_results(results: pandas.DataFrame, results_dir: str) → None#

Save joined results to a compressed json file

Parameters:

results – Combined results from join_results
results_dir – Directory containing result files

compute_metrics(results: dict, run_name: str) → pandas.DataFrame#

Compute mean absolute error metrics for common columns between results and targets.

Parameters:

results – DataFrame containing prediction results
run_name – Name of the current run, used as index in the metrics DataFrame

Returns:

DataFrame containing computed metrics with run_name as index

class core.components.benchmark.InferenceBenchRunner(model_checkpoints: dict[str, str], natoms_list: list[int] | None = None, input_system: dict | None = None, timeiters: int = 10, repeats: int = 5, seed: int = 1, device='cuda', overrides: dict | None = None, inference_settings: fairchem.core.units.mlip_unit.api.inference.InferenceSettings = inference_settings_default(), generate_traces: bool = False, expand_supercells: int | None = None, dataset_name: str = 'omat')#

Bases: fairchem.core.components.runner.Runner

Represents an abstraction over things that run in a loop and can save/load state.

e.g., Trainers, Validators, and Relaxations all fall into this category.

Note

When running with the fairchemv2 cli, the job_config attribute is set at runtime to the one given in the config file.

job_config#

a managed attribute that gives access to the job config

Type: DictConfig

natoms_list#

input_system#

device#

seed#

timeiters#

model_checkpoints#

overrides#

inference_settings#

generate_traces#

expand_supercells#

dataset_name#

repeats#

run() → None#

save_state(_)#

load_state(_)#