core.components.benchmark.benchmark_reducer#

Copyright (c) Meta Platforms, Inc. and affiliates.

This source code is licensed under the MIT license found in the LICENSE file in the root directory of this source tree.

Attributes#

R

Generic type variable for the joined results object.

M

Generic type variable for the computed metrics object.

Classes#

BenchmarkReducer

Benchmark reducer interface class.

JsonDFReducer

A common pandas DataFrame reducer for benchmarks.

Module Contents#

core.components.benchmark.benchmark_reducer.R#
core.components.benchmark.benchmark_reducer.M#
class core.components.benchmark.benchmark_reducer.BenchmarkReducer#

Bases: fairchem.core.components.reducer.Reducer

Benchmark reducer interface class.

Note

When running with the fairchemv2 CLI, the job_config and runner_config attributes are set at runtime to the values given in the config file.

See the Reducer interface class for implementation details.

job_config#

a managed attribute that gives access to the job config

Type:

DictConfig

runner_config#

a managed attribute that gives access to the calling runner's config

Type:

DictConfig

property runner_type: type[fairchem.core.components.calculate.calculate_runner.CalculateRunner]#

The runner type this reducer is associated with.

property glob_pattern#

Returns the glob pattern used to find result files from the runner.

property logger: fairchem.core.common.logger.WandBSingletonLogger | None#

Returns a logger instance if conditions are met, otherwise None.

Returns:

Logger instance if running on the main rank with logging enabled

Return type:

WandBSingletonLogger or None

abstract join_results(results_dir: str, glob_pattern: str) → R#

Join results from multiple files into a single result object.

Parameters:
  • results_dir – Directory containing result files

  • glob_pattern – Pattern to match result files

Returns:

Combined results object of type R
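
As an illustration of how a concrete subclass might implement this, the following sketch globs JSON result files and concatenates them with pandas. The helper name, file format, and directory layout are assumptions for this example, not the library's implementation.

```python
import glob
import os

import pandas as pd


def join_json_results(results_dir: str, glob_pattern: str) -> pd.DataFrame:
    # Collect every per-worker result file written into results_dir.
    paths = sorted(glob.glob(os.path.join(results_dir, glob_pattern)))
    # Read each file and stack the frames into a single combined result.
    return pd.concat([pd.read_json(p) for p in paths], ignore_index=True)
```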

abstract save_results(results: R, results_dir: str) → None#

Save joined results to file.

Parameters:
  • results – Combined results from join_results

  • results_dir – Directory containing result files

abstract compute_metrics(results: R, run_name: str) → M#

Compute metrics from the joined results.

Parameters:
  • results – Combined results from join_results

  • run_name – Name of the current run

Returns:

Metrics object of type M
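
Because M is generic, the metrics object can be whatever type is convenient for the benchmark. A minimal, purely illustrative sketch where M is a plain dict of floats:

```python
import pandas as pd


def compute_metrics_example(results: pd.DataFrame, run_name: str) -> dict[str, float]:
    # Hypothetical metrics: the mean of every numeric column, keyed by run name.
    return {
        f"{run_name}/{column}_mean": float(results[column].mean())
        for column in results.select_dtypes(include="number").columns
    }
```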

abstract save_metrics(metrics: M, results_dir: str) → None#

Save computed metrics to disk.

Parameters:
  • metrics – Metrics object to save

  • results_dir – Directory to save metrics to

abstract log_metrics(metrics: M, run_name: str)#

Log metrics to the configured logger.

Parameters:
  • metrics – Metrics object to log

  • run_name – Name of the current run
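
A derived class would typically guard on the logger property, since it is None off the main rank or when logging is disabled. The log call below uses an assumed method name for illustration only; consult WandBSingletonLogger for the actual API.

```python
import pandas as pd


def log_metrics_sketch(self, metrics: pd.DataFrame, run_name: str) -> None:
    # self.logger is None unless running on the main rank with logging enabled.
    if self.logger is not None:
        # Assumed call signature: log a flat dict of metric name -> value.
        self.logger.log(metrics.loc[run_name].to_dict())
```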

abstract save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:
  • checkpoint_location – Location to save the checkpoint

  • is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

abstract load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters:

checkpoint_location – Location to load the checkpoint from, or None
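
For a reducer that keeps no state of its own between runs, both checkpoint methods can effectively be no-ops. This is a common pattern, sketched below; it is not necessarily what the built-in reducers do.

```python
def save_state(self, checkpoint_location: str, is_preemption: bool = False) -> bool:
    # Nothing to persist for a stateless reducer; report success.
    return True


def load_state(self, checkpoint_location: str | None) -> None:
    # Nothing to restore for a stateless reducer.
    return None
```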

reduce()#

Join results, compute metrics, save and log resulting metrics.

Note

Re-implementing this method in derived classes is discouraged.
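
The call order that reduce() drives is roughly the following. This simplified sketch omits configuration wiring, distributed details, and error handling, and is not the actual implementation.

```python
def reduce_sketch(reducer, results_dir: str, run_name: str) -> None:
    # Gather the per-worker result files into a single object.
    results = reducer.join_results(results_dir, reducer.glob_pattern)
    # (The real implementation may also persist them via save_results.)
    # Compute metrics for this run, then save and log them.
    metrics = reducer.compute_metrics(results, run_name)
    reducer.save_metrics(metrics, results_dir)
    reducer.log_metrics(metrics, run_name)
```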

class core.components.benchmark.benchmark_reducer.JsonDFReducer(benchmark_name: str, target_data_path: str | None = None, target_data_keys: collections.abc.Sequence[str] | None = None, index_name: str | None = None)#

Bases: BenchmarkReducer

A common pandas DataFrame reducer for benchmarks.

Results are assumed to be saved as JSON files that can be read into pandas DataFrames. Only the mean absolute error is computed, for the columns common to the predicted results and the target data.
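
A direct instantiation using the constructor signature above might look like the following; the paths, keys, and index column are placeholders, and in practice these values typically come from the job configuration.

```python
from fairchem.core.components.benchmark.benchmark_reducer import JsonDFReducer

reducer = JsonDFReducer(
    benchmark_name="my_benchmark",              # placeholder benchmark name
    target_data_path="/path/to/targets.json",   # JSON file of reference values
    target_data_keys=["energy"],                # placeholder target column(s)
    index_name="sid",                           # placeholder index column
)
```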

index_name#
benchmark_name#
target_data#
target_data_keys#
static load_targets(path: str, index_name: str | None) → pandas.DataFrame#

Load target data from a JSON file into a pandas DataFrame.

Parameters:
  • path – Path to the target JSON file

  • index_name – Optional name of the column to use as index

Returns:

DataFrame containing the target data, sorted by index
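
For example, assuming a column-oriented JSON file that pandas can read (the exact on-disk format accepted by the implementation may differ):

```python
import pandas as pd

from fairchem.core.components.benchmark.benchmark_reducer import JsonDFReducer

# Write a tiny example target file; "sid" is a placeholder identifier column.
pd.DataFrame({"sid": ["b", "a"], "energy": [-1.0, -2.5]}).to_json("targets.json")

# Load it back with "sid" as the index; the returned frame is sorted by index.
targets = JsonDFReducer.load_targets("targets.json", index_name="sid")
print(targets)
```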

join_results(results_dir: str, glob_pattern: str) → pandas.DataFrame#

Join results from multiple JSON files into a single DataFrame.

Parameters:
  • results_dir – Directory containing result files

  • glob_pattern – Pattern to match result files

Returns:

Combined DataFrame containing all results

save_results(results: pandas.DataFrame, results_dir: str) → None#

Save joined results to a compressed JSON file.

Parameters:
  • results – Combined results from join_results

  • results_dir – Directory containing result files

compute_metrics(results: pandas.DataFrame, run_name: str) → pandas.DataFrame#

Compute mean absolute error metrics for common columns between results and targets.

Parameters:
  • results – DataFrame containing prediction results

  • run_name – Name of the current run, used as index in the metrics DataFrame

Returns:

DataFrame containing computed metrics with run_name as index
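
Conceptually, the metric is a column-wise mean absolute error over the columns shared by the predictions and the target data, aligned on the index. A rough equivalent (not the actual implementation, and with an illustrative column-naming convention) is:

```python
import pandas as pd


def mae_on_common_columns(
    results: pd.DataFrame, targets: pd.DataFrame, run_name: str
) -> pd.DataFrame:
    # Columns present in both predictions and targets.
    common = [c for c in results.columns if c in targets.columns]
    # Align targets to the prediction index, then average absolute errors.
    errors = (results[common] - targets.loc[results.index, common]).abs().mean()
    # One row of metrics indexed by the run name (naming is illustrative).
    return pd.DataFrame({f"{c}_mae": [errors[c]] for c in common}, index=[run_name])
```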

save_metrics(metrics: pandas.DataFrame, results_dir: str) → None#

Save computed metrics to a compressed JSON file.

Parameters:
  • metrics – DataFrame containing the computed metrics

  • results_dir – Directory where metrics will be saved

log_metrics(metrics: pandas.DataFrame, run_name: str) → None#

Log metrics to the configured logger if available.

Parameters:
  • metrics – DataFrame containing the computed metrics

  • run_name – Name of the current run

save_state(checkpoint_location: str, is_preemption: bool = False) → bool#

Save the current state of the reducer to a checkpoint.

Parameters:
  • checkpoint_location – Location to save the checkpoint

  • is_preemption – Whether the save is due to preemption

Returns:

Success status of the save operation

Return type:

bool

load_state(checkpoint_location: str | None) → None#

Load reducer state from a checkpoint.

Parameters:

checkpoint_location – Location to load the checkpoint from, or None