core.launchers.api#
Copyright (c) Meta Platforms, Inc. and affiliates.
This source code is licensed under the MIT license found in the LICENSE file in the root directory of this source tree.
Attributes#
Classes#
| SchedulerType | Enum where members are also (and must be) strings |
| DeviceType | Enum where members are also (and must be) strings |
| RunType | Enum where members are also (and must be) strings |
| DistributedInitMethod | Enum where members are also (and must be) strings |
Module Contents#
- core.launchers.api.ALLOWED_TOP_LEVEL_KEYS#
- core.launchers.api.LOG_DIR_NAME = 'logs'#
- core.launchers.api.CHECKPOINT_DIR_NAME = 'checkpoints'#
- core.launchers.api.RESULTS_DIR = 'results'#
- core.launchers.api.CONFIG_FILE_NAME = 'canonical_config.yaml'#
- core.launchers.api.PREEMPTION_STATE_DIR_NAME = 'preemption_state'#
- class core.launchers.api.SchedulerType#
Bases: fairchem.core.common.utils.StrEnum
Enum where members are also (and must be) strings
- LOCAL = 'local'#
- SLURM = 'slurm'#
- class core.launchers.api.DeviceType#
Bases: fairchem.core.common.utils.StrEnum
Enum where members are also (and must be) strings
- CPU = 'cpu'#
- CUDA = 'cuda'#
- class core.launchers.api.RunType#
Bases: fairchem.core.common.utils.StrEnum
Enum where members are also (and must be) strings
- RUN = 'run'#
- REDUCE = 'reduce'#
- class core.launchers.api.DistributedInitMethod#
Bases: fairchem.core.common.utils.StrEnum
Enum where members are also (and must be) strings
- TCP = 'tcp'#
- FILE = 'file'#
- class core.launchers.api.SlurmConfig#
- mem_gb: int = 80#
- timeout_hr: int = 168#
- cpus_per_task: int = 8#
- partition: str | None = None#
- qos: str | None = None#
- account: str | None = None#
- additional_parameters: dict | None = None#
- class core.launchers.api.SchedulerConfig#
- mode: SchedulerType#
- distributed_init_method: DistributedInitMethod#
- ranks_per_node: int = 1#
- num_nodes: int = 1#
- num_array_jobs: int = 1#
- slurm: SlurmConfig#
- use_ray: bool = False#
- ray_cluster: RayClusterConfig#
- class core.launchers.api.SlurmEnv#
- job_id: str | None = None#
- raw_job_id: str | None = None#
- array_job_id: str | None = None#
- array_task_id: str | None = None#
- restart_count: str | None = None#
- class core.launchers.api.Metadata#
- commit: str#
- log_dir: str#
- checkpoint_dir: str#
- results_dir: str#
- config_path: str#
- preemption_checkpoint_dir: str#
- cluster_name: str#
- array_job_num: int = 0#
- class core.launchers.api.JobConfig#
- run_name: str#
- timestamp_id: str#
- run_dir: str#
- device_type: DeviceType#
- debug: bool = False#
- scheduler: SchedulerConfig#
- logger: dict | None = None#
- seed: int = 0#
- deterministic: bool = False#
- runner_state_path: str | None = None#
- graph_parallel_group_size: int | None = None#
- __post_init__() -> None#