core._cli_hydra

Contents

core._cli_hydra#

Copyright (c) Facebook, Inc. and its affiliates.

This source code is licensed under the MIT license found in the LICENSE file in the root directory of this source tree.

Attributes#

Classes#

SchedulerType

str(object='') -> str

DeviceType

str(object='') -> str

RunType

str(object='') -> str

SlurmConfig

SchedulerConfig

SlurmEnv

Metadata

JobConfig

Submitit

Derived callable classes are requeued after timeout with their current state dumped at checkpoint.

Functions#

_set_seeds(→ None)

_set_deterministic_mode(→ None)

_get_slurm_env(→ SlurmEnv)

remove_runner_state_from_submission(→ None)

map_job_config_to_dist_config(→ dict)

get_canonical_config(→ omegaconf.DictConfig)

get_hydra_config_from_yaml(→ omegaconf.DictConfig)

_runner_wrapper(config[, run_type])

main([args, override_args])

Module Contents#

core._cli_hydra.ALLOWED_TOP_LEVEL_KEYS#
core._cli_hydra.LOG_DIR_NAME = 'logs'#
core._cli_hydra.CHECKPOINT_DIR_NAME = 'checkpoints'#
core._cli_hydra.RESULTS_DIR = 'results'#
core._cli_hydra.CONFIG_FILE_NAME = 'canonical_config.yaml'#
core._cli_hydra.PREEMPTION_STATE_DIR_NAME = 'preemption_state'#
class core._cli_hydra.SchedulerType#

Bases: str, enum.Enum

str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

LOCAL = 'local'#
SLURM = 'slurm'#
class core._cli_hydra.DeviceType#

Bases: str, enum.Enum

str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

CPU = 'cpu'#
CUDA = 'cuda'#
class core._cli_hydra.RunType#

Bases: str, enum.Enum

str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

RUN = 'run'#
REDUCE = 'reduce'#
class core._cli_hydra.SlurmConfig#
mem_gb: int = 80#
timeout_hr: int = 168#
cpus_per_task: int = 8#
partition: str | None = None#
qos: str | None = None#
account: str | None = None#
class core._cli_hydra.SchedulerConfig#
mode: SchedulerType#
ranks_per_node: int = 1#
num_nodes: int = 1#
num_array_jobs: int = 1#
slurm: SlurmConfig#
class core._cli_hydra.SlurmEnv#
job_id: str | None = None#
raw_job_id: str | None = None#
array_job_id: str | None = None#
array_task_id: str | None = None#
restart_count: str | None = None#
class core._cli_hydra.Metadata#
commit: str#
log_dir: str#
checkpoint_dir: str#
results_dir: str#
config_path: str#
preemption_checkpoint_dir: str#
cluster_name: str#
array_job_num: int = 0#
slurm_env: SlurmEnv#
class core._cli_hydra.JobConfig#
run_name: str#
timestamp_id: str#
run_dir: str#
device_type: DeviceType#
debug: bool = False#
scheduler: SchedulerConfig#
logger: dict | None = None#
seed: int = 0#
deterministic: bool = False#
runner_state_path: str | None = None#
metadata: Metadata | None = None#
graph_parallel_group_size: int | None = None#
__post_init__() → None#
core._cli_hydra._set_seeds(seed: int) → None#
core._cli_hydra._set_deterministic_mode() → None#
core._cli_hydra._get_slurm_env() → SlurmEnv#
core._cli_hydra.remove_runner_state_from_submission(log_folder: str, job_id: str) → None#
class core._cli_hydra.Submitit#

Bases: submitit.helpers.Checkpointable

Derived callable classes are requeued after timeout with their current state dumped at checkpoint.

__call__ method must be implemented to make your class a callable.

Note

The following implementation of the checkpoint method resubmits the full current state of the callable (self) with the initial arguments. You may want to replace the method to curate the state (for example, dump a neural network to a standard format and remove it from the state so that it is not pickled) and to change or remove the initial parameters.

config = None#
runner = None#
__call__(dict_config: omegaconf.DictConfig, run_type: RunType = RunType.RUN) → None#
_init_logger() → None#
checkpoint(*args, **kwargs) → submitit.helpers.DelayedSubmission#

Resubmits the same callable with the same arguments

core._cli_hydra.map_job_config_to_dist_config(job_cfg: JobConfig) → dict#
core._cli_hydra.get_canonical_config(config: omegaconf.DictConfig) → omegaconf.DictConfig#
core._cli_hydra.get_hydra_config_from_yaml(config_yml: str, overrides_args: list[str]) → omegaconf.DictConfig#
core._cli_hydra._runner_wrapper(config: omegaconf.DictConfig, run_type: RunType = RunType.RUN)#
core._cli_hydra.main(args: argparse.Namespace | None = None, override_args: list[str] | None = None)#