core.units.mlip_unit.inference.inference_server_ray#

Sequential request server with parallel model execution.

Usage: python server.py --workers 4 --port 8000
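A minimal client-side sketch of what a request to this server might look like. It assumes the default host and port from MLIPInferenceServerWebSocket ('localhost', 8001) and that requests and responses are exchanged as serialized bytes over the WebSocket; the pickle encoding shown is an assumption for illustration, not the module's documented wire format.

```python
import asyncio
import pickle

import websockets


async def request_prediction(payload: bytes) -> bytes:
    # Connect to a running inference server (default host/port assumed).
    async with websockets.connect("ws://localhost:8001") as ws:
        await ws.send(payload)   # send the serialized batch
        return await ws.recv()   # receive the serialized prediction


# Hypothetical payload; the real serialization format depends on the predict unit.
payload = pickle.dumps({"positions": [[0.0, 0.0, 0.0]], "atomic_numbers": [1]})
result_bytes = asyncio.run(request_prediction(payload))
```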

Attributes#

ray_installed

Classes#

MLIPWorker

MLIPInferenceServerWebSocket

Functions#

main(cfg)

Module Contents#

core.units.mlip_unit.inference.inference_server_ray.ray_installed = True#
class core.units.mlip_unit.inference.inference_server_ray.MLIPWorker(worker_id: int, world_size: int, master_port: int, predictor_config: dict)#
worker_id#
predict_unit#
_distributed_setup(worker_id: int, master_port: int, world_size: int, device: str)#
predict(data: bytes)#
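MLIPWorker documents only its constructor and predict(data: bytes); the server normally creates and coordinates these workers itself. The sketch below drives the same signatures directly as Ray actors, which is an assumption about usage, with hypothetical configuration values.

```python
import pickle

import ray

from core.units.mlip_unit.inference.inference_server_ray import MLIPWorker

ray.init()

predictor_config = {"checkpoint": "path/to/model.pt"}  # hypothetical keys
world_size = 2
master_port = 12355  # assumption: any free port for the distributed rendezvous

# Wrap the class as a Ray actor; each worker holds one replica of the model.
WorkerActor = ray.remote(MLIPWorker)
workers = [
    WorkerActor.remote(i, world_size, master_port, predictor_config)
    for i in range(world_size)
]

# The byte payload encoding is an assumption; predict() only documents `data: bytes`.
data = pickle.dumps({"positions": [[0.0, 0.0, 0.0]], "atomic_numbers": [1]})
results = ray.get([w.predict.remote(data) for w in workers])
```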
class core.units.mlip_unit.inference.inference_server_ray.MLIPInferenceServerWebSocket(predictor_config: dict, port=8001, num_workers=1)#
host = 'localhost'#
port#
num_workers#
predictor_config#
master_pg_port#
workers#
_setup_signal_handlers()#

Set up signal handlers for graceful shutdown

async handler(websocket)#
async start()#
run()#

Run the server (blocking)

shutdown()#

Shut down the server and clean up Ray resources
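A minimal sketch of running the server programmatically with the documented constructor, run(), and shutdown(); the predictor_config keys shown are hypothetical placeholders.

```python
from core.units.mlip_unit.inference.inference_server_ray import (
    MLIPInferenceServerWebSocket,
)

predictor_config = {"checkpoint": "path/to/model.pt"}  # hypothetical keys

server = MLIPInferenceServerWebSocket(
    predictor_config=predictor_config,
    port=8001,      # documented default
    num_workers=1,  # number of MLIPWorker Ray actors
)

try:
    server.run()        # blocking; serves WebSocket requests until interrupted
finally:
    server.shutdown()   # release Ray resources
```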

core.units.mlip_unit.inference.inference_server_ray.main(cfg: omegaconf.DictConfig)#
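main() takes an omegaconf DictConfig and is presumably driven from the command line (see the usage string at the top of the module). The sketch below invokes it directly; every key is a hypothetical placeholder rather than the project's actual config schema.

```python
from omegaconf import OmegaConf

from core.units.mlip_unit.inference.inference_server_ray import main

# Hypothetical configuration; the real schema comes from the project's config files.
cfg = OmegaConf.create(
    {
        "predictor_config": {"checkpoint": "path/to/model.pt"},
        "port": 8001,
        "num_workers": 1,
    }
)

main(cfg)
```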