Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 22 additions & 6 deletions src/shapepipe/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

"""

import os
import sys
from datetime import datetime
from importlib.metadata import requires
Expand All @@ -22,12 +23,27 @@
from shapepipe.pipeline.job_handler import JobHandler
from shapepipe.pipeline.mpi_run import split_mpi_jobs, submit_mpi_jobs

try:
from mpi4py import MPI
except ImportError: # pragma: no cover
import_mpi = False
# Importing mpi4py initializes MPI immediately, which aborts the whole
# process when no MPI launcher is available — e.g. inside an
# ``srun``-launched shell on a SLURM cluster, where Open MPI detects the
# SLURM step environment, expects a PMI server that srun never started,
# and calls MPI_Abort before even ``shapepipe_run -h`` can print (#744).
# Only import (and hence initialize) MPI when a launcher environment is
# actually present: ``mpirun``/``orterun`` set OMPI_COMM_WORLD_SIZE,
# ``srun --mpi=pmi2`` sets PMI_RANK and ``srun --mpi=pmix`` sets
# PMIX_RANK. A bare ``shapepipe_run`` (login node, compute-node shell,
# container) runs in SMP mode without ever touching MPI.
_MPI_LAUNCHER_VARS = ("OMPI_COMM_WORLD_SIZE", "PMI_RANK", "PMIX_RANK")

if any(var in os.environ for var in _MPI_LAUNCHER_VARS):
try:
from mpi4py import MPI
except ImportError: # pragma: no cover
import_mpi = False
else:
import_mpi = True
else:
import_mpi = True
import_mpi = False


class ShapePipe:
Expand Down Expand Up @@ -178,7 +194,7 @@ def _check_dependencies(self):
module_dep = self._get_module_depends("depends") + __installs__
module_exe = self._get_module_depends("executes")

module_dep += ["mpi4py"] if import_mpi else module_dep
module_dep += ["mpi4py"] if import_mpi else []

exe_to_module = {
exe: module
Expand Down
54 changes: 54 additions & 0 deletions src/shapepipe/tests/test_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""UNIT TESTS FOR RUN.

This module contains unit tests for the shapepipe.run module, in
particular the MPI-launcher gating of the mpi4py import (#744): a bare
``shapepipe_run`` must never initialize MPI, otherwise the whole process
aborts inside an ``srun``-launched shell whose Open MPI lacks SLURM PMI
support.

:Author: Claude (on behalf of Cail Daley) <cail.daley@cea.fr>

"""

import os
import subprocess
import sys

import pytest

# One-liner handed to ``python -c`` in a child interpreter: it imports
# shapepipe.run and prints the module-level ``import_mpi`` flag to stdout.
SNIPPET = "import shapepipe.run as r; print(r.import_mpi)"

# Env vars that either mark an MPI launcher (the gate) or make Open MPI
# believe it was direct-launched by srun (the failure mode under test).
# Any inherited variable whose name starts with one of these prefixes is
# dropped from the child environment before the snippet is run.
_SCRUBBED_PREFIXES = ("OMPI_", "PMI_", "PMIX_", "SLURM_")


def _import_mpi_flag(extra_env):
    """Return the ``import_mpi`` value printed by a scrubbed child process.

    A fresh interpreter executes ``SNIPPET`` with a copy of the current
    environment from which every variable whose name starts with one of
    ``_SCRUBBED_PREFIXES`` has been removed; ``extra_env`` is then merged
    on top of that copy.

    Parameters
    ----------
    extra_env : dict
        Environment variables to add to (or override in) the scrubbed
        environment of the child process.

    Returns
    -------
    str
        Standard output of the child process, stripped of surrounding
        whitespace.

    Raises
    ------
    AssertionError
        If the child process exits with a non-zero return code; the
        message carries the exit code and the captured standard error.

    """
    # Build the child environment by hand so launcher/SLURM variables of
    # the calling shell cannot leak into the process under test.
    child_env = {}
    for name, setting in os.environ.items():
        if name.startswith(_SCRUBBED_PREFIXES):
            continue
        child_env[name] = setting
    child_env.update(extra_env)

    command = [sys.executable, "-c", SNIPPET]
    proc = subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
    )
    assert proc.returncode == 0, (
        f"subprocess failed (exit {proc.returncode}): {proc.stderr}"
    )
    return proc.stdout.strip()


def test_bare_launch_skips_mpi():
    """Without any MPI launcher variable the flag must read ``False``."""
    # No extra variables: the child sees only the scrubbed environment.
    reported = _import_mpi_flag({})
    assert reported == "False"


def test_mpirun_launch_imports_mpi():
    """With OMPI_COMM_WORLD_SIZE set the flag must read ``True``."""
    # Skip rather than fail when mpi4py is not installed.
    pytest.importorskip("mpi4py")
    launcher_env = {"OMPI_COMM_WORLD_SIZE": "1"}
    reported = _import_mpi_flag(launcher_env)
    assert reported == "True"
Loading