diff --git a/README.rst b/README.rst index b21c55f1a..47e3677da 100644 --- a/README.rst +++ b/README.rst @@ -663,41 +663,46 @@ The host-specific parameters are configured in a simple YAML file (specified with the ``--mpi-config-file`` flag). The allowed keys are given in the following table; all are optional. -+----------------+------------------+----------+------------------------------+ -| Key | Type | Default | Description | -+================+==================+==========+==============================+ -| runner | str | "mpirun" | The primary command to use. | -+----------------+------------------+----------+------------------------------+ -| nproc_flag | str | "-n" | Flag to set number of | -| | | | processes to start. | -+----------------+------------------+----------+------------------------------+ -| default_nproc | int | 1 | Default number of processes. | -+----------------+------------------+----------+------------------------------+ -| extra_flags | List[str] | [] | A list of any other flags to | -| | | | be added to the runner's | -| | | | command line before | -| | | | the ``baseCommand``. | -+----------------+------------------+----------+------------------------------+ -| env_pass | List[str] | [] | A list of environment | -| | | | variables that should be | -| | | | passed from the host | -| | | | environment through to the | -| | | | tool (e.g., giving the | -| | | | node list as set by your | -| | | | scheduler). | -+----------------+------------------+----------+------------------------------+ -| env_pass_regex | List[str] | [] | A list of python regular | -| | | | expressions that will be | -| | | | matched against the host's | -| | | | environment. Those that match| -| | | | will be passed through. | -+----------------+------------------+----------+------------------------------+ -| env_set | Mapping[str,str] | {} | A dictionary whose keys are | -| | | | the environment variables set| -| | | | and the values being the | -| | | | values. | -+----------------+------------------+----------+------------------------------+ - ++----------------+------------------+------------+------------------------------+ +| Key | Type | Default | Description | ++================+==================+============+==============================+ +| runner | str | "mpirun" | The primary command to use. | ++----------------+------------------+------------+------------------------------+ +| nproc_flag | str | "-n" | Flag to set number of | +| | | | processes to start. | ++----------------+------------------+------------+------------------------------+ +| default_nproc | int | 1 | Default number of processes. | ++----------------+------------------+------------+------------------------------+ +| extra_flags | List[str] | [] | A list of any other flags to | +| | | | be added to the runner's | +| | | | command line before | +| | | | the ``baseCommand``. | ++----------------+------------------+------------+------------------------------+ +| env_pass | List[str] | [] | A list of environment | +| | | | variables that should be | +| | | | passed from the host | +| | | | environment through to the | +| | | | tool (e.g., giving the | +| | | | node list as set by your | +| | | | scheduler). | ++----------------+------------------+------------+------------------------------+ +| env_pass_regex | List[str] | [] | A list of python regular | +| | | | expressions that will be | +| | | | matched against the host's | +| | | | environment. Those that match| +| | | | will be passed through. | ++----------------+------------------+------------+------------------------------+ +| env_set | Mapping[str,str] | {} | A dictionary whose keys are | +| | | | the environment variables set| +| | | | and the values being the | +| | | | values. | ++----------------+------------------+------------+------------------------------+ +| shm_enabled | bool | True | Flag to control whether | +| | | | shared memory is used or not.| ++----------------+------------------+------------+------------------------------+ +| shm_dir | str | "/dev/shm" | Location to use for shared | +| | | | memory. | ++----------------+------------------+------------+------------------------------+ Enabling Fast Parser (experimental) =================================== diff --git a/cwltool/mpi.py b/cwltool/mpi.py index cea53e2f7..011d5f45c 100644 --- a/cwltool/mpi.py +++ b/cwltool/mpi.py @@ -23,6 +23,8 @@ def __init__( env_pass: list[str] | None = None, env_pass_regex: list[str] | None = None, env_set: Mapping[str, str] | None = None, + shm_enabled: bool = True, + shm_dir: str = "/dev/shm", # nosec B108 - required for MPI/shared memory in containers ) -> None: """ Initialize from the argument mapping. @@ -35,6 +37,8 @@ def __init__( env_pass: [] env_pass_regex: [] env_set: {} + shm_enabled: True + shm_dir: "/dev/shm Any unknown keys will result in an exception. """ @@ -45,6 +49,11 @@ def __init__( self.env_pass = env_pass or [] self.env_pass_regex = env_pass_regex or [] self.env_set = env_set or {} + self.shm_enabled = shm_enabled + # POSIX only contains functions to handle shared memory, but it does not + # specify the directory to be used, nor if a directory needs to be used + # at all -- ref: https://pubs.opengroup.org/onlinepubs/9699919799/ + self.shm_dir = shm_dir @classmethod def load(cls: type[MpiConfigT], config_file_name: str) -> MpiConfigT: diff --git a/cwltool/singularity.py b/cwltool/singularity.py index 11380e3ed..20cda3adc 100644 --- a/cwltool/singularity.py +++ b/cwltool/singularity.py @@ -1,5 +1,6 @@ """Support for executing Docker format containers using Singularity {2,3}.x or Apptainer 1.x.""" +import atexit import copy import hashlib import json @@ -11,7 +12,10 @@ import sys import threading from collections.abc import Callable, MutableMapping, MutableSequence +from contextlib import suppress +from importlib.resources import files as resource_files from subprocess import check_call, check_output, run # nosec +from tempfile import NamedTemporaryFile from typing import cast from cwl_utils.types import CWLDirectoryType, CWLFileType, CWLObjectType @@ -29,6 +33,7 @@ from .errors import WorkflowException from .job import ContainerCommandLineJob from .loghandler import _logger +from .mpi import MPIRequirementName from .pathmapper import MapperEnt, PathMapper from .singularity_utils import singularity_supports_userns from .utils import create_tmp_dir, ensure_non_writable, ensure_writable @@ -203,7 +208,7 @@ def __init__( hints: list[CWLObjectType], name: str, ) -> None: - """Builder for invoking the Singularty software container engine.""" + """Builder for invoking the Singularity software container engine.""" super().__init__(builder, joborder, make_path_mapper, requirements, hints, name) @staticmethod @@ -592,14 +597,55 @@ def create_runtime( """Return the Singularity runtime list of commands and options.""" any_path_okay = self.builder.get_requirement("DockerRequirement")[1] or False - runtime = [ - "singularity", - "--quiet", - "run" if (is_apptainer_1_1_or_newer() or is_version_3_10_or_newer()) else "exec", - "--contain", - "--ipc", - "--cleanenv", - ] + mpi_req, is_req = self.builder.get_requirement(MPIRequirementName) + mpi_enabled = mpi_req and is_req + mpi_config = runtime_context.mpi_config + mpi_env_vars_reference_file_name: str | None = None + runtime: list[str] = [] + if mpi_enabled: + # Save current environment variables. The ``singularity_wrapper.sh`` will + # diff it against the env vars produced by mpirun/srun/etc., and use the new + # env vars as SINGULARITYENV_... for Singularity. + with NamedTemporaryFile(mode="w+", delete=False) as f: + for k, v in os.environ.items(): + f.write(f"{k}={v}\n") + mpi_env_vars_reference_file_name = f.name + + def delete_mpi_baseline_env() -> None: + """Clean up the MPI baseline environment variables file at exit.""" + with suppress(FileNotFoundError): # pragma: no cover + os.remove(mpi_env_vars_reference_file_name) # pragma: no cover + + atexit.register(delete_mpi_baseline_env) + + runtime.extend( + [ + str(resource_files("cwltool") / "singularity_wrapper.sh"), + mpi_env_vars_reference_file_name, + "singularity", + ] + ) + else: + runtime.append("singularity") + + runtime.extend( + [ + "--quiet", + "run" if (is_apptainer_1_1_or_newer() or is_version_3_10_or_newer()) else "exec", + "--contain", + "--ipc", + "--cleanenv", + ] + ) + if mpi_enabled and mpi_config.shm_enabled: + # MPI implementations like OpenMPI and MPICH use shared memory. + self.append_volume( + runtime, + runtime_context.create_tmpdir(), + mpi_config.shm_dir, + writable=True, + ) + if is_apptainer_1_1_or_newer() or is_version_3_10_or_newer(): runtime.append("--no-eval") @@ -665,4 +711,4 @@ def create_runtime( if container_HOME: # Restore HOME if we removed it above. self.environment["HOME"] = container_HOME - return (runtime, None) + return runtime, None diff --git a/cwltool/singularity_wrapper.sh b/cwltool/singularity_wrapper.sh new file mode 100755 index 000000000..247c63c4d --- /dev/null +++ b/cwltool/singularity_wrapper.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +set -euo pipefail + +# singularity_wrapper.sh +# +# DESCRIPTION +# Wrapper around Singularity/Apptainer for CWL + MPI + Singularity. +# +# This script identifies environment variables added by an MPI launcher +# (e.g. srun, mpirun) and adds these environment variables as Singularity +# environment variables using the format ``SINGULARITYENV_$KEY=$VALUE``. +# +# This allows CWL (which uses ``--cleanenv``) to launch MPI + Singularity. +# +# USAGE +# singularity_wrapper.sh +# +# ARGUMENTS +# +# Path to the file containing KEY=VALUE pairs with the baseline env. +# +# +# Path to singularity/apptainer executable. +# +# [args...] +# Arguments passed to the singularity binary. +# +# EXAMPLE +# singularity_wrapper.sh env.txt singularity --cleanenv exec image.sif +# +# DEPENDENCIES +# It uses the following binaries: +# - printenv + +usage() { + cat >&2 < [args...] +EOF + exit 1 +} + +if [[ "${1:-}" == "--help" ]]; then + usage +fi + +[[ $# -ge 2 ]] || usage + +BASELINE_FILE="$1" +SINGULARITY_BIN="$2" +shift 2 + +if [[ ! -f "$BASELINE_FILE" ]]; then + echo "Error: baseline env file not found: $BASELINE_FILE" >&2 + exit 2 +fi + +# Read baseline env into a variable. +BASELINE_CONTENT=$'\n'"$(cat "$BASELINE_FILE")"$'\n' + +# Build new environment variables for Singularity (i.e. ``SINGULARITYENV_KEY=VALUE``). +# Excludes empty variables and variables whose name do not follow POSIX (e.g. some +# Bash environments on HPC clusters such as BSC MareNostrum5, ``BASH_FUNC_module%%=``). +while IFS='=' read -r k v; do + [[ -n "$k" ]] || continue + [[ "$k" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || continue + # If the current env doesn't exist (``! -z``) in the given baseline env (``BASE_ENV``), + # then we want to add it as ``--env`` in singularity. + # Check if the key exists in the BASELINE_CONTENT string in the + # form \n$KEY= (that's why we start the BASELINE and end it with \n). + if [[ ! "$BASELINE_CONTENT" == *$'\n'"$k"=* ]]; then + # Debug + # echo "Adding env var for Singularity command: SINGULARITYENV_$k=$v" >&2 + export "SINGULARITYENV_$k=$v" + fi +done < <(printenv) + +# Launch the Singularity binary. +exec "$SINGULARITY_BIN" "${@}" diff --git a/tests/test_mpi.py b/tests/test_mpi.py index 5ea2c7aa8..b13b6f8c2 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -3,6 +3,7 @@ import json import os.path import sys +import tempfile from collections.abc import Generator, MutableMapping from importlib.resources import files from io import StringIO @@ -10,7 +11,9 @@ from typing import Any, cast import pytest +from cwl_utils.types import CWLOutputType from ruamel.yaml.comments import CommentedMap, CommentedSeq +from schema_salad.avro import schema from schema_salad.avro.schema import Names from schema_salad.ref_resolver import file_uri from schema_salad.utils import yaml_no_ts @@ -18,11 +21,13 @@ import cwltool.load_tool import cwltool.singularity import cwltool.udocker +from cwltool.builder import Builder from cwltool.command_line_tool import CommandLineTool from cwltool.context import RuntimeContext from cwltool.main import main from cwltool.mpi import MpiConfig, MPIRequirementName - +from cwltool.stdfsaccess import StdFsAccess +from cwltool.update import INTERNAL_VERSION from .util import get_data, working_directory @@ -35,6 +40,8 @@ def test_mpi_conf_defaults() -> None: assert mpi.env_pass == [] assert mpi.env_pass_regex == [] assert mpi.env_set == {} + assert mpi.shm_dir == "/dev/shm" + assert mpi.shm_enabled is True def test_mpi_conf_unknownkeys() -> None: @@ -334,6 +341,120 @@ def test_singularity(schema_ext11: Names) -> None: assert jr is cwltool.singularity.SingularityCommandLineJob +def _make_fake_singularity() -> str: + tmpdir = tempfile.mkdtemp() + fake_path = Path(tmpdir) / "singularity" + with open(fake_path, "w") as f: + f.write("#!/bin/sh\n") + # It must print the version, as another test calls ``version()``. + f.write("echo 'singularity-ce version 3.11.5'\n") + + fake_path.chmod(0o755) + + return tmpdir + + +@pytest.mark.parametrize( + "requirements,shm_enabled,shm_dir,expected_command", + [ + ([], True, "/dev/shm", "singularity"), + ([], False, "/dev/shm", "singularity"), + ( + [CommentedMap({"class": MPIRequirementName, "processes": 1})], + True, + "/dev/shm", + "singularity_wrapper.sh", + ), + ( + [CommentedMap({"class": MPIRequirementName, "processes": 1})], + False, + "/dev/shm", + "singularity_wrapper.sh", + ), + ], + ids=[ + "No requirements, runs singularity, no shared mem used", + "No requirements, runs singularity, no shared mem used", + "MPIRequirement, runs mpirun, shared memory used", + "MPIRequirement, but no shared memory volume used", + ], +) +def test_singularity_create_runtime( + requirements: list[MutableMapping[str, CWLOutputType | None]], + shm_enabled: bool, + shm_dir: str, + expected_command: str, + schema_ext11: Names, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Tests that""" + runtime_context = RuntimeContext({}) + runtime_context.mpi_config.shm_dir = shm_dir + runtime_context.mpi_config.shm_enabled = shm_enabled + builder = Builder( + {}, + [], + [], + {}, + schema.Names(), + requirements, + [], + {}, + None, + None, + StdFsAccess, + StdFsAccess(""), + None, + 0.1, + True, + False, + False, + "no_listing", + runtime_context.get_outdir(), + runtime_context.get_tmpdir(), + runtime_context.get_stagedir(), + INTERNAL_VERSION, + "singularity", + ) + job = cwltool.singularity.SingularityCommandLineJob( + builder, {}, CommandLineTool.make_path_mapper, requirements=requirements, hints=[], name="" + ) + env = dict(os.environ) + # Inject a fake singularity into the $PATH. The reason for this, is that + # the MacOS GitHub Actions job fails when ``job.create_runtime`` gets + # called. Internally, it calls ``is_apptainer_1_1_or_newer``, which uses + # ``version_output = check_output(["singularity", "--version"], text=True).strip()``. + # The command call above will raise an exception (below) and crash pytest. + # ``FileNotFoundError: [Errno 2] No such file or directory: 'singularity'``. + fake_bin_dir = _make_fake_singularity() + with monkeypatch.context() as m: + m.setenv("PATH", fake_bin_dir + os.pathsep + os.environ["PATH"]) + env["PATH"] = fake_bin_dir + os.pathsep + env["PATH"] + + job.prepare_environment(runtime_context, env) + command, options = job.create_runtime(env, runtime_context) + + assert command + assert not options + + assert command[0] == expected_command or command[0].endswith(expected_command) + + mpi_req, is_req = builder.get_requirement(MPIRequirementName) + + def any_contains(haystack: list[str], needle: str) -> bool: + return any(needle in h for h in haystack) + + shared_memory_used = any_contains(command, shm_dir) + + if mpi_req and is_req: + if shm_enabled: + assert shared_memory_used, "Missing shared memory volume!" + else: + assert not shared_memory_used, "Shared memory volume not supposed to be used!" + else: + assert not shared_memory_used, "Shared memory volume used without MPIRequirement!" + + def test_udocker(schema_ext11: Names) -> None: rc, clt = mk_tool(schema_ext11, ["--udocker"], reqs=[mpiReq, containerReq]) clt._init_job({}, rc) diff --git a/tests/test_singularity_wrapper.py b/tests/test_singularity_wrapper.py new file mode 100644 index 000000000..052e632a0 --- /dev/null +++ b/tests/test_singularity_wrapper.py @@ -0,0 +1,85 @@ +"""Tests for the Shell wrapper of the Singularity command. + +This script tests a Shell script. This script does not contribute to the +project test coverage (although kcov, or bats+kcov could be used in the +future). +""" + +import os +import subprocess +from importlib.resources import files +from textwrap import dedent +from typing import TYPE_CHECKING + +import pytest + +if TYPE_CHECKING: + from pathlib import Path + + +@pytest.mark.parametrize( + "args,expected_return_code", + [(["--help"], 1), ([""], 1), (["singularity"], 1)], + ids=[ + "Print usage because user passed --help", + "Print usage because of missing args", + "Print usage because not all args provided", + ], +) +def test_wrapper_usage(args: list[str], expected_return_code: int) -> None: + """Test the usage of the Singularity wrapper is printed.""" + wrapper = str(files("cwltool") / "singularity_wrapper.sh") + command: list[str] = [wrapper] + args + result = subprocess.run(command, capture_output=True, text=True) + + assert result.returncode == expected_return_code + assert "Wrapper around Singularity/Apptainer for CWL + MPI + Singularity" in result.stderr + + +def test_wrapper_invalid_baseline_env_file() -> None: + """Test the script fails if the given file is not valid.""" + wrapper = str(files("cwltool") / "singularity_wrapper.sh") + command: list[str] = [wrapper, "parangaricutirimicuaro.dat", "foo"] + result = subprocess.run(command, capture_output=True, text=True) + + assert result.returncode == 2 + assert "file not found" in result.stderr + + +def test_wrapper_env_vars(tmp_path: "Path") -> None: + """Test that the wrapper script adds the new environment variables.""" + fake_singularity = tmp_path / "fake_singularity" + fake_singularity.write_text(dedent("""\ + #!/bin/bash + echo "Fake Singularity script" + env + """)) + fake_singularity.chmod(0o755) + + new_env_var = "TEST_WRAPPER_ENV_VARS_INJECTED_VAR" + + # Create the baseline environment variables file. + baseline_env = os.environ + assert new_env_var not in baseline_env, "The test needs a new env var!" + baseline = tmp_path / "baseline.env" + baseline.write_text("A=1\nB=2\n") + for k, v in baseline_env.items(): + baseline.write_text(f"{k}={v}") + + # Now pretend we are mpirun, and we are adding a new env var. + new_env = os.environ.copy() + new_env[new_env_var] = "42" + + wrapper = str(files("cwltool") / "singularity_wrapper.sh") + command: list[str] = [wrapper, str(baseline), str(fake_singularity), "--cleanenv"] + + result = subprocess.run(command, capture_output=True, text=True, env=new_env) + + assert result.returncode == 0 + # There, now the wrapper just runs `env`, and the output must + # contain the new environment variable. We know the wrapper + # must have worked because we have thew new variable in the + # output... + assert new_env_var in result.stdout + # And also because we have the new SINGULARITYENV_{new_env_var}! + assert f"SINGULARITYENV_{new_env_var}" in result.stdout