From aa8295bd8b3384de45bb231cc951a43b30e03977 Mon Sep 17 00:00:00 2001 From: Matteo Cusini Date: Mon, 9 Sep 2024 14:59:08 -0700 Subject: [PATCH 1/6] upgrade rockylinux images and clean up old files. --- docker/Stanford/Dockerfile | 1 - docker/TotalEnergies/pecan-CPU.cmake | 31 --- docker/TotalEnergies/pecan-GPU.cmake | 24 --- docker/TotalEnergies/pecan.Dockerfile | 193 ------------------ docker/tpl-centos-gcc-cuda.Dockerfile | 113 ---------- .../tpl-rockylinux-clang-cuda-12.Dockerfile | 2 +- docker/tpl-rockylinux-gcc-cuda-12.Dockerfile | 2 +- 7 files changed, 2 insertions(+), 364 deletions(-) delete mode 100644 docker/TotalEnergies/pecan-CPU.cmake delete mode 100644 docker/TotalEnergies/pecan-GPU.cmake delete mode 100644 docker/TotalEnergies/pecan.Dockerfile delete mode 100644 docker/tpl-centos-gcc-cuda.Dockerfile diff --git a/docker/Stanford/Dockerfile b/docker/Stanford/Dockerfile index 7615be1d..2a745259 100644 --- a/docker/Stanford/Dockerfile +++ b/docker/Stanford/Dockerfile @@ -3,7 +3,6 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG BLD_DIR=$TMP_DIR/build -# The docker base image can be pecan or pangea. 
ARG DOCKER_ROOT_IMAGE FROM $DOCKER_ROOT_IMAGE as tpl_toolchain_intersect_geosx_toolchain ARG SRC_DIR diff --git a/docker/TotalEnergies/pecan-CPU.cmake b/docker/TotalEnergies/pecan-CPU.cmake deleted file mode 100644 index 722ea2b4..00000000 --- a/docker/TotalEnergies/pecan-CPU.cmake +++ /dev/null @@ -1,31 +0,0 @@ -set(COMPILER_HOME "/apps/gcc/8.2.0/x86_64") -set(MPI_HOME "/hrtc/apps/mpi/openmpi/4.0.1/RDHPC/gcc/8.2.0") - -set(CMAKE_C_COMPILER ${COMPILER_HOME}/bin/gcc CACHE PATH "" FORCE) -set(CMAKE_CXX_COMPILER ${COMPILER_HOME}/bin/g++ CACHE PATH "" FORCE) -set(CMAKE_Fortran_COMPILER ${COMPILER_HOME}/bin/gfortran CACHE PATH "" FORCE) -set(ENABLE_FORTRAN OFF CACHE BOOL "" FORCE) - -set(ENABLE_MPI ON CACHE PATH "" FORCE) -set(ENABLE_OPENMP ON CACHE PATH "" FORCE) - -set(MPI_C_COMPILER "${MPI_HOME}/bin/mpicc" CACHE PATH "" FORCE) -set(MPI_CXX_COMPILER "${MPI_HOME}/bin/mpicxx" CACHE PATH "" FORCE) -set(MPI_Fortran_COMPILER "${MPI_HOME}/bin/mpifort" CACHE PATH "" FORCE) -set(MPIEXEC_EXECUTABLE "${MPI_HOME}/bin/mpirun" CACHE PATH "" FORCE) -#set(MPIEXEC_EXECUTABLE /apps/slurm/x86/20.02.0/bin/srun CACHE PATH "" FORCE) -#set(MPIEXEC_NUMPROC_FLAG "-p pecan -n" CACHE STRING "") -set(ENABLE_WRAP_ALL_TESTS_WITH_MPIEXEC ON CACHE BOOL "") - -set(ENABLE_GTEST_DEATH_TESTS ON CACHE BOOL "" FORCE) -set(ENABLE_CALIPER ON CACHE BOOL "") - -set(ENABLE_MKL ON CACHE BOOL "") -set(INTEL_ROOT "/apps/intel/2019/u5/compilers_and_libraries_2019.5.281/linux" ) -set(MKL_ROOT "${INTEL_ROOT}/mkl" ) -set(MKL_INCLUDE_DIRS ${MKL_ROOT}/include CACHE STRING "") -set(MKL_LIBRARIES ${MKL_ROOT}/lib/intel64/libmkl_intel_lp64.so - ${MKL_ROOT}/lib/intel64/libmkl_intel_thread.so - ${MKL_ROOT}/lib/intel64/libmkl_core.so - ${INTEL_ROOT}/compiler/lib/intel64_lin/libiomp5.so - CACHE STRING "") diff --git a/docker/TotalEnergies/pecan-GPU.cmake b/docker/TotalEnergies/pecan-GPU.cmake deleted file mode 100644 index ef6e4457..00000000 --- a/docker/TotalEnergies/pecan-GPU.cmake +++ /dev/null @@ -1,24 +0,0 @@ -# Retrieve 
the compilers, standard libraries... from the CPU configuration -include(${CMAKE_CURRENT_LIST_DIR}/pecan-CPU.cmake) - -# Now let's add what's dedicated to GPU. -set(ENABLE_CUDA ON CACHE PATH "" FORCE) -set(CUDA_TOOLKIT_ROOT_DIR /hrtc/apps/cuda/11.5.119/x86_64/centos7 CACHE PATH "") -set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "") -set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc CACHE STRING "") - -set(CUDA_ARCH sm_75 CACHE STRING "") -set(CMAKE_CUDA_ARCHITECTURES 75 CACHE STRING "") -set(CMAKE_CUDA_FLAGS "-restrict -arch ${CUDA_ARCH} --expt-relaxed-constexpr --expt-extended-lambda -Werror cross-execution-space-call,reorder,deprecated-declarations" CACHE STRING "") -set(CMAKE_CUDA_FLAGS_RELEASE "-O3 -DNDEBUG -Xcompiler -DNDEBUG -Xcompiler -O3" CACHE STRING "") -set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-g -lineinfo ${CMAKE_CUDA_FLAGS_RELEASE}" CACHE STRING "") -set(CMAKE_CUDA_FLAGS_DEBUG "-g -G -O0 -Xcompiler -O0" CACHE STRING "") - -# Current version of hypre does not build with GPU support inside of docker. -# Hypre's build system awaits to be embedded into a CUDA environment. -# This is a bit tedious to reproduce in docker environment. -# And since most recent version of hypre do build without this constraint. -# Let's wait for an upgrade on our side. -# In the mean time, if you need the GPU support for hypre, -# simply install the classical way, with some `module load cuda`. -#set(ENABLE_HYPRE_CUDA ON CACHE BOOL "" FORCE) diff --git a/docker/TotalEnergies/pecan.Dockerfile b/docker/TotalEnergies/pecan.Dockerfile deleted file mode 100644 index 3e19fbef..00000000 --- a/docker/TotalEnergies/pecan.Dockerfile +++ /dev/null @@ -1,193 +0,0 @@ -ARG GCC_VERSION=8.2.0 -ARG OPENMPI_VERSION=4.0.1 -ARG UCX_VERSION=1.3.0 -# Note that you need to define both SLURM_TARBALL and SLURM_HOME because one cannot be deduced from the other smoothly. -# It would require (heavy?) text processing for little benefit. 
-ARG SLURM_TARBALL=slurm-20-02-0-1.tar.gz -ARG SLURM_HOME=/apps/slurm/x86/20.02.0 - -ARG GCC_HOME=/apps/gcc/${GCC_VERSION}/x86_64 -ARG UCX_HOME=/hrtc/apps/devtools/ucx/${UCX_VERSION}/x86_64_nocuda/gcc/${GCC_VERSION} -ARG OPENMPI_HOME=/hrtc/apps/mpi/openmpi/${OPENMPI_VERSION}/RDHPC/gcc/${GCC_VERSION} -# While the installation directory is defined here, the patches and exact versions are still defined in the CUDA stage. -ARG CUDA_HOME=/hrtc/apps/cuda/11.5.119/x86_64/centos7 - -FROM centos:7.7.1908 AS shared_components - -RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo && \ - sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo && \ - sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo - -RUN yum update -y && \ - yum install -y \ - glibc-devel - -FROM shared_components AS gcc_stage - -ARG GCC_VERSION -ARG GCC_HOME - -# FIXME wget could be replaced by curl in the contrib/download_prerequisites script. To be challenged. -RUN yum install -y \ - make \ - gcc \ - gcc-c++ \ - wget \ - bzip2 \ - zlib-devel - -WORKDIR /tmp/src -RUN curl -s https://ftp.gnu.org/gnu/gcc/gcc-${GCC_VERSION}/gcc-${GCC_VERSION}.tar.gz | tar --strip-components=1 -xzf - -RUN ./contrib/download_prerequisites -RUN ./configure \ - --prefix=${GCC_HOME} \ - --disable-multilib \ - --with-system-zlib \ - --enable-threads=posix \ - --enable-languages=c,c++,fortran -RUN make -j $(nproc) && make install-strip - -FROM shared_components AS ucx_stage - -ARG UCX_VERSION -ARG UCX_HOME -ARG GCC_HOME - -COPY --from=gcc_stage ${GCC_HOME} ${GCC_HOME} - -ENV CC=${GCC_HOME}/bin/gcc \ - CXX=${GCC_HOME}/bin/g++ \ - FC=${GCC_HOME}/bin/gfortran \ - LD_LIBRARY_PATH=${GCC_HOME}/lib64 - -RUN yum install -y \ - make \ - numactl-devel - -WORKDIR /tmp/src -RUN curl -fsSL https://github.com/openucx/ucx/releases/download/v${UCX_VERSION}/ucx-${UCX_VERSION}.tar.gz | tar --strip-components=1 -xzf - -RUN ./configure \ - --prefix=${UCX_HOME} \ - --enable-mt \ - 
--disable-optimizations \ - --disable-logging \ - --disable-debug \ - --disable-assertions -RUN make -j $(nproc) && make install - -FROM shared_components AS slurm_stage - -ARG SLURM_TARBALL -ARG SLURM_HOME -ARG GCC_HOME - -COPY --from=gcc_stage ${GCC_HOME} ${GCC_HOME} - -ENV CC=${GCC_HOME}/bin/gcc \ - CXX=${GCC_HOME}/bin/g++ \ - FC=${GCC_HOME}/bin/gfortran \ - LD_LIBRARY_PATH=${GCC_HOME}/lib64 - -RUN yum install -y perl python3 file make - -WORKDIR /tmp/src -RUN curl -fsSL https://github.com/SchedMD/slurm/archive/${SLURM_TARBALL} | tar --strip-components=1 -xzf - -RUN ./configure --prefix=${SLURM_HOME} -RUN make -j $(nproc) && make install - -FROM shared_components AS openmpi_stage - -ARG SLURM_HOME -ARG UCX_HOME -ARG GCC_HOME -# FIXME so we do not use SLURM? -ARG OPENMPI_VERSION -ARG OPENMPI_HOME - -COPY --from=gcc_stage ${GCC_HOME} ${GCC_HOME} -COPY --from=ucx_stage ${UCX_HOME} ${UCX_HOME} -COPY --from=slurm_stage ${SLURM_HOME} ${SLURM_HOME} - -ENV CC=${GCC_HOME}/bin/gcc \ - CXX=${GCC_HOME}/bin/g++ \ - FC=${GCC_HOME}/bin/gfortran \ - LD_LIBRARY_PATH=${GCC_HOME}/lib64 - -RUN yum install -y \ - perl \ - make \ - zlib-devel \ - numactl-devel -# FIXME deal with the devel probably too much - -WORKDIR /tmp/src -RUN curl -fsSL https://download.open-mpi.org/release/open-mpi/v${OPENMPI_VERSION%.[0-9]*}/openmpi-${OPENMPI_VERSION}.tar.gz | tar --strip-components=1 -xzf - -RUN ./configure CC=$CC FC=$FC CXX=$CXX \ - --prefix=${OPENMPI_HOME} \ - --enable-static \ - --enable-smp-locks \ - --enable-mpi-thread-multiple \ - --with-slurm \ - --with-ucx=${UCX_HOME} \ - --with-ucx-libdir=${UCX_HOME}/lib \ - --with-io-romio-flags=--with-file-system=testfs+ufs+lustre -RUN make -j $(nproc) && make install - -FROM shared_components AS cuda_stage - -ARG GCC_HOME - -COPY --from=gcc_stage ${GCC_HOME} ${GCC_HOME} - -ARG CUDA_HOME - -# FIXME Why glibc-devel?!?!? 
-RUN yum install -y which glibc-devel - -ENV PATH=${GCC_HOME}/bin:${PATH} \ - LD_LIBRARY_PATH=${GCC_HOME}/lib64 - -WORKDIR /tmp/src -ADD https://developer.download.nvidia.com/compute/cuda/11.5.0/local_installers/cuda_11.5.0_495.29.05_linux.run . -RUN mkdir -p ${CUDA_HOME} -RUN sh cuda_11.5.0_495.29.05_linux.run --silent --toolkit --no-man-page --installpath=${CUDA_HOME} - -FROM shared_components AS intel_stage - -ARG SLURM_HOME -ARG UCX_HOME -ARG GCC_HOME -ARG OPENMPI_HOME -ARG CUDA_HOME - -COPY --from=gcc_stage ${GCC_HOME} ${GCC_HOME} -COPY --from=ucx_stage ${UCX_HOME} ${UCX_HOME} -COPY --from=slurm_stage ${SLURM_HOME} ${SLURM_HOME} -COPY --from=openmpi_stage ${OPENMPI_HOME} ${OPENMPI_HOME} -COPY --from=cuda_stage ${CUDA_HOME} ${CUDA_HOME} - -RUN yum install -y \ - numactl-devel -# FIXME maybe only numactl-libs? - -RUN rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB -RUN yum-config-manager \ - --add-repo https://yum.repos.intel.com/mkl/setup/intel-mkl.repo -RUN yum install -y \ - intel-mkl-2019.5-075.x86_64 && \ - yum clean all - -ARG PECAN_INTEL_HOME=/apps/intel/2019/u5 -ARG INTEL_DIR_NAME=compilers_and_libraries_2019.5.281 -RUN mkdir -p /apps/intel/2019/u5 && \ - ln -s /opt/intel/${INTEL_DIR_NAME} ${PECAN_INTEL_HOME}/${INTEL_DIR_NAME} - -# Exposing quite everything, making future modularization more complicated. -# Most likely there will be no future modularization! 
-ENV CC=${GCC_HOME}/bin/gcc \ - CXX=${GCC_HOME}/bin/g++ \ - FC=${GCC_HOME}/bin/gfortran \ - MPICC=${OPENMPI_HOME}/bin/mpicc \ - MPICXX=${OPENMPI_HOME}/bin/mpicxx \ - MPIFC=${OPENMPI_HOME}/bin/mpifort \ - LD_LIBRARY_PATH=${GCC_HOME}/lib64:${PECAN_INTEL_HOME}/${INTEL_DIR_NAME}/linux/mkl/lib/intel64:${PECAN_INTEL_HOME}/${INTEL_DIR_NAME}/compiler/lib/intel64:${OPENMPI_HOME}/lib diff --git a/docker/tpl-centos-gcc-cuda.Dockerfile b/docker/tpl-centos-gcc-cuda.Dockerfile deleted file mode 100644 index a02b350a..00000000 --- a/docker/tpl-centos-gcc-cuda.Dockerfile +++ /dev/null @@ -1,113 +0,0 @@ -# NOTE: see docker/tpl-ubuntu-gcc.Dockerfile for detailed comments -ARG TMP_DIR=/tmp -ARG SRC_DIR=$TMP_DIR/thirdPartyLibs -ARG BLD_DIR=$TMP_DIR/build - -FROM nvidia/cuda:11.8.0-devel-centos7 AS tpl_toolchain_intersect_geosx_toolchain -ARG SRC_DIR - -ARG INSTALL_DIR -ENV GEOSX_TPL_DIR=$INSTALL_DIR - -RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo && \ - sed -i s/^#.*baseurl=http/baseurl=https/g /etc/yum.repos.d/*.repo && \ - sed -i s/^mirrorlist=http/#mirrorlist=https/g /etc/yum.repos.d/*.repo - -# Using gcc 8.3.1 provided by the Software Collections (SCL). 
-RUN yum install -y \ - centos-release-scl - -# Modify the SCLo repository configuration -RUN sed -i 's|^mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-SCLo-scl.repo && \ - sed -i 's|^baseurl=http://mirror.centos.org/centos/\$releasever/sclo/\$basearch/rh|baseurl=http://vault.centos.org/7.9.2009/sclo/x86_64/rh|g' /etc/yum.repos.d/CentOS-SCLo-scl.repo && \ - sed -i 's|^mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo && \ - sed -i 's|^baseurl=http://mirror.centos.org/centos/\$releasever/sclo/\$basearch/rh|baseurl=http://vault.centos.org/7.9.2009/sclo/x86_64/rh|g' /etc/yum.repos.d/CentOS-SCLo-scl-rh.repo - -# Install necessary tools and update the system -RUN yum -y update && \ - yum -y install yum-utils - -RUN yum install -y \ - devtoolset-8-gcc \ - devtoolset-8-gcc-c++ \ - devtoolset-8-gcc-gfortran - -# Installing dependencies -RUN yum -y install \ - ca-certificates \ - curl \ - tbb \ - blas-devel \ - lapack-devel \ - zlib-devel \ - openmpi-devel \ - python3 - -RUN --mount=src=.,dst=$SRC_DIR $SRC_DIR/docker/install-cmake.sh - -ENV CC=/opt/rh/devtoolset-8/root/usr/bin/gcc \ - CXX=/opt/rh/devtoolset-8/root/usr/bin/g++ \ - MPICC=/usr/lib64/openmpi/bin/mpicc \ - MPICXX=/usr/lib64/openmpi/bin/mpicxx \ - MPIEXEC=/usr/lib64/openmpi/bin/mpirun -ENV OMPI_CC=$CC \ - OMPI_CXX=$CXX -ENV ENABLE_CUDA=ON \ - CMAKE_CUDA_FLAGS="-restrict -arch sm_70 --expt-extended-lambda -Werror cross-execution-space-call,reorder,deprecated-declarations" - -# Installing TPL's -FROM tpl_toolchain_intersect_geosx_toolchain AS tpl_toolchain -ARG SRC_DIR -ARG BLD_DIR - -ENV FC=/opt/rh/devtoolset-8/root/usr/bin/gfortran \ - MPIFC=/usr/lib64/openmpi/bin/mpifort -ENV OMPI_FC=$FC - -RUN yum install -y \ - tbb-devel \ - make \ - bc \ - file \ - bison \ - flex \ - patch - -ARG HOST_CONFIG - -ARG CUDA_TOOLKIT_ROOT_DIR=/usr/local/cuda -ARG CUDA_ARCH=sm_70 -ARG CMAKE_CUDA_COMPILER=$CUDA_TOOLKIT_ROOT_DIR/bin/nvcc -ARG CMAKE_CUDA_ARCHITECTURES=70 - -ENV HYPRE_CUDA_SM=70 -ENV 
CUDA_HOME=$CUDA_TOOLKIT_ROOT_DIR - -RUN --mount=src=.,dst=$SRC_DIR $SRC_DIR/docker/configure-tpl.sh \ - -DENABLE_CUDA=$ENABLE_CUDA \ - -DENABLE_HYPRE_DEVICE="CUDA" \ - -DCUDA_TOOLKIT_ROOT_DIR=$CUDA_TOOLKIT_ROOT_DIR \ - -DCUDA_ARCH=$CUDA_ARCH \ - -DCMAKE_CUDA_ARCHITECTURES=$CMAKE_CUDA_ARCHITECTURES \ - -DCMAKE_CUDA_COMPILER=$CMAKE_CUDA_COMPILER -WORKDIR $BLD_DIR -RUN --mount=src=.,dst=$SRC_DIR make - -# Extract only TPL's from previous stage -FROM tpl_toolchain_intersect_geosx_toolchain AS geosx_toolchain -ARG SRC_DIR - -COPY --from=tpl_toolchain $GEOSX_TPL_DIR $GEOSX_TPL_DIR -RUN yum install -y \ - openssh-client \ - ca-certificates \ - curl \ - python3 \ - texlive \ - graphviz \ - git - -RUN --mount=src=.,dst=$SRC_DIR $SRC_DIR/docker/install-ninja.sh - -RUN --mount=src=.,dst=$SRC_DIR $SRC_DIR/docker/install-sccache.sh -ENV SCCACHE=/opt/sccache/bin/sccache diff --git a/docker/tpl-rockylinux-clang-cuda-12.Dockerfile b/docker/tpl-rockylinux-clang-cuda-12.Dockerfile index 96d13da2..47ba2391 100644 --- a/docker/tpl-rockylinux-clang-cuda-12.Dockerfile +++ b/docker/tpl-rockylinux-clang-cuda-12.Dockerfile @@ -3,7 +3,7 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG BLD_DIR=$TMP_DIR/build -FROM nvidia/cuda:12.5.0-devel-rockylinux8 AS tpl_toolchain_intersect_geosx_toolchain +FROM nvidia/cuda:12.6.1-devel-rockylinux9 AS tpl_toolchain_intersect_geosx_toolchain ARG SRC_DIR ARG INSTALL_DIR diff --git a/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile b/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile index 5091d116..45f97751 100644 --- a/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile +++ b/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile @@ -2,7 +2,7 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG BLD_DIR=$TMP_DIR/build -FROM nvidia/cuda:12.5.0-devel-rockylinux8 AS tpl_toolchain_intersect_geosx_toolchain +FROM nvidia/cuda:12.6.1-devel-rockylinux9 AS tpl_toolchain_intersect_geosx_toolchain ARG SRC_DIR ARG INSTALL_DIR From e06a7c91b95ac06ae589abb026525af8ec960327 Mon 
Sep 17 00:00:00 2001 From: mfrigo Date: Mon, 9 Sep 2024 19:55:27 -0700 Subject: [PATCH 2/6] Sherlock image: bypassing EOL of centos and moving to cuda 12.4.0 --- .github/workflows/docker_build_tpls.yml | 14 ++++++++++++++ docker/Stanford/Sherlock.Dockerfile | 10 ++++++++-- ...ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake} | 2 +- 3 files changed, 23 insertions(+), 3 deletions(-) rename docker/Stanford/{sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake => sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake} (97%) diff --git a/.github/workflows/docker_build_tpls.yml b/.github/workflows/docker_build_tpls.yml index 58772621..92c14030 100644 --- a/.github/workflows/docker_build_tpls.yml +++ b/.github/workflows/docker_build_tpls.yml @@ -62,6 +62,20 @@ jobs: DOCKER_REPOSITORY: geosx/rockylinux8-clang17-cuda12.5 DOCKERFILE: docker/tpl-rockylinux-clang-cuda-12.Dockerfile RUNS_ON: Runner_4core_16GB + - name: Sherlock CPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, openblas 0.3.10, zlib 1.2.11) + DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-openblas0.3.10-zlib1.2.11 + DOCKERFILE: docker/Stanford/Dockerfile + DOCKER_ROOT_IMAGE: matteofrigo5/sherlock-gcc10.1.0-openmpi4.1.2-cuda12.4.0-openblas0.3.10-zlib1.2.11-no-geosx:0.0.2 + HOST_CONFIG: docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10.cmake + INSTALL_DIR_ROOT: /oak/stanford/groups/tchelepi/geos-sherlock/CPU + RUNS_ON: ubuntu-latest + - name: Sherlock GPU (centos 7.9.2009, gcc 10.1.0, open-mpi 4.1.2, cuda 12.4.0, openblas 0.3.10, zlib 1.2.11) + DOCKER_REPOSITORY: geosx/sherlock-gcc10.1.0-openmpi4.1.2-cuda12.4.0-openblas0.3.10-zlib1.2.11 + DOCKERFILE: docker/Stanford/Dockerfile + DOCKER_ROOT_IMAGE: matteofrigo5/sherlock-gcc10.1.0-openmpi4.1.2-cuda12.4.0-openblas0.3.10-zlib1.2.11-no-geosx:0.0.2 + HOST_CONFIG: docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake + INSTALL_DIR_ROOT: /oak/stanford/groups/tchelepi/geos-sherlock/GPU + RUNS_ON: ubuntu-latest steps: - 
name: Checkout diff --git a/docker/Stanford/Sherlock.Dockerfile b/docker/Stanford/Sherlock.Dockerfile index ab3992f6..353874a1 100644 --- a/docker/Stanford/Sherlock.Dockerfile +++ b/docker/Stanford/Sherlock.Dockerfile @@ -11,8 +11,9 @@ ARG GCC_VERSION=10.1.0 ARG OPENMPI_VERSION=4.1.2 ARG OPENBLAS_VERSION=0.3.10 ARG ZLIB_VERSION=1.2.11 -ARG CUDA_VERSION=11.7.1 -ARG CUDA_SUBVERSION=515.65.01 +ARG CUDA_VERSION=12.4.0 +ARG CUDA_SUBVERSION=550.54.14 + # Main software root installation directory in SHERLOCK ARG SHERLOCK_ROOT_INSTALL_DIR=/share/software/user/open @@ -28,6 +29,11 @@ FROM centos:7.9.2009 AS shared_components RUN yum install -y \ glibc-devel +# I need these lines because centos 7.9 has reached EOL +RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/CentOS-*.repo +RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/CentOS-*.repo +RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/CentOS-*.repo + # We'll compile and deploy a version of `gcc` in this stage. 
FROM shared_components AS gcc_stage diff --git a/docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake b/docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake similarity index 97% rename from docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake rename to docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake index 51031871..4185dfad 100644 --- a/docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda11.7.1-sm70.cmake +++ b/docker/Stanford/sherlock-gcc10-ompi4.1.2-openblas0.3.10-cuda12.4.0-sm70.cmake @@ -5,7 +5,7 @@ set(ENABLE_OPENMP OFF CACHE BOOL "" FORCE) # CUDA options set(ENABLE_CUDA ON CACHE BOOL "" FORCE) -set(CUDA_VERSION "11.7.1" CACHE PATH "") +set(CUDA_VERSION "12.4.0" CACHE PATH "") set(CUDA_HOME "${SOFTWARE_ROOT}/cuda/${CUDA_VERSION}" CACHE PATH "") set(CMAKE_CUDA_ARCHITECTURES "70" CACHE STRING "") set(CUDA_ARCH "sm_${CMAKE_CUDA_ARCHITECTURES}" CACHE STRING "") From a3198c4df78eee2c9389af40c4b4adcc2f49894c Mon Sep 17 00:00:00 2001 From: Matteo Cusini <49037133+CusiniM@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:14:17 -0700 Subject: [PATCH 3/6] Apply suggestions from code review --- docker/tpl-rockylinux-clang-cuda-12.Dockerfile | 2 +- docker/tpl-rockylinux-gcc-cuda-12.Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/tpl-rockylinux-clang-cuda-12.Dockerfile b/docker/tpl-rockylinux-clang-cuda-12.Dockerfile index 47ba2391..96d13da2 100644 --- a/docker/tpl-rockylinux-clang-cuda-12.Dockerfile +++ b/docker/tpl-rockylinux-clang-cuda-12.Dockerfile @@ -3,7 +3,7 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG BLD_DIR=$TMP_DIR/build -FROM nvidia/cuda:12.6.1-devel-rockylinux9 AS tpl_toolchain_intersect_geosx_toolchain +FROM nvidia/cuda:12.5.0-devel-rockylinux8 AS tpl_toolchain_intersect_geosx_toolchain ARG SRC_DIR ARG INSTALL_DIR diff --git a/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile 
b/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile index 45f97751..8ceebdf3 100644 --- a/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile +++ b/docker/tpl-rockylinux-gcc-cuda-12.Dockerfile @@ -2,7 +2,7 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG BLD_DIR=$TMP_DIR/build -FROM nvidia/cuda:12.6.1-devel-rockylinux9 AS tpl_toolchain_intersect_geosx_toolchain +FROM nvidia/cuda:12.6.1-devel-rockylinux8 AS tpl_toolchain_intersect_geosx_toolchain ARG SRC_DIR ARG INSTALL_DIR From afbe661407cdea71519be1e174cbf236cb1aa66c Mon Sep 17 00:00:00 2001 From: Matteo Cusini <49037133+CusiniM@users.noreply.github.com> Date: Wed, 11 Sep 2024 18:26:07 -0700 Subject: [PATCH 4/6] modified name. --- .github/workflows/docker_build_tpls.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker_build_tpls.yml b/.github/workflows/docker_build_tpls.yml index 92c14030..3ea3aa93 100644 --- a/.github/workflows/docker_build_tpls.yml +++ b/.github/workflows/docker_build_tpls.yml @@ -54,8 +54,8 @@ jobs: DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89 DOCKERFILE: docker/tpl-ubuntu-clang-cuda.Dockerfile RUNS_ON: ubuntu-latest - - name: Rockylinux (8, gcc 8.5, cuda 12.5) - DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.5 + - name: Rockylinux (8, gcc 8.5, cuda 12.6.1) + DOCKER_REPOSITORY: geosx/rockylinux8-gcc8-cuda12.6.1 DOCKERFILE: docker/tpl-rockylinux-gcc-cuda-12.Dockerfile RUNS_ON: Runner_4core_16GB - name: Rockylinux (8, clang 17.0.6, cuda 12.5) From 2f34b87a21dbd706e6c35cd7eabb569cdcaee37a Mon Sep 17 00:00:00 2001 From: Matteo Cusini Date: Thu, 12 Sep 2024 12:16:58 -0700 Subject: [PATCH 5/6] upgrade ubuntu image. 
--- .github/workflows/docker_build_tpls.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/docker_build_tpls.yml b/.github/workflows/docker_build_tpls.yml index 3ea3aa93..0dabe35c 100644 --- a/.github/workflows/docker_build_tpls.yml +++ b/.github/workflows/docker_build_tpls.yml @@ -20,12 +20,6 @@ jobs: matrix: include: - - name: Ubuntu (20.04, gcc 9.4.0, open-mpi 4.0.3) - DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc9 - DOCKERFILE: docker/tpl-ubuntu-gcc.Dockerfile - DOCKER_ROOT_IMAGE: ubuntu:20.04 - DOCKER_COMPILER_BUILD_ARG: "--build-arg GCC_MAJOR_VERSION=9" - RUNS_ON: ubuntu-latest - name: Ubuntu (20.04, gcc 10.5.0, open-mpi 4.0.3) - github codespaces DOCKER_REPOSITORY: geosx/ubuntu20.04-gcc10 DOCKERFILE: docker/tpl-ubuntu-gcc.Dockerfile @@ -50,6 +44,12 @@ jobs: DOCKERFILE: docker/tpl-ubuntu-clang.Dockerfile DOCKER_COMPILER_BUILD_ARG: "--build-arg CLANG_MAJOR_VERSION=15 --build-arg GCC_MAJOR_VERSION=11" RUNS_ON: ubuntu-latest + - name: Ubuntu (24.04, gcc 14, open-mpi) + DOCKER_REPOSITORY: geosx/ubuntu24.04-gcc14 + DOCKERFILE: docker/tpl-ubuntu-gcc.Dockerfile + DOCKER_ROOT_IMAGE: ubuntu:24.04 + DOCKER_COMPILER_BUILD_ARG: "--build-arg GCC_MAJOR_VERSION=14" + RUNS_ON: ubuntu-latest - name: Ubuntu (20.04, clang 10.0.0 + gcc 9.4.0, open-mpi 4.0.3, cuda-11.8.89) DOCKER_REPOSITORY: geosx/ubuntu20.04-clang10.0.0-cuda11.8.89 DOCKERFILE: docker/tpl-ubuntu-clang-cuda.Dockerfile From 3002c861964c8dae27de02674ab8a84575e06769 Mon Sep 17 00:00:00 2001 From: Matteo Cusini Date: Thu, 12 Sep 2024 13:40:00 -0700 Subject: [PATCH 6/6] modify libtbb2 for libtbb-dev --- docker/tpl-ubuntu-gcc.Dockerfile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/tpl-ubuntu-gcc.Dockerfile b/docker/tpl-ubuntu-gcc.Dockerfile index c3117ed5..0d137f91 100644 --- a/docker/tpl-ubuntu-gcc.Dockerfile +++ b/docker/tpl-ubuntu-gcc.Dockerfile @@ -3,7 +3,7 @@ ARG TMP_DIR=/tmp ARG SRC_DIR=$TMP_DIR/thirdPartyLibs ARG 
BLD_DIR=$TMP_DIR/build -# Defining the building toolchain that are common to both GEOSX and its TPLs. +# Defining the building toolchain that are common to both GEOS and its TPLs. # The docker base image could be any version of ubuntu/debian (as long as package names are unchanged). ARG DOCKER_ROOT_IMAGE @@ -21,7 +21,7 @@ ARG SRC_DIR ARG INSTALL_DIR ENV GEOSX_TPL_DIR=$INSTALL_DIR -# The same distribution and Dockerfile can be used for the 8, 9 and 10 version of the GNU compilers. +# The same distribution and Dockerfile can be used for various versions of the GNU compilers. # The GCC_MAJOR_VERSION argument is here to parametrise (--build-arg) the build from the `docker build` command line. # Note that docker seems to forget about the ARGs after each FROM statement. # This is why we repeat it below. @@ -42,14 +42,14 @@ RUN DEBIAN_FRONTEND=noninteractive TZ=America/Los_Angeles \ # Note the difference between runtime and development packages. ca-certificates \ curl \ - libtbb2 \ + libtbb-dev \ libblas-dev \ liblapack-dev \ zlib1g-dev \ openmpi-bin \ libopenmpi-dev \ # Some of the TPL's make "extensive" use of python in their build. -# And we want to test GEOSX's python configuration script. +# And we want to test GEOS's python configuration script. # Unfortunately argparse (standard library's package used by GEOSX) # is not in the python-minimal package so we install the whole std lib. python3 @@ -67,7 +67,7 @@ ENV OMPI_CC=$CC \ OMPI_CXX=$CXX # This stage is dedicated to TPLs uniquely. -# A multi-stage build patern will allow to extract what we need for the GEOSX build. +# A multi-stage build pattern will allow to extract what we need for the GEOS build. FROM tpl_toolchain_intersect_geosx_toolchain AS tpl_toolchain ARG SRC_DIR ARG BLD_DIR