diff --git a/benchmarks/nemo/nemolite2d/psykal/Makefile b/benchmarks/nemo/nemolite2d/psykal/Makefile index dd96a5de..577332b1 100644 --- a/benchmarks/nemo/nemolite2d/psykal/Makefile +++ b/benchmarks/nemo/nemolite2d/psykal/Makefile @@ -45,7 +45,7 @@ COMMON_LIB = ${COMMON_DIR}/nemolite2d_common.a # The targets that this Makefile supports .PHONY: all nemolite2d_serial nemolite2d_omp nemolite2d_mpi nemolite2d_acc \ nemolite2d_hybrid nemolite2d_ocl nemolite2d_mpiocl nemolite2d_ompt \ - nemolite2d_ompt_mpi + nemolite2d_ompt_mpi nemolite2d_omp_gpu # We don't include the OpenACC or OpenCL targets here as they # have specialist compiler requirements so we require the user @@ -84,6 +84,20 @@ nemolite2d_omp: serial_libs nemolite2d_alg.f90 psyclone_scripts/omp_transform.py cd $@ && make nemolite2d @echo "OpenMP version prepared." +# OpenMP GPU offloading PSyclone code-generation of NemoLite2D with unified memory (not working) +nemolite2d_omp_gpu: serial_libs nemolite2d_alg.f90 psyclone_scripts/omp_gpu_transform.py + @echo "Generating NemoLite2D with OpenMP GPU offloading using omp_gpu_transform.py"; rm -rf $@; mkdir -p $@ + ${PSYCLONE} -nodm -s psyclone_scripts/omp_gpu_transform.py \ + -I ${COMMON_DIR} \ + -oalg $@/alg.f90 -opsy $@/psy.f90 nemolite2d_alg.f90 + # Each generated folder needs its purposely built inf_lib and timer_lib + cp ${INF_LIB} $@/. + cp ${TIMER_LIB} $@/libdl_timer.a + cp namelist $@/. + cp Makefile_gen $@/Makefile + cd $@ && make nemolite2dompgpu + @echo "OpenMP GPU offloading version prepared." + # OpenMP task PSyclone code-generation of NemoLite2D nemolite2d_ompt: serial_libs nemolite2d_alg.f90 psyclone_scripts/omp_task_transform.py @echo "Generating NemoLite2D with OpenMP using omp_task_transform.py"; rm -rf $@; mkdir -p $@ @@ -136,8 +150,7 @@ nemolite2d_hybrid: parallel_libs nemolite2d_alg.f90 psyclone_scripts/omp_transfo cd $@ && make nemolite2d @echo "Hybrid: MPI+OpenMP version prepared." 
-# OpenACC PSyclone code-generation of NemoLite2D (current PSyclone 1.9 will fail -# to generate the OpenACC for the GOcean API) +# OpenACC PSyclone code-generation of NemoLite2D (not working) nemolite2d_acc: serial_libs nemolite2d_alg.f90 psyclone_scripts/acc_transform.py @echo "Generating NemoLite2D with OpenACC"; rm -rf $@; mkdir -p $@ ${PSYCLONE} -nodm -s psyclone_scripts/acc_transform.py \ @@ -218,7 +231,7 @@ common_lib_mpi: clean: rm -rf nemolite2d_serial nemolite2d_omp nemolite2d_mpi rm -rf nemolite2d_hybrid nemolite2d_acc nemolite2d_ocl nemolite2d_mpiocl - rm -rf nemolite2d_ompt nemolite2d_ompt_mpi + rm -rf nemolite2d_ompt nemolite2d_ompt_mpi nemolite2d_omp_gpu libclean: ${MAKE} -C ${INF_DIR} distclean diff --git a/benchmarks/nemo/nemolite2d/psykal/Makefile_gen b/benchmarks/nemo/nemolite2d/psykal/Makefile_gen index 32a66171..40894514 100644 --- a/benchmarks/nemo/nemolite2d/psykal/Makefile_gen +++ b/benchmarks/nemo/nemolite2d/psykal/Makefile_gen @@ -58,6 +58,11 @@ nemolite2d: ${KERNELS} psy.o alg.o ${TIMER_LIB} ${INF_LIB} $(LDFLAGS) ${OMPFLAGS} +# Build NemoLite2D (this Makefile) with OMPTARGETFLAGS and UMEMFLAGS added +nemolite2dompgpu: + $(MAKE) F90FLAGS="$(F90FLAGS) ${OMPTARGETFLAGS} ${UMEMFLAGS}" \ + LDFLAGS="$(LDFLAGS) ${OMPTARGETFLAGS} ${UMEMFLAGS}" nemolite2d + # Build NemoLite2D for OpenACC, uses the same target as NemoLite2D but with # additional Fortran flags. 
nemolite2dacc: diff --git a/benchmarks/nemo/nemolite2d/psykal/README.md b/benchmarks/nemo/nemolite2d/psykal/README.md index 005250c0..55bbb03c 100644 --- a/benchmarks/nemo/nemolite2d/psykal/README.md +++ b/benchmarks/nemo/nemolite2d/psykal/README.md @@ -32,10 +32,10 @@ The Makefile supports many targets: - nemolite2d_omp - OpenMP version with static scheduling - nemolite2d_mpi - MPI version for distributed memory parallelism - nemolite2d_hybrid - MPI and OpenMP combined version -- nemolite2d_acc - OpenACC (Not working - PSyclone will not generate it) +- nemolite2d_acc - OpenACC offloading (Not working - checksums return 0) +- nemolite2d_omp_gpu - OpenMP offloading (Not working - issue with GPU routine with derived types?) - nemolite2d_ocl - OpenCL version -- nemolite2_mpiocl - MPI and OpenCL combined version (Not working - Psyclone -will generate a OpenCL-only version) +- nemolite2d_mpiocl - MPI and OpenCL combined version Executing `make <target>` will generate a folder with the same name as the target, containing at least a `alg.f90` and a `psy.f90` for the diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py index 7a3d116e..d381d3f1 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/acc_transform.py @@ -1,20 +1,29 @@ '''Python script intended to be passed to PSyclone's generate() function via the -s option. Performs OpenACC transformations. ''' +from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyGen import TransInfo from psyclone.psyir.nodes import Loop def trans(psy): ''' Take the supplied psy object, apply OpenACC transformations - to the schedule of invoke_0 and return the new psy object ''' + to the schedule of invoke_0 and return the new psy object + + :param psy: The PSy layer object to apply transformations to. 
+ :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. + :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' tinfo = TransInfo() parallel_trans = tinfo.get_trans_name('ACCParallelTrans') loop_trans = tinfo.get_trans_name('ACCLoopTrans') enter_data_trans = tinfo.get_trans_name('ACCEnterDataTrans') routine_trans = tinfo.get_trans_name('ACCRoutineTrans') glo2arg_trans = tinfo.get_trans_name('KernelImportsToArguments') - inline_trans = tinfo.get_trans_name('KernelModuleInline') + inline_trans = KernelModuleInlineTrans() invoke = psy.invokes.get('invoke_0') schedule = invoke.schedule diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py index 8235d3a0..14456516 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/ocl_transform.py @@ -30,8 +30,15 @@ def trans(psy): - ''' Transform the schedule for OpenCL generation ''' + ''' Transform the schedule for OpenCL generation. + :param psy: The PSy layer object to apply transformations to. + :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. + :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' # Import transformations tinfo = TransInfo() globaltrans = tinfo.get_trans_name('KernelImportsToArguments') diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_gpu_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_gpu_transform.py new file mode 100644 index 00000000..b7f995e4 --- /dev/null +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_gpu_transform.py @@ -0,0 +1,60 @@ +''' Python script intended to be passed to PSyclone's generate() +function via the -s option. 
It applies OpenMP to every loop and +module-inlines all kernels in the schedule.''' + +from psyclone.domain.common.transformations import KernelModuleInlineTrans +from psyclone.psyir.nodes import Routine, Loop +from psyclone.psyir.transformations import OMPTargetTrans +from psyclone.psyir.transformations import OMPLoopTrans +from psyclone.psyir.transformations import TransformationError +from psyclone.psyir.transformations import \ + FoldConditionalReturnExpressionsTrans +from psyclone.transformations import \ + KernelImportsToArguments, OMPDeclareTargetTrans + + +def trans(psy): + ''' Add OpenMP offloading parallelism to the Schedule. + + :param psy: The PSy layer object to apply transformations to. + :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. + :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' + loop_trans = OMPLoopTrans() + target_trans = OMPTargetTrans() + module_inline_trans = KernelModuleInlineTrans() + imports_to_arguments = KernelImportsToArguments() + fold_trans = FoldConditionalReturnExpressionsTrans() + omp_declare = OMPDeclareTargetTrans() + + schedule = psy.invokes.invoke_list[0].schedule + + # Module-Inline and simplify all kernels in this Schedule + for kernel in schedule.kernels(): + imports_to_arguments.apply(kernel) + fold_trans.apply(kernel.get_kernel_schedule()) + module_inline_trans.apply(kernel) + kernel.lower_to_language_level() + + loop_trans.omp_directive = "teamsdistributeparalleldo" + loop_trans.omp_schedule = "none" + + # Add OpenMP target and parallelisation pragmas to all outer + # loops with a fixed collapse 2 clause + for outer_loop in schedule.walk(Loop, stop_type=Loop): + try: + loop_trans.apply(outer_loop) + outer_loop.parent.parent.collapse = 2 + target_trans.apply(outer_loop.parent.parent) + except TransformationError: + pass + + # Add an OpenMP declare target directive to each kernel subroutine + for kernel_subroutine in schedule.parent.walk(Routine): + if not 
kernel_subroutine.name.startswith("invoke_"): + omp_declare.apply(kernel_subroutine) + + return psy diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py index 17be3e07..e0cfa8d2 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_task_transform.py @@ -2,17 +2,26 @@ function via the -s option. It applies OpenMP tasking to every loop and inlines all kernels in the schedule.''' +from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyir.nodes import Loop from psyclone.configuration import Config from psyclone.transformations import OMPParallelTrans, OMPSingleTrans, \ - OMPTaskloopTrans, KernelModuleInlineTrans + OMPTaskloopTrans from psyclone.psyir.transformations import OMPTaskwaitTrans from psyclone.psyir.nodes import OMPTaskloopDirective, OMPTaskwaitDirective, \ OMPDirective, OMPParallelDirective def trans(psy): - '''Transformation entry point''' + '''Transformation entry point. + + :param psy: The PSy layer object to apply transformations to. + :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. + :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' config = Config.get() schedule = psy.invokes.get('invoke_0').schedule diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py index eb8bc62b..247decab 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/omp_transform.py @@ -2,19 +2,28 @@ function via the -s option. 
It applies OpenMP to every loop and inlines all kernels in the schedule.''' +from psyclone.domain.common.transformations import KernelModuleInlineTrans from psyclone.psyGen import TransInfo from psyclone.psyir.nodes import Loop from psyclone.configuration import Config def trans(psy): - ''' Transformation entry point ''' + ''' Transformation entry point. + + :param psy: The PSy layer object to apply transformations to. + :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. + :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' config = Config.get() tinfo = TransInfo() parallel_loop_trans = tinfo.get_trans_name('GOceanOMPParallelLoopTrans') loop_trans = tinfo.get_trans_name('GOceanOMPLoopTrans') parallel_trans = tinfo.get_trans_name('OMPParallelTrans') - module_inline_trans = tinfo.get_trans_name('KernelModuleInline') + module_inline_trans = KernelModuleInlineTrans() schedule = psy.invokes.get('invoke_0').schedule diff --git a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py index 8bd4eefd..a214b1f2 100644 --- a/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py +++ b/benchmarks/nemo/nemolite2d/psykal/psyclone_scripts/serial_transform.py @@ -1,14 +1,20 @@ ''' Python script intended to be passed to PSyclone's generate() function via the -s option. This script module-inline all kernels in the PSy-layer.''' -from psyclone.psyGen import TransInfo +from psyclone.domain.common.transformations import KernelModuleInlineTrans def trans(psy): - ''' Transformation script entry function ''' + ''' Transformation script entry function. - tinfo = TransInfo() - itrans = tinfo.get_trans_name('KernelModuleInline') + :param psy: The PSy layer object to apply transformations to. + :type psy: :py:class:`psyclone.psyGen.PSy` + + :returns: the transformed PSy object. 
+ :rtype: :py:class:`psyclone.psyGen.PSy` + + ''' + itrans = KernelModuleInlineTrans() schedule = psy.invokes.get('invoke_0').schedule