diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h index 3ae447b14f320..c41b4d1e9844c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h @@ -23,7 +23,8 @@ enum OMPTgtExecModeFlags : unsigned char { OMP_TGT_EXEC_MODE_GENERIC = 1 << 0, OMP_TGT_EXEC_MODE_SPMD = 1 << 1, OMP_TGT_EXEC_MODE_GENERIC_SPMD = - OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD + OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD, + OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD }; } // end namespace omp diff --git a/offload/DeviceRTL/src/Kernel.cpp b/offload/DeviceRTL/src/Kernel.cpp index 467e44a65276c..8c2828b270419 100644 --- a/offload/DeviceRTL/src/Kernel.cpp +++ b/offload/DeviceRTL/src/Kernel.cpp @@ -30,7 +30,8 @@ enum OMPTgtExecModeFlags : unsigned char { OMP_TGT_EXEC_MODE_GENERIC = 1 << 0, OMP_TGT_EXEC_MODE_SPMD = 1 << 1, OMP_TGT_EXEC_MODE_GENERIC_SPMD = - OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD + OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD, + OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD }; static void diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 75f87cab6049d..2c01ed219b29c 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -434,6 +434,8 @@ struct GenericKernelTy { return "Generic"; case OMP_TGT_EXEC_MODE_GENERIC_SPMD: return "Generic-SPMD"; + case OMP_TGT_EXEC_MODE_SPMD_NO_LOOP: + return "SPMD-No-Loop"; } llvm_unreachable("Unknown execution mode!"); } @@ -471,7 +473,8 @@ struct GenericKernelTy { uint32_t BlockLimitClause[3], uint64_t LoopTripCount, uint32_t &NumThreads, bool IsNumThreadsFromUser) const; - /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode. + /// Indicate if the kernel works in Generic SPMD, Generic, No-Loop + /// or SPMD mode. bool isGenericSPMDMode() const { return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_GENERIC_SPMD; @@ -486,6 +489,10 @@ struct GenericKernelTy { bool isBareMode() const { return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE; } + bool isNoLoopMode() const { + return KernelEnvironment.Configuration.ExecMode == + OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; + } /// The kernel name. std::string Name; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index d4b5f914c6672..4363a4f7c06ca 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -662,6 +662,10 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice, return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit()); } + // Return the number of teams required to cover the loop iterations. + if (isNoLoopMode()) + return LoopTripCount > 0 ? (((LoopTripCount - 1) / NumThreads) + 1) : 1; + uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks(); uint64_t TripCountNumBlocks = std::numeric_limits::max(); if (LoopTripCount > 0) {