Skip to content

Commit 8548e2f

Browse files
[nfc][libomptarget] Move named_sync() into target_impl
Summary: [nfc][libomptarget] Move named_sync() into target_impl
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: ABataev
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D69487
1 parent db8dad2 commit 8548e2f

File tree

6 files changed

+11
-20
lines changed

6 files changed

+11
-20
lines changed

openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,7 +765,7 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
765765
// is started, so we don't need a barrier.
766766
if (NumThreads > 1) {
767767
#endif
768-
named_sync(L1_BARRIER, WARPSIZE * NumWarps);
768+
__kmpc_impl_named_sync(L1_BARRIER, WARPSIZE * NumWarps);
769769
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
770770
}
771771
#endif

openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
256256
// If we guard this barrier as follows it leads to deadlock, probably
257257
// because of a compiler bug: if (!IsGenericMode()) __syncthreads();
258258
uint16_t SyncWarps = (NumThreads + WARPSIZE - 1) / WARPSIZE;
259-
named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
259+
__kmpc_impl_named_sync(L1_BARRIER, SyncWarps * WARPSIZE);
260260

261261
// If this team is not the last, quit.
262262
if (/* Volatile read by all threads */ !IsLastTeam)

openmp/libomptarget/deviceRTLs/nvptx/src/support.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,6 @@ INLINE unsigned long PadBytes(unsigned long size, unsigned long alignment);
8383
#define SUB_BYTES(_addr, _bytes) \
8484
((void *)((char *)((void *)(_addr)) - (_bytes)))
8585

86-
////////////////////////////////////////////////////////////////////////////////
87-
// Named Barrier Routines
88-
////////////////////////////////////////////////////////////////////////////////
89-
INLINE void named_sync(const int barrier, const int num_threads);
90-
9186
////////////////////////////////////////////////////////////////////////////////
9287
// Teams Reduction Scratchpad Helpers
9388
////////////////////////////////////////////////////////////////////////////////

openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -268,17 +268,6 @@ INLINE void *SafeFree(void *ptr, const char *msg) {
268268
return NULL;
269269
}
270270

271-
////////////////////////////////////////////////////////////////////////////////
272-
// Named Barrier Routines
273-
////////////////////////////////////////////////////////////////////////////////
274-
275-
INLINE void named_sync(const int barrier, const int num_threads) {
276-
asm volatile("bar.sync %0, %1;"
277-
:
278-
: "r"(barrier), "r"(num_threads)
279-
: "memory");
280-
}
281-
282271
////////////////////////////////////////////////////////////////////////////////
283272
// Teams Reduction Scratchpad Helpers
284273
////////////////////////////////////////////////////////////////////////////////

openmp/libomptarget/deviceRTLs/nvptx/src/sync.cu

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
6161
"call kmpc_barrier with %d omp threads, sync parameter %d\n",
6262
(int)numberOfActiveOMPThreads, (int)threads);
6363
// Barrier #1 is for synchronization among active threads.
64-
named_sync(L1_BARRIER, threads);
64+
__kmpc_impl_named_sync(L1_BARRIER, threads);
6565
}
6666
} else {
6767
// Still need to flush the memory per the standard.
@@ -92,7 +92,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
9292
"%d\n",
9393
(int)numberOfActiveOMPThreads, (int)threads);
9494
// Barrier #1 is for synchronization among active threads.
95-
named_sync(L1_BARRIER, threads);
95+
__kmpc_impl_named_sync(L1_BARRIER, threads);
9696
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
9797
}
9898

openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,4 +153,11 @@ INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
153153
#endif // CUDA_VERSION
154154
}
155155

156+
INLINE void __kmpc_impl_named_sync(int barrier, uint32_t num_threads) {
157+
asm volatile("bar.sync %0, %1;"
158+
:
159+
: "r"(barrier), "r"(num_threads)
160+
: "memory");
161+
}
162+
156163
#endif

0 commit comments

Comments
 (0)