diff --git a/offload/liboffload/API/Event.td b/offload/liboffload/API/Event.td index 9d217ae230384..041bbd23ee4c7 100644 --- a/offload/liboffload/API/Event.td +++ b/offload/liboffload/API/Event.td @@ -48,7 +48,8 @@ def : Enum { let desc = "Supported event info."; let is_typed = 1; let etors = [ - TaggedEtor<"QUEUE", "ol_queue_handle_t", "The handle of the queue associated with the device."> + TaggedEtor<"QUEUE", "ol_queue_handle_t", "The handle of the queue associated with the event.">, + TaggedEtor<"IS_COMPLETE", "bool", "True if and only if the event is complete.">, ]; } diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index f5365ca274308..85826b6f1fee9 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -553,8 +553,8 @@ Error olGetQueueInfoSize_impl(ol_queue_handle_t Queue, ol_queue_info_t PropName, } Error olSyncEvent_impl(ol_event_handle_t Event) { + // No event info means that this event was complete on creation if (!Event->EventInfo) - // Event always complete return Plugin::success(); if (auto Res = Event->Queue->Device->Device->syncEvent(Event->EventInfo)) @@ -575,10 +575,22 @@ Error olGetEventInfoImplDetail(ol_event_handle_t Event, ol_event_info_t PropName, size_t PropSize, void *PropValue, size_t *PropSizeRet) { InfoWriter Info(PropSize, PropValue, PropSizeRet); + auto Queue = Event->Queue; switch (PropName) { case OL_EVENT_INFO_QUEUE: - return Info.write(Event->Queue); + return Info.write(Queue); + case OL_EVENT_INFO_IS_COMPLETE: { + // No event info means that this event was complete on creation + if (!Event->EventInfo) + return Info.write(true); + + auto Res = Queue->Device->Device->isEventComplete(Event->EventInfo, + Queue->AsyncInfo); + if (auto Err = Res.takeError()) + return Err; + return Info.write(*Res); + } default: return createOffloadError(ErrorCode::INVALID_ENUMERATION, "olGetEventInfo enum '%i' is invalid", PropName); diff --git 
a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 796182075ff3d..d491e94ca2dc7 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1519,6 +1519,9 @@ struct AMDGPUStreamTy { /// actions for that and prior events. Error synchronizeOn(AMDGPUEventTy &Event); + /// Return true if the event from this queue is complete + Expected<bool> isEventComplete(const AMDGPUEventTy &Event); + /// Query the stream and complete pending post actions if operations finished. /// Return whether all the operations completed. This operation does not block /// the calling thread. @@ -1683,6 +1686,18 @@ Error AMDGPUStreamTy::synchronizeOn(AMDGPUEventTy &Event) { return completeUntil(Event.RecordedSlot); } +Expected<bool> AMDGPUStreamTy::isEventComplete(const AMDGPUEventTy &Event) { + std::lock_guard Lock(Mutex); + assert(Event.RecordedStream == this && "event is for a different stream"); + + if (Event.RecordedSyncCycle < SyncCycle) { + return true; + } + assert(Event.RecordedSyncCycle == SyncCycle && "event is from the future?"); + + return !Slots[Event.RecordedSlot].Signal->load(); +} + struct AMDGPUStreamManagerTy final : GenericDeviceResourceManagerTy> { using ResourceRef = AMDGPUResourceRef; @@ -2601,6 +2616,18 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Query.takeError(); } + /// Report whether the event is complete; a null stream yields false. + Expected<bool> isEventCompleteImpl(void *EventPtr, + AsyncInfoWrapperTy &AsyncInfo) override { + AMDGPUEventTy *Event = reinterpret_cast<AMDGPUEventTy *>(EventPtr); + auto Stream = AsyncInfo.getQueueAs<AMDGPUStreamTy *>(); + if (!Stream) + return false; + // Hand back the Expected itself: converting it to bool would only report + // that no error occurred, not the completion state it carries. + return Stream->isEventComplete(*Event); + } + /// Synchronize the current thread with the event. 
Error syncEventImpl(void *EventPtr) override { AMDGPUEventTy *Event = reinterpret_cast(EventPtr); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index c9ab34b024b77..dda2d4d226b25 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -984,6 +984,11 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Error waitEventImpl(void *EventPtr, AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Check if the event enqueued to AsyncInfo is complete + Expected isEventComplete(void *Event, __tgt_async_info *AsyncInfo); + virtual Expected + isEventCompleteImpl(void *EventPtr, AsyncInfoWrapperTy &AsyncInfoWrapper) = 0; + /// Synchronize the current thread with the event. Error syncEvent(void *EventPtr); virtual Error syncEventImpl(void *EventPtr) = 0; diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 083d41659a469..ee06be4499755 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -1648,6 +1648,22 @@ Expected GenericDeviceTy::hasPendingWork(__tgt_async_info *AsyncInfo) { return Res; } +Expected GenericDeviceTy::isEventComplete(void *Event, + __tgt_async_info *AsyncInfo) { + AsyncInfoWrapperTy AsyncInfoWrapper(*this, AsyncInfo); + auto Res = isEventCompleteImpl(Event, AsyncInfoWrapper); + if (auto Err = Res.takeError()) { + AsyncInfoWrapper.finalize(Err); + return Err; + } + + auto Err = Plugin::success(); + AsyncInfoWrapper.finalize(Err); + if (Err) + return Err; + return Res; +} + Error GenericDeviceTy::syncEvent(void *EventPtr) { return syncEventImpl(EventPtr); } diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp index 361a781e8f9b6..69205b336606f 100644 --- 
a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp +++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.cpp @@ -82,6 +82,7 @@ DLWRAP(cuCtxSetLimit, 2) DLWRAP(cuEventCreate, 2) DLWRAP(cuEventRecord, 2) +DLWRAP(cuEventQuery, 1) DLWRAP(cuStreamWaitEvent, 3) DLWRAP(cuEventSynchronize, 1) DLWRAP(cuEventDestroy, 1) diff --git a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h index b6c022c8e7e8b..7fbf3ab01c9ca 100644 --- a/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h +++ b/offload/plugins-nextgen/cuda/dynamic_cuda/cuda.h @@ -352,6 +352,7 @@ CUresult cuCtxSetLimit(CUlimit, size_t); CUresult cuEventCreate(CUevent *, unsigned int); CUresult cuEventRecord(CUevent, CUstream); +CUresult cuEventQuery(CUevent); CUresult cuStreamWaitEvent(CUstream, CUevent, unsigned int); CUresult cuEventSynchronize(CUevent); CUresult cuEventDestroy(CUevent); diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index e94f3f6af7dd4..fea73b1bc80ec 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -919,6 +919,20 @@ struct CUDADeviceTy : public GenericDeviceTy { return true; } + Expected isEventCompleteImpl(void *EventPtr, + AsyncInfoWrapperTy &) override { + CUevent Event = reinterpret_cast(EventPtr); + + CUresult Ret = cuEventQuery(Event); + if (Ret == CUDA_SUCCESS) + return true; + + if (Ret == CUDA_ERROR_NOT_READY) + return false; + + return Plugin::check(Ret, "error in cuEventQuery: %s"); + } + /// Synchronize the current thread with the event. 
Error syncEventImpl(void *EventPtr) override { CUevent Event = reinterpret_cast(EventPtr); diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index ed5213531999d..e5f65113e17a2 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -337,6 +337,10 @@ struct GenELF64DeviceTy : public GenericDeviceTy { Expected hasPendingWorkImpl(AsyncInfoWrapperTy &AsyncInfo) override { return true; } + Expected isEventCompleteImpl(void *Event, + AsyncInfoWrapperTy &AsyncInfo) override { + return true; + } Error syncEventImpl(void *EventPtr) override { return Plugin::success(); } /// Print information about the device. diff --git a/offload/unittests/OffloadAPI/event/olGetEventInfo.cpp b/offload/unittests/OffloadAPI/event/olGetEventInfo.cpp index 908d2dcb6df5d..b86d15f045ebc 100644 --- a/offload/unittests/OffloadAPI/event/olGetEventInfo.cpp +++ b/offload/unittests/OffloadAPI/event/olGetEventInfo.cpp @@ -13,13 +13,22 @@ using olGetEventInfoTest = OffloadEventTest; OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olGetEventInfoTest); -TEST_P(olGetEventInfoTest, SuccessDevice) { +TEST_P(olGetEventInfoTest, SuccessQueue) { ol_queue_handle_t RetrievedQueue; ASSERT_SUCCESS(olGetEventInfo(Event, OL_EVENT_INFO_QUEUE, sizeof(ol_queue_handle_t), &RetrievedQueue)); ASSERT_EQ(Queue, RetrievedQueue); } +TEST_P(olGetEventInfoTest, SuccessIsComplete) { + bool Complete = false; + while (!Complete) { + ASSERT_SUCCESS(olGetEventInfo(Event, OL_EVENT_INFO_IS_COMPLETE, + sizeof(Complete), &Complete)); + } + ASSERT_EQ(Complete, true); +} + TEST_P(olGetEventInfoTest, InvalidNullHandle) { ol_queue_handle_t RetrievedQueue; ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, diff --git a/offload/unittests/OffloadAPI/event/olGetEventInfoSize.cpp b/offload/unittests/OffloadAPI/event/olGetEventInfoSize.cpp index d7dee58e35e8d..36f36c3a187f2 100644 --- a/offload/unittests/OffloadAPI/event/olGetEventInfoSize.cpp +++ 
b/offload/unittests/OffloadAPI/event/olGetEventInfoSize.cpp @@ -19,6 +19,12 @@ TEST_P(olGetEventInfoSizeTest, SuccessQueue) { ASSERT_EQ(Size, sizeof(ol_queue_handle_t)); } +TEST_P(olGetEventInfoSizeTest, SuccessIsComplete) { + size_t Size = 0; + ASSERT_SUCCESS(olGetEventInfoSize(Event, OL_EVENT_INFO_IS_COMPLETE, &Size)); + ASSERT_EQ(Size, sizeof(bool)); +} + TEST_P(olGetEventInfoSizeTest, InvalidNullHandle) { size_t Size = 0; ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,