diff --git a/offload/liboffload/API/Program.td b/offload/liboffload/API/Program.td index 8c88fe6e21e6a..0476fa1f7c27a 100644 --- a/offload/liboffload/API/Program.td +++ b/offload/liboffload/API/Program.td @@ -13,7 +13,9 @@ def : Function { let name = "olCreateProgram"; let desc = "Create a program for the device from the binary image pointed to by `ProgData`."; - let details = []; + let details = [ + "The provided `ProgData` will be copied and need not outlive the returned handle", + ]; let params = [ Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>, Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>, diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 770c212d804d2..bbc9f927ae27c 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -458,6 +458,9 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData, } Error olDestroyProgram_impl(ol_program_handle_t Program) { + if (auto Err = Program->Image->getDevice().unloadBinary(Program->Image)) + return Err; + return olDestroy(Program); } diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index 73e1e66928fac..bc1a768feafdd 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -2023,6 +2023,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { return Plugin::success(); } + Error unloadBinaryImpl(DeviceImageTy *Image) override { + AMDGPUDeviceImageTy &AMDImage = static_cast(*Image); + + // Unload the executable of the image. + return AMDImage.unloadExecutable(); + } + /// Deinitialize the device and release its resources. Error deinitImpl() override { // Deinitialize the stream and event pools. @@ -2035,19 +2042,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { if (auto Err = AMDGPUSignalManager.deinit()) return Err; - // Close modules if necessary. - if (!LoadedImages.empty()) { - // Each image has its own module. - for (DeviceImageTy *Image : LoadedImages) { - AMDGPUDeviceImageTy &AMDImage = - static_cast(*Image); - - // Unload the executable of the image. - if (auto Err = AMDImage.unloadExecutable()) - return Err; - } - } - // Invalidate agent reference. Agent = {0}; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index f5d995532b7a5..67e66ef095e56 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -735,6 +735,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Expected loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0; + /// Unload a previously loaded Image from the device + Error unloadBinary(DeviceImageTy *Image); + virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0; + /// Setup the device environment if needed. Notice this setup may not be run /// on some plugins. By default, it will be executed, but plugins can change /// this behavior by overriding the shouldSetupDeviceEnvironment function. diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 6fd3405d03afa..038fd7aeb75ee 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -821,26 +821,52 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) { return Plugin::success(); } -Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { - for (DeviceImageTy *Image : LoadedImages) - if (auto Err = callGlobalDestructors(Plugin, *Image)) - return Err; +Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) { + if (auto Err = callGlobalDestructors(Plugin, *Image)) + return Err; if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { GenericGlobalHandlerTy &GHandler = Plugin.getGlobalHandler(); - for (auto *Image : LoadedImages) { - DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0}; - GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", - sizeof(DeviceMemoryPoolTrackingTy), - &ImageDeviceMemoryPoolTracking); - if (auto Err = - GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) { - consumeError(std::move(Err)); - continue; - } - DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking); + DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0}; + GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker", + sizeof(DeviceMemoryPoolTrackingTy), + &ImageDeviceMemoryPoolTracking); + if (auto Err = + GHandler.readGlobalFromDevice(*this, *Image, TrackerGlobal)) { + consumeError(std::move(Err)); } + DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking); + } + + GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); + auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); + if (!ProfOrErr) + return ProfOrErr.takeError(); + + if (!ProfOrErr->empty()) { + // Dump out profdata + if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) == + uint32_t(DeviceDebugKind::PGODump)) + ProfOrErr->dump(); + + // Write data to profiling file + if (auto Err = ProfOrErr->write()) + return Err; + } + + LoadedImages.erase( + std::find(LoadedImages.begin(), LoadedImages.end(), Image)); + return unloadBinaryImpl(Image); +} + +Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + while (!LoadedImages.empty()) { + if (auto Err = unloadBinary(LoadedImages.back())) + return Err; + } + + if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) { // TODO: Write this by default into a file. printf("\n\n|-----------------------\n" "| Device memory tracker:\n" @@ -856,25 +882,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { DeviceMemoryPoolTracking.AllocationMax); } - for (auto *Image : LoadedImages) { - GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); - auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); - if (!ProfOrErr) - return ProfOrErr.takeError(); - - if (ProfOrErr->empty()) - continue; - - // Dump out profdata - if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) == - uint32_t(DeviceDebugKind::PGODump)) - ProfOrErr->dump(); - - // Write data to profiling file - if (auto Err = ProfOrErr->write()) - return Err; - } - // Delete the memory manager before deinitializing the device. Otherwise, // we may delete device allocations after the device is deinitialized. if (MemoryManager) diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 9943f533ef5a8..0e662b038c363 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -358,6 +358,19 @@ struct CUDADeviceTy : public GenericDeviceTy { return Plugin::success(); } + Error unloadBinaryImpl(DeviceImageTy *Image) override { + assert(Context && "Invalid CUDA context"); + + // Each image has its own module. + CUDADeviceImageTy &CUDAImage = static_cast(*Image); + + // Unload the module of the image. + if (auto Err = CUDAImage.unloadModule()) + return Err; + + return Plugin::success(); + } + /// Deinitialize the device and release its resources. Error deinitImpl() override { if (Context) { @@ -372,20 +385,6 @@ struct CUDADeviceTy : public GenericDeviceTy { if (auto Err = CUDAEventManager.deinit()) return Err; - // Close modules if necessary. - if (!LoadedImages.empty()) { - assert(Context && "Invalid CUDA context"); - - // Each image has its own module. - for (DeviceImageTy *Image : LoadedImages) { - CUDADeviceImageTy &CUDAImage = static_cast(*Image); - - // Unload the module of the image. - if (auto Err = CUDAImage.unloadModule()) - return Err; - } - } - if (Context) { CUresult Res = cuDevicePrimaryCtxRelease(Device); if (auto Err =