Skip to content

Commit 4ea1fc8

Browse files
committed
[Offload] Add an unloadBinary interface to PluginInterface
This allows removal of a specific Image from a Device, rather than requiring all image data to outlive the device they were created for. This is required for `ol_program_handle_t`s, which now specify the lifetime of the buffer used to create the program.
1 parent 97ac648 commit 4ea1fc8

File tree

6 files changed

+76
-62
lines changed

6 files changed

+76
-62
lines changed

offload/liboffload/API/Program.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
def : Function {
1414
let name = "olCreateProgram";
1515
let desc = "Create a program for the device from the binary image pointed to by `ProgData`.";
16-
let details = [];
16+
let details = [
17+
"`ProgData` must remain valid for the entire lifetime of `Program`",
18+
];
1719
let params = [
1820
Param<"ol_device_handle_t", "Device", "handle of the device", PARAM_IN>,
1921
Param<"const void*", "ProgData", "pointer to the program binary data", PARAM_IN>,

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,9 @@ Error olCreateProgram_impl(ol_device_handle_t Device, const void *ProgData,
465465
}
466466

467467
/// Destroy \p Program: first unload its image from the device that owns it,
/// then release the program handle itself. If unloading fails, that error is
/// returned and the handle is left untouched.
Error olDestroyProgram_impl(ol_program_handle_t Program) {
  auto *ProgramImage = Program->Image;
  auto &&OwningDevice = ProgramImage->getDevice();

  if (auto UnloadErr = OwningDevice.unloadBinary(ProgramImage))
    return UnloadErr;

  return olDestroy(Program);
}
470473

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2023,6 +2023,16 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20232023
return Plugin::success();
20242024
}
20252025

2026+
/// Plugin hook that unloads a single image from this AMDGPU device by
/// unloading the executable that belongs to \p Image.
///
/// \returns the error reported by unloadExecutable(), or success.
Error unloadBinaryImpl(DeviceImageTy *Image) override {
  // The generic image is downcast to the AMDGPU-specific image type.
  auto &TargetImage = static_cast<AMDGPUDeviceImageTy &>(*Image);

  // Each image has its own executable; release only this one.
  auto UnloadErr = TargetImage.unloadExecutable();
  if (UnloadErr)
    return UnloadErr;

  return Plugin::success();
}
2035+
20262036
/// Deinitialize the device and release its resources.
20272037
Error deinitImpl() override {
20282038
// Deinitialize the stream and event pools.
@@ -2035,19 +2045,6 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
20352045
if (auto Err = AMDGPUSignalManager.deinit())
20362046
return Err;
20372047

2038-
// Close modules if necessary.
2039-
if (!LoadedImages.empty()) {
2040-
// Each image has its own module.
2041-
for (DeviceImageTy *Image : LoadedImages) {
2042-
AMDGPUDeviceImageTy &AMDImage =
2043-
static_cast<AMDGPUDeviceImageTy &>(*Image);
2044-
2045-
// Unload the executable of the image.
2046-
if (auto Err = AMDImage.unloadExecutable())
2047-
return Err;
2048-
}
2049-
}
2050-
20512048
// Invalidate agent reference.
20522049
Agent = {0};
20532050

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -712,6 +712,10 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
712712
virtual Expected<DeviceImageTy *>
713713
loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
714714

715+
/// Unload a previously loaded Image from the device
716+
Error unloadBinary(DeviceImageTy *Image);
717+
virtual Error unloadBinaryImpl(DeviceImageTy *Image) = 0;
718+
715719
/// Setup the device environment if needed. Notice this setup may not be run
716720
/// on some plugins. By default, it will be executed, but plugins can change
717721
/// this behavior by overriding the shouldSetupDeviceEnvironment function.

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 41 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -821,26 +821,52 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) {
821821
return Plugin::success();
822822
}
823823

824-
Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
825-
for (DeviceImageTy *Image : LoadedImages)
826-
if (auto Err = callGlobalDestructors(Plugin, *Image))
827-
return Err;
824+
/// Unload a single, previously loaded \p Image from this device.
///
/// Steps, in order:
///  1. Run the image's global destructors.
///  2. If the AllocationTracker debug kind is enabled, read the image's
///     `__omp_rtl_device_memory_pool_tracker` global and fold it into the
///     per-device statistics. A failed read is tolerated and that image is
///     simply not accounted for.
///  3. Read the image's profiling globals; when non-empty, optionally dump
///     them (PGODump debug kind) and write them to the profiling file.
///  4. Remove the image from LoadedImages.
///  5. Hand the image to the plugin-specific unloadBinaryImpl().
///
/// \returns the first error produced by the global destructors, the profiling
/// read/write, or unloadBinaryImpl(); success otherwise. On an early error the
/// image is left in LoadedImages.
Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) {
  if (auto Err = callGlobalDestructors(Plugin, *Image))
    return Err;

  GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();

  if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
    DeviceMemoryPoolTrackingTy ImageDeviceMemoryPoolTracking = {0, 0, ~0U, 0};
    GlobalTy TrackerGlobal("__omp_rtl_device_memory_pool_tracker",
                           sizeof(DeviceMemoryPoolTrackingTy),
                           &ImageDeviceMemoryPoolTracking);
    // Only combine the statistics when the read succeeded. The loop this code
    // was extracted from skipped the image (`continue`) on a failed read.
    if (auto Err = Handler.readGlobalFromDevice(*this, *Image, TrackerGlobal))
      consumeError(std::move(Err));
    else
      DeviceMemoryPoolTracking.combine(ImageDeviceMemoryPoolTracking);
  }

  auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
  if (!ProfOrErr)
    return ProfOrErr.takeError();

  if (!ProfOrErr->empty()) {
    // Dump out profdata
    if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
        uint32_t(DeviceDebugKind::PGODump))
      ProfOrErr->dump();

    // Write data to profiling file
    if (auto Err = ProfOrErr->write())
      return Err;
  }

  // Drop the image from the bookkeeping list before the plugin unloads it.
  // Guard the erase: passing end() to erase() is undefined behaviour, which
  // would happen if the image is not (or no longer) tracked by this device.
  auto ImageIt = std::find(LoadedImages.begin(), LoadedImages.end(), Image);
  if (ImageIt != LoadedImages.end())
    LoadedImages.erase(ImageIt);

  return unloadBinaryImpl(Image);
}
862+
863+
Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
864+
while (!LoadedImages.empty()) {
865+
if (auto Err = unloadBinary(LoadedImages.back()))
866+
return Err;
867+
}
868+
869+
if (OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::AllocationTracker)) {
844870
// TODO: Write this by default into a file.
845871
printf("\n\n|-----------------------\n"
846872
"| Device memory tracker:\n"
@@ -856,25 +882,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) {
856882
DeviceMemoryPoolTracking.AllocationMax);
857883
}
858884

859-
for (auto *Image : LoadedImages) {
860-
GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler();
861-
auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image);
862-
if (!ProfOrErr)
863-
return ProfOrErr.takeError();
864-
865-
if (ProfOrErr->empty())
866-
continue;
867-
868-
// Dump out profdata
869-
if ((OMPX_DebugKind.get() & uint32_t(DeviceDebugKind::PGODump)) ==
870-
uint32_t(DeviceDebugKind::PGODump))
871-
ProfOrErr->dump();
872-
873-
// Write data to profiling file
874-
if (auto Err = ProfOrErr->write())
875-
return Err;
876-
}
877-
878885
// Delete the memory manager before deinitializing the device. Otherwise,
879886
// we may delete device allocations after the device is deinitialized.
880887
if (MemoryManager)

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,21 @@ struct CUDADeviceTy : public GenericDeviceTy {
358358
return Plugin::success();
359359
}
360360

361+
/// Plugin hook that unloads a single image from this CUDA device by unloading
/// the module that belongs to \p Image.
///
/// Only the module of the requested image is touched. GenericDeviceTy's
/// unloadBinary() erases the image from LoadedImages before invoking this
/// hook, so iterating LoadedImages here would skip the requested image and
/// instead unload the modules of every other image that is still loaded.
///
/// \returns the error reported by unloadModule(), or success.
Error unloadBinaryImpl(DeviceImageTy *Image) override {
  assert(Context && "Invalid CUDA context");
  assert(Image && "Invalid image");

  // Each image has its own module.
  CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);

  // Unload the module of the image.
  if (auto Err = CUDAImage.unloadModule())
    return Err;

  return Plugin::success();
}
375+
361376
/// Deinitialize the device and release its resources.
362377
Error deinitImpl() override {
363378
if (Context) {
@@ -372,20 +387,6 @@ struct CUDADeviceTy : public GenericDeviceTy {
372387
if (auto Err = CUDAEventManager.deinit())
373388
return Err;
374389

375-
// Close modules if necessary.
376-
if (!LoadedImages.empty()) {
377-
assert(Context && "Invalid CUDA context");
378-
379-
// Each image has its own module.
380-
for (DeviceImageTy *Image : LoadedImages) {
381-
CUDADeviceImageTy &CUDAImage = static_cast<CUDADeviceImageTy &>(*Image);
382-
383-
// Unload the module of the image.
384-
if (auto Err = CUDAImage.unloadModule())
385-
return Err;
386-
}
387-
}
388-
389390
if (Context) {
390391
CUresult Res = cuDevicePrimaryCtxRelease(Device);
391392
if (auto Err =

0 commit comments

Comments
 (0)