@@ -1080,8 +1080,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1080
1080
}
1081
1081
1082
1082
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch (
1083
- const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1084
- EmitFallbackCallbackTy emitTargetCallFallbackCB , TargetKernelArgs &Args,
1083
+ const LocationDescription &Loc, Value *OutlinedFnID,
1084
+ EmitFallbackCallbackTy EmitTargetCallFallbackCB , TargetKernelArgs &Args,
1085
1085
Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP) {
1086
1086
1087
1087
if (!updateToLocation (Loc))
@@ -1134,7 +1134,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
1134
1134
1135
1135
auto CurFn = Builder.GetInsertBlock ()->getParent ();
1136
1136
emitBlock (OffloadFailedBlock, CurFn);
1137
- Builder.restoreIP (emitTargetCallFallbackCB (Builder.saveIP ()));
1137
+ Builder.restoreIP (EmitTargetCallFallbackCB (Builder.saveIP ()));
1138
1138
emitBranch (OffloadContBlock);
1139
1139
emitBlock (OffloadContBlock, CurFn, /* IsFinished=*/ true );
1140
1140
return Builder.saveIP ();
@@ -1736,7 +1736,7 @@ void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
1736
1736
// - All code is inserted in the entry block of the current function.
1737
1737
static Value *emitTaskDependencies (
1738
1738
OpenMPIRBuilder &OMPBuilder,
1739
- SmallVectorImpl<OpenMPIRBuilder::DependData> &Dependencies) {
1739
+ const SmallVectorImpl<OpenMPIRBuilder::DependData> &Dependencies) {
1740
1740
// Early return if we have no dependencies to process
1741
1741
if (Dependencies.empty ())
1742
1742
return nullptr ;
@@ -6403,16 +6403,45 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
6403
6403
SrcLocInfo = getOrCreateIdent (SrcLocStr, SrcLocStrSize);
6404
6404
}
6405
6405
6406
- Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
6407
- PointerNum, RTArgs.BasePointersArray ,
6408
- RTArgs.PointersArray , RTArgs.SizesArray ,
6409
- RTArgs.MapTypesArray , RTArgs.MapNamesArray ,
6410
- RTArgs.MappersArray };
6406
+ SmallVector<llvm::Value *, 13 > OffloadingArgs = {
6407
+ SrcLocInfo, DeviceID,
6408
+ PointerNum, RTArgs.BasePointersArray ,
6409
+ RTArgs.PointersArray , RTArgs.SizesArray ,
6410
+ RTArgs.MapTypesArray , RTArgs.MapNamesArray ,
6411
+ RTArgs.MappersArray };
6411
6412
6412
6413
if (IsStandAlone) {
6413
6414
assert (MapperFunc && " MapperFunc missing for standalone target data" );
6414
- Builder.CreateCall (getOrCreateRuntimeFunctionPtr (*MapperFunc),
6415
- OffloadingArgs);
6415
+
6416
+ auto TaskBodyCB = [&](Value *, Value *, IRBuilderBase::InsertPoint) {
6417
+ if (Info.HasNoWait ) {
6418
+ OffloadingArgs.append ({llvm::Constant::getNullValue (Int32),
6419
+ llvm::Constant::getNullValue (VoidPtr),
6420
+ llvm::Constant::getNullValue (Int32),
6421
+ llvm::Constant::getNullValue (VoidPtr)});
6422
+ }
6423
+
6424
+ Builder.CreateCall (getOrCreateRuntimeFunctionPtr (*MapperFunc),
6425
+ OffloadingArgs);
6426
+
6427
+ if (Info.HasNoWait ) {
6428
+ BasicBlock *OffloadContBlock =
6429
+ BasicBlock::Create (Builder.getContext (), " omp_offload.cont" );
6430
+ Function *CurFn = Builder.GetInsertBlock ()->getParent ();
6431
+ emitBranch (OffloadContBlock);
6432
+ emitBlock (OffloadContBlock, CurFn, /* IsFinished=*/ true );
6433
+ Builder.restoreIP (Builder.saveIP ());
6434
+ }
6435
+ };
6436
+
6437
+ bool RequiresOuterTargetTask = Info.HasNoWait ;
6438
+
6439
+ if (!RequiresOuterTargetTask)
6440
+ TaskBodyCB (/* DeviceID=*/ nullptr , /* RTLoc=*/ nullptr ,
6441
+ /* TargetTaskAllocaIP=*/ {});
6442
+ else
6443
+ emitTargetTask (TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
6444
+ /* Dependencies=*/ {}, Info.HasNoWait );
6416
6445
} else {
6417
6446
Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr (
6418
6447
omp::OMPRTL___tgt_target_data_begin_mapper);
@@ -6836,13 +6865,18 @@ static void emitTargetOutlinedFunction(
6836
6865
OMPBuilder.emitTargetRegionFunction (EntryInfo, GenerateOutlinedFunction,
6837
6866
IsOffloadEntry, OutlinedFn, OutlinedFnID);
6838
6867
}
6868
+
6839
6869
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask (
6840
- Function *OutlinedFn, Value *OutlinedFnID,
6841
- EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
6842
- Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP,
6843
- SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
6870
+ TaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
6871
+ OpenMPIRBuilder::InsertPointTy AllocaIP,
6872
+ const SmallVector<llvm::OpenMPIRBuilder::DependData> &Dependencies,
6844
6873
bool HasNoWait) {
6845
6874
6875
+ // The following explains the code-gen scenario for the `target` directive. A
6876
+ // similar scenario is followed for other device-related directives (e.g.
6877
+ // `target enter data`) but in similar fashion since we only need to emit task
6878
+ // that encapsulates the proper runtime call.
6879
+ //
6846
6880
// When we arrive at this function, the target region itself has been
6847
6881
// outlined into the function OutlinedFn.
6848
6882
// So at this point, for
@@ -6950,22 +6984,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetTask(
6950
6984
6951
6985
Builder.restoreIP (TargetTaskBodyIP);
6952
6986
6953
- if (OutlinedFnID) {
6954
- // emitKernelLaunch makes the necessary runtime call to offload the kernel.
6955
- // We then outline all that code into a separate function
6956
- // ('kernel_launch_function' in the pseudo code above). This function is
6957
- // then called by the target task proxy function (see
6958
- // '@.omp_target_task_proxy_func' in the pseudo code above)
6959
- // "@.omp_target_task_proxy_func' is generated by
6960
- // emitTargetTaskProxyFunction.
6961
- Builder.restoreIP (emitKernelLaunch (Builder, OutlinedFn, OutlinedFnID,
6962
- EmitTargetCallFallbackCB, Args, DeviceID,
6963
- RTLoc, TargetTaskAllocaIP));
6964
- } else {
6965
- // When OutlinedFnID is set to nullptr, then it's not an offloading call. In
6966
- // this case, we execute the host implementation directly.
6967
- Builder.restoreIP (EmitTargetCallFallbackCB (Builder.saveIP ()));
6968
- }
6987
+ TaskBodyCB (DeviceID, RTLoc, TargetTaskAllocaIP);
6969
6988
6970
6989
OI.ExitBB = Builder.saveIP ().getBlock ();
6971
6990
OI.PostOutlineCB = [this , ToBeDeleted, Dependencies, HasNoWait,
@@ -7153,18 +7172,40 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
7153
7172
bool HasDependencies = Dependencies.size () > 0 ;
7154
7173
bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
7155
7174
7175
+ OpenMPIRBuilder::TargetKernelArgs KArgs;
7176
+
7177
+ auto TaskBodyCB = [&](Value *DeviceID, Value *RTLoc,
7178
+ IRBuilderBase::InsertPoint TargetTaskAllocaIP) {
7179
+ if (OutlinedFnID) {
7180
+ // emitKernelLaunch makes the necessary runtime call to offload the
7181
+ // kernel. We then outline all that code into a separate function
7182
+ // ('kernel_launch_function' in the pseudo code above). This function is
7183
+ // then called by the target task proxy function (see
7184
+ // '@.omp_target_task_proxy_func' in the pseudo code above)
7185
+ // '@.omp_target_task_proxy_func' is generated by
7186
+ // emitTargetTaskProxyFunction.
7187
+ Builder.restoreIP (OMPBuilder.emitKernelLaunch (
7188
+ Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID,
7189
+ RTLoc, TargetTaskAllocaIP));
7190
+ } else {
7191
+ // When OutlinedFnID is set to nullptr, then it's not an offloading
7192
+ // call. In this case, we execute the host implementation directly.
7193
+ OMPBuilder.Builder .restoreIP (
7194
+ EmitTargetCallFallbackCB (OMPBuilder.Builder .saveIP ()));
7195
+ }
7196
+ };
7197
+
7156
7198
// If we don't have an ID for the target region, it means an offload entry
7157
7199
// wasn't created. In this case we just run the host fallback directly.
7158
7200
if (!OutlinedFnID) {
7159
7201
if (RequiresOuterTargetTask) {
7160
7202
// Arguments that are intended to be directly forwarded to an
7161
7203
// emitKernelLaunch call are passed as nullptr, since OutlinedFnID=nullptr
7162
7204
// results in that call not being done.
7163
- OpenMPIRBuilder::TargetKernelArgs KArgs;
7164
- Builder.restoreIP (OMPBuilder.emitTargetTask (
7165
- OutlinedFn, /* OutlinedFnID=*/ nullptr , EmitTargetCallFallbackCB, KArgs,
7166
- /* DeviceID=*/ nullptr , /* RTLoc=*/ nullptr , AllocaIP, Dependencies,
7167
- HasNoWait));
7205
+ Builder.restoreIP (OMPBuilder.emitTargetTask (TaskBodyCB,
7206
+ /* DeviceID=*/ nullptr ,
7207
+ /* RTLoc=*/ nullptr , AllocaIP,
7208
+ Dependencies, HasNoWait));
7168
7209
} else {
7169
7210
Builder.restoreIP (EmitTargetCallFallbackCB (Builder.saveIP ()));
7170
7211
}
@@ -7201,20 +7242,19 @@ emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
7201
7242
// TODO: Use correct DynCGGroupMem
7202
7243
Value *DynCGGroupMem = Builder.getInt32 (0 );
7203
7244
7204
- OpenMPIRBuilder::TargetKernelArgs KArgs (NumTargetItems, RTArgs, NumIterations,
7205
- NumTeamsC, NumThreadsC, DynCGGroupMem ,
7206
- HasNoWait);
7245
+ KArgs = OpenMPIRBuilder::TargetKernelArgs (
7246
+ NumTargetItems, RTArgs, NumIterations, NumTeamsC, NumThreadsC,
7247
+ DynCGGroupMem, HasNoWait);
7207
7248
7208
7249
// The presence of certain clauses on the target directive require the
7209
7250
// explicit generation of the target task.
7210
7251
if (RequiresOuterTargetTask) {
7211
7252
Builder.restoreIP (OMPBuilder.emitTargetTask (
7212
- OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID,
7213
- RTLoc, AllocaIP, Dependencies, HasNoWait));
7253
+ TaskBodyCB, DeviceID, RTLoc, AllocaIP, Dependencies, HasNoWait));
7214
7254
} else {
7215
7255
Builder.restoreIP (OMPBuilder.emitKernelLaunch (
7216
- Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
7217
- DeviceID, RTLoc, AllocaIP));
7256
+ Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs, DeviceID, RTLoc ,
7257
+ AllocaIP));
7218
7258
}
7219
7259
}
7220
7260
0 commit comments