diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index f732131bf5b0b789077c1bd6f007d04182219a4c..bfb7f99740a280965aa58195da13393cd8850cd6 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -608,9 +608,13 @@ enum OpenMPRTLFunction { // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, // kmp_int32 num_teams, kmp_int32 thread_limit); OMPRTL__kmpc_push_num_teams, - /// \brief Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, - /// kmpc_micro microtask, ...); + // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro + // microtask, ...); OMPRTL__kmpc_fork_teams, + // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + OMPRTL__kmpc_taskloop, // // Offloading related calls @@ -842,10 +846,11 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction( assert(!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks"); auto *CS = cast<CapturedStmt>(D.getAssociatedStmt()); + auto *TD = dyn_cast<OMPTaskDirective>(&D); CodeGenFunction CGF(CGM, true); - CGOpenMPTaskOutlinedRegionInfo CGInfo( - *CS, ThreadIDVar, CodeGen, InnermostKind, - cast<OMPTaskDirective>(D).hasCancel(), Action); + CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, + InnermostKind, + TD ? 
TD->hasCancel() : false, Action); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); auto *Res = CGF.GenerateCapturedStmtFunction(*CS); if (!Tied) @@ -1433,6 +1438,26 @@ CGOpenMPRuntime::createRuntimeFunction(unsigned Function) { RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams"); break; } + case OMPRTL__kmpc_taskloop: { + // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Type *TypeParams[] = {getIdentTyPointerTy(), + CGM.IntTy, + CGM.VoidPtrTy, + CGM.IntTy, + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty->getPointerTo(), + CGM.Int64Ty, + CGM.IntTy, + CGM.IntTy, + CGM.Int64Ty, + CGM.VoidPtrTy}; + llvm::FunctionType *FnTy = + llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); + RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop"); + break; + } case OMPRTL__tgt_target: { // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t @@ -2492,6 +2517,14 @@ enum KmpTaskTFields { KmpTaskTPartId, /// \brief Function with call of destructors for private variables. KmpTaskTDestructors, + /// (Taskloops only) Lower bound. + KmpTaskTLowerBound, + /// (Taskloops only) Upper bound. + KmpTaskTUpperBound, + /// (Taskloops only) Stride. + KmpTaskTStride, + /// (Taskloops only) Is last iteration flag. 
+ KmpTaskTLastIter, }; } // anonymous namespace @@ -2999,7 +3032,8 @@ createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { } static RecordDecl * -createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, +createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, + QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy) { auto &C = CGM.getContext(); // Build struct kmp_task_t { @@ -3007,6 +3041,11 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, // kmp_routine_entry_t routine; // kmp_int32 part_id; // kmp_routine_entry_t destructors; + // For taskloops additional fields: + // kmp_uint64 lb; + // kmp_uint64 ub; + // kmp_int64 st; + // kmp_int32 liter; // }; auto *RD = C.buildImplicitRecord("kmp_task_t"); RD->startDefinition(); @@ -3014,6 +3053,16 @@ createKmpTaskTRecordDecl(CodeGenModule &CGM, QualType KmpInt32Ty, addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); addFieldToRecordDecl(C, RD, KmpInt32Ty); addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); + if (isOpenMPTaskLoopDirective(Kind)) { + QualType KmpUInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpUInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt64Ty); + addFieldToRecordDecl(C, RD, KmpInt32Ty); + } RD->completeDefinition(); return RD; } @@ -3041,13 +3090,16 @@ createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, /// \code /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, +/// For taskloops: +/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, /// tt->shareds); /// return 0; /// } /// \endcode static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, - QualType 
KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, + OpenMPDirectiveKind Kind, QualType KmpInt32Ty, + QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap) { @@ -3071,7 +3123,10 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args); // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, - // tt, tt->task_data.shareds); + // tt, + // For taskloops: + // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, + // tt->task_data.shareds); auto *GtidParam = CGF.EmitLoadOfScalar( CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); LValue TDBase = CGF.EmitLoadOfPointerLValue( @@ -3098,16 +3153,37 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( PrivatesLVal.getPointer(), CGF.VoidPtrTy); - } else { + } else PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); - } - llvm::Value *CallArgs[] = {GtidParam, PartidParam, PrivatesParam, - TaskPrivatesMap, - CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( - TDBase.getAddress(), CGF.VoidPtrTy) - .getPointer(), - SharedsParam}; + llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, + TaskPrivatesMap, + CGF.Builder + .CreatePointerBitCastOrAddrSpaceCast( + TDBase.getAddress(), CGF.VoidPtrTy) + .getPointer()}; + SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), + std::end(CommonArgs)); + if (isOpenMPTaskLoopDirective(Kind)) { + auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); + auto LBLVal = CGF.EmitLValueForField(Base, *LBFI); + auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal(); + auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), 
KmpTaskTUpperBound); + auto UBLVal = CGF.EmitLValueForField(Base, *UBFI); + auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal(); + auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); + auto StLVal = CGF.EmitLValueForField(Base, *StFI); + auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal(); + auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); + auto LILVal = CGF.EmitLValueForField(Base, *LIFI); + auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal(); + CallArgs.push_back(LBParam); + CallArgs.push_back(UBParam); + CallArgs.push_back(StParam); + CallArgs.push_back(LIParam); + } + CallArgs.push_back(SharedsParam); + CGF.EmitCallOrInvoke(TaskFunction, CallArgs); CGF.EmitStoreThroughLValue( RValue::get(CGF.Builder.getInt32(/*C=*/0)), @@ -3244,20 +3320,17 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1, return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0); } -void CGOpenMPRuntime::emitTaskCall( +CGOpenMPRuntime::TaskDataTy CGOpenMPRuntime::emitTaskInit( CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, - Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, + Address Shareds, ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies, ArrayRef<const Expr *> FirstprivateVars, ArrayRef<const Expr *> FirstprivateCopies, - ArrayRef<const Expr *> FirstprivateInits, - ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { - if (!CGF.HaveInsertPoint()) - return; + ArrayRef<const Expr *> FirstprivateInits) { auto &C = CGM.getContext(); - llvm::SmallVector<PrivateDataTy, 8> Privates; + llvm::SmallVector<PrivateDataTy, 4> Privates; // Aggregate privates and sort them by the alignment. 
auto I = PrivateCopies.begin(); for (auto *E : PrivateVars) { @@ -3287,8 +3360,8 @@ void CGOpenMPRuntime::emitTaskCall( emitKmpRoutineEntryT(KmpInt32Ty); // Build type kmp_task_t (if not built yet). if (KmpTaskTQTy.isNull()) { - KmpTaskTQTy = C.getRecordType( - createKmpTaskTRecordDecl(CGM, KmpInt32Ty, KmpRoutineEntryPtrQTy)); + // NOTE(review): KmpTaskTQTy is built once and reused, yet the record now + // depends on D.getDirectiveKind() (taskloops get lb/ub/st/liter fields). If + // a task is emitted before a taskloop, the cached type presumably lacks the + // fields emitTaskLoopCall() indexes -- confirm and cache per kind. + KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( + CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); } auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); // Build particular struct kmp_task_t for the given task. @@ -3321,8 +3394,9 @@ void CGOpenMPRuntime::emitTaskCall( // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, // kmp_task_t *tt); auto *TaskEntry = emitProxyTaskFunction( - CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTy, - KmpTaskTQTy, SharedsPtrTy, TaskFunction, TaskPrivatesMap); + CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, + KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, + TaskPrivatesMap); // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, @@ -3454,7 +3528,38 @@ void CGOpenMPRuntime::emitTaskCall( CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( DestructorFn, KmpRoutineEntryPtrTy), Destructor); + TaskDataTy Data; + Data.NewTask = NewTask; + Data.TaskEntry = TaskEntry; + Data.NewTaskNewTaskTTy = NewTaskNewTaskTTy; + Data.TDBase = TDBase; + Data.KmpTaskTQTyRD = KmpTaskTQTyRD; + return Data; +} +void CGOpenMPRuntime::emitTaskCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, + bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, + ArrayRef<const Expr *> PrivateCopies, + ArrayRef<const Expr *> 
FirstprivateVars, + ArrayRef<const Expr *> FirstprivateCopies, + ArrayRef<const Expr *> FirstprivateInits, + ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences) { + if (!CGF.HaveInsertPoint()) + return; + + TaskDataTy Data = + emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction, + SharedsTy, Shareds, PrivateVars, PrivateCopies, + FirstprivateVars, FirstprivateCopies, FirstprivateInits); + llvm::Value *NewTask = Data.NewTask; + llvm::Value *TaskEntry = Data.TaskEntry; + llvm::Value *NewTaskNewTaskTTy = Data.NewTaskNewTaskTTy; + LValue TDBase = Data.TDBase; + RecordDecl *KmpTaskTQTyRD = Data.KmpTaskTQTyRD; + auto &C = CGM.getContext(); // Process list of dependences. Address DependenciesArray = Address::invalid(); unsigned NumDependencies = Dependences.size(); @@ -3629,6 +3734,71 @@ void CGOpenMPRuntime::emitTaskCall( } } +void CGOpenMPRuntime::emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, bool Nogroup, + unsigned NumberOfParts, llvm::Value *TaskFunction, QualType SharedsTy, + Address Shareds, const Expr *IfCond, ArrayRef<const Expr *> PrivateVars, + ArrayRef<const Expr *> PrivateCopies, + ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> FirstprivateCopies, + ArrayRef<const Expr *> FirstprivateInits) { + if (!CGF.HaveInsertPoint()) + return; + TaskDataTy Data = + emitTaskInit(CGF, Loc, D, Tied, Final, NumberOfParts, TaskFunction, + SharedsTy, Shareds, PrivateVars, PrivateCopies, + FirstprivateVars, FirstprivateCopies, FirstprivateInits); + // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() + // libcall. 
+ // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int + // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int + // sched, kmp_uint64 grainsize, void *task_dup); + llvm::Value *ThreadID = getThreadID(CGF, Loc); + llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); + llvm::Value *IfVal; + if (IfCond) { + IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, + /*isSigned=*/true); + } else + IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); + + LValue LBLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); + auto *LBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(), + /*IsInitializer=*/true); + LValue UBLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); + auto *UBVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); + CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(), + /*IsInitializer=*/true); + LValue StLVal = CGF.EmitLValueForField( + Data.TDBase, + *std::next(Data.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); + auto *StVar = + cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); + CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(), + /*IsInitializer=*/true); + llvm::Value *TaskArgs[] = { + UpLoc, + ThreadID, + Data.NewTask, + IfVal, + LBLVal.getPointer(), + UBLVal.getPointer(), + CGF.EmitLoadOfScalar(StLVal, SourceLocation()), + llvm::ConstantInt::getSigned(CGF.IntTy, Nogroup ? 
1 : 0), + llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/0), + llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), + llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; + CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs); +} + /// \brief Emit reduction operation for each element of array (required for /// array sections) LHS op = RHS. /// \param Type Type of array. diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index a7efd45fd45253177eb0a693877a6aed532b40a0..028e46dec7ac2af0322b1e2d847bb7a6268c4928 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H +#include "CGValue.h" #include "clang/AST/Type.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/SourceLocation.h" @@ -37,6 +38,7 @@ namespace clang { class Expr; class GlobalDecl; class OMPExecutableDirective; +class OMPLoopDirective; class VarDecl; class OMPDeclareReductionDecl; class IdentifierInfo; @@ -431,6 +433,64 @@ private: /// llvm::Value *getCriticalRegionLock(StringRef CriticalName); + struct TaskDataTy { + llvm::Value *NewTask; + llvm::Value *TaskEntry; + llvm::Value *NewTaskNewTaskTTy; + LValue TDBase; + RecordDecl *KmpTaskTQTyRD; + }; + /// Emit task region for the task directive. The task region is emitted in + /// several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. 
Copy a pointer to destructors function to field destructors of the + /// resulting structure kmp_task_t. + /// \param D Current task directive. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). + /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param NumberOfParts Number of parts in untied tasks. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references to the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param PrivateVars List of references to private variables for the task + /// directive. + /// \param PrivateCopies List of private copies for each private variable in + /// \p PrivateVars. + /// \param FirstprivateVars List of references to private variables for the + /// task directive. + /// \param FirstprivateCopies List of private copies for each private variable + /// in \p FirstprivateVars. + /// \param FirstprivateInits List of references to auto generated variables + /// used for initialization of a single array element. Used if firstprivate + /// variable is of array type. 
+ TaskDataTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, + const OMPExecutableDirective &D, bool Tied, + llvm::PointerIntPair<llvm::Value *, 1, bool> Final, + unsigned NumberOfParts, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, + ArrayRef<const Expr *> PrivateVars, + ArrayRef<const Expr *> PrivateCopies, + ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> FirstprivateCopies, + ArrayRef<const Expr *> FirstprivateInits); + public: explicit CGOpenMPRuntime(CodeGenModule &CGM); virtual ~CGOpenMPRuntime() {} @@ -774,6 +834,62 @@ public: ArrayRef<const Expr *> FirstprivateInits, ArrayRef<std::pair<OpenMPDependClauseKind, const Expr *>> Dependences); + /// Emit task region for the taskloop directive. The taskloop region is + /// emitted in several steps: + /// 1. Emit a call to kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 + /// gtid, kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, + /// kmp_routine_entry_t *task_entry). Here task_entry is a pointer to the + /// function: + /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { + /// TaskFunction(gtid, tt->part_id, tt->shareds); + /// return 0; + /// } + /// 2. Copy a list of shared variables to field shareds of the resulting + /// structure kmp_task_t returned by the previous call (if any). + /// 3. Copy a pointer to destructors function to field destructors of the + /// resulting structure kmp_task_t. + /// 4. Emit a call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t + /// *task, int if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int + /// nogroup, int sched, kmp_uint64 grainsize, void *task_dup), where task + /// is a resulting structure from + /// previous items. + /// \param D Current taskloop directive. + /// \param Tied true if the task is tied (the task is tied to the thread that + /// can suspend its task region), false - untied (the task is not tied to any + /// thread). 
+ /// \param Final Contains either constant bool value, or llvm::Value * of i1 + /// type for final clause. If the value is true, the task forces all of its + /// child tasks to become final and included tasks. + /// \param Nogroup true if nogroup clause was specified, false otherwise. + /// \param NumberOfParts Number of parts in untied taskloops. + /// \param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 + /// /*part_id*/, captured_struct */*__context*/); + /// \param SharedsTy A type which contains references to the shared variables. + /// \param Shareds Context with the list of shared variables from the \p + /// TaskFunction. + /// \param IfCond Not a nullptr if 'if' clause was specified, nullptr + /// otherwise. + /// \param PrivateVars List of references to private variables for the task + /// directive. + /// \param PrivateCopies List of private copies for each private variable in + /// \p PrivateVars. + /// \param FirstprivateVars List of references to private variables for the + /// task directive. + /// \param FirstprivateCopies List of private copies for each private variable + /// in \p FirstprivateVars. + /// \param FirstprivateInits List of references to auto generated variables + /// used for initialization of a single array element. Used if firstprivate + /// variable is of array type. + virtual void emitTaskLoopCall( + CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, + bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final, + bool Nogroup, unsigned NumberOfParts, llvm::Value *TaskFunction, + QualType SharedsTy, Address Shareds, const Expr *IfCond, + ArrayRef<const Expr *> PrivateVars, ArrayRef<const Expr *> PrivateCopies, + ArrayRef<const Expr *> FirstprivateVars, + ArrayRef<const Expr *> FirstprivateCopies, + ArrayRef<const Expr *> FirstprivateInits); + /// \brief Emit code for the directive that does not require outlining. 
/// /// \param InnermostKind Kind of innermost directive (for simple directives it diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp index c4f8311f2ea9604912c4ccf387eab95ec8db6d7c..4d9ecf0068a91914eaeae37001aac5885a92da0e 100644 --- a/lib/CodeGen/CGStmtOpenMP.cpp +++ b/lib/CodeGen/CGStmtOpenMP.cpp @@ -1630,8 +1630,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic, // IV < UB BoolCondVal = EvaluateExprAsBool(S.getCond()); } else { - BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, - IL, LB, UB, ST); + BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL, + LB, UB, ST); } // If there are any cleanups between here and the loop-exit scope, @@ -2280,10 +2280,12 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective( emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen); } -void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { +void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, + bool Tied) { // Emit outlined function for task construct. auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); - auto CapturedStruct = GenerateCapturedStmtArgument(*CS); auto *I = CS->getCapturedDecl()->param_begin(); auto *PartId = std::next(I); auto *TaskT = std::next(I, 4); @@ -2291,52 +2293,44 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { // part id (0 for tied tasks, >=0 for untied task). llvm::DenseSet<const VarDecl *> EmittedAsPrivate; // Get list of private variables. 
- llvm::SmallVector<const Expr *, 8> PrivateVars; - llvm::SmallVector<const Expr *, 8> PrivateCopies; + OMPPrivateDataTy Data; + Data.Tied = Tied; for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { auto IRef = C->varlist_begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - PrivateVars.push_back(*IRef); - PrivateCopies.push_back(IInit); + Data.PrivateVars.push_back(*IRef); + Data.PrivateCopies.push_back(IInit); } ++IRef; } } EmittedAsPrivate.clear(); // Get list of firstprivate variables. - llvm::SmallVector<const Expr *, 8> FirstprivateVars; - llvm::SmallVector<const Expr *, 8> FirstprivateCopies; - llvm::SmallVector<const Expr *, 8> FirstprivateInits; for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { auto IRef = C->varlist_begin(); auto IElemInitRef = C->inits().begin(); for (auto *IInit : C->private_copies()) { auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { - FirstprivateVars.push_back(*IRef); - FirstprivateCopies.push_back(IInit); - FirstprivateInits.push_back(*IElemInitRef); + Data.FirstprivateVars.push_back(*IRef); + Data.FirstprivateCopies.push_back(IInit); + Data.FirstprivateInits.push_back(*IElemInitRef); } ++IRef; ++IElemInitRef; } } // Build list of dependences. 
- llvm::SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 8> - Dependences; - for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { - for (auto *IRef : C->varlists()) { - Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); - } - } - auto &&CodeGen = [&S, &PrivateVars, &FirstprivateVars]( - CodeGenFunction &CGF, PrePostActionTy &Action) { - OMPPrivateScope Scope(CGF); + for (const auto *C : S.getClausesOfKind<OMPDependClause>()) + for (auto *IRef : C->varlists()) + Data.Dependences.push_back(std::make_pair(C->getDependencyKind(), IRef)); + auto &&CodeGen = [PartId, &S, &Data, CS, &BodyGen](CodeGenFunction &CGF, + PrePostActionTy &Action) { // Set proper addresses for generated private copies. - auto *CS = cast<CapturedStmt>(S.getAssociatedStmt()); - if (!PrivateVars.empty() || !FirstprivateVars.empty()) { + OMPPrivateScope Scope(CGF); + if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty()) { auto *CopyFn = CGF.Builder.CreateLoad( CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(3))); auto *PrivatesPtr = CGF.Builder.CreateLoad( @@ -2345,14 +2339,14 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; llvm::SmallVector<llvm::Value *, 16> CallArgs; CallArgs.push_back(PrivatesPtr); - for (auto *E : PrivateVars) { + for (auto *E : Data.PrivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp( CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr"); PrivatePtrs.push_back(std::make_pair(VD, PrivatePtr)); CallArgs.push_back(PrivatePtr.getPointer()); } - for (auto *E : FirstprivateVars) { + for (auto *E : Data.FirstprivateVars) { auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); Address PrivatePtr = CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()), @@ -2370,13 +2364,21 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { 
(void)Scope.Privatize(); Action.Enter(CGF); - CGF.EmitStmt(CS->getCapturedStmt()); + BodyGen(CGF); }; + auto *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( + S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, + Data.NumberOfParts); + OMPLexicalScope Scope(*this, S); + TaskGen(*this, OutlinedFn, Data); +} + +void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { + // Emit outlined function for task construct. + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); // Check if we should emit tied or untied task. bool Tied = !S.getSingleClause<OMPUntiedClause>(); - unsigned NumberOfParts; - auto OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( - S, *I, *PartId, *TaskT, OMPD_task, CodeGen, Tied, NumberOfParts); // Check if the task is final llvm::PointerIntPair<llvm::Value *, 1, bool> Final; if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { @@ -2401,11 +2403,20 @@ void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { break; } } - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitTaskCall( - *this, S.getLocStart(), S, Tied, Final, NumberOfParts, OutlinedFn, - SharedsTy, CapturedStruct, IfCond, PrivateVars, PrivateCopies, - FirstprivateVars, FirstprivateCopies, FirstprivateInits, Dependences); + + auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { + CGF.EmitStmt(CS->getCapturedStmt()); + }; + auto &&TaskGen = [&S, &Final, SharedsTy, CapturedStruct, + IfCond](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPPrivateDataTy &Data) { + CGF.CGM.getOpenMPRuntime().emitTaskCall( + CGF, S.getLocStart(), S, Data.Tied, Final, Data.NumberOfParts, + OutlinedFn, SharedsTy, CapturedStruct, IfCond, Data.PrivateVars, + Data.PrivateCopies, Data.FirstprivateVars, Data.FirstprivateCopies, + Data.FirstprivateInits, Data.Dependences); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, Tied); } void 
CodeGenFunction::EmitOMPTaskyieldDirective( @@ -3230,15 +3241,136 @@ void CodeGenFunction::EmitOMPTargetParallelForDirective( // TODO: codegen for target parallel for. } +/// Map helper variable \p Helper to the address of \p PVD in \p Privates. +static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, + const ImplicitParamDecl *PVD, + CodeGenFunction::OMPPrivateScope &Privates) { + auto *VDecl = cast<VarDecl>(Helper->getDecl()); + Privates.addPrivate( + VDecl, [&CGF, PVD]() -> Address { return CGF.GetAddrOfLocalVar(PVD); }); +} + +void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { + assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); + // Emit outlined function for task construct. + auto CS = cast<CapturedStmt>(S.getAssociatedStmt()); + auto CapturedStruct = GenerateCapturedStmtArgument(*CS); + auto SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl()); + const Expr *IfCond = nullptr; + for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { + if (C->getNameModifier() == OMPD_unknown || + C->getNameModifier() == OMPD_taskloop) { + IfCond = C->getCondition(); + break; + } + } + bool Nogroup = S.getSingleClause<OMPNogroupClause>(); + // TODO: Check if we should emit tied or untied task. + // Check if the task is final + llvm::PointerIntPair<llvm::Value *, 1, bool> Final; + if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { + // If the condition constant folds and can be elided, try to avoid emitting + // the condition and the dead arm of the if/else. + auto *Cond = Clause->getCondition(); + bool CondConstant; + if (ConstantFoldsToSimpleInteger(Cond, CondConstant)) + Final.setInt(CondConstant); + else + Final.setPointer(EvaluateExprAsBool(Cond)); + } else { + // By default the task is not final. 
+ Final.setInt(/*IntVal=*/false); + } + + auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { + // if (PreCond) { + // for (IV in 0..LastIteration) BODY; + // <Final counter/linear vars updates>; + // } + // + + // Emit: if (PreCond) - begin. + // If the condition constant folds and can be elided, avoid emitting the + // whole loop. + bool CondConstant; + llvm::BasicBlock *ContBlock = nullptr; + OMPLoopScope PreInitScope(CGF, S); + if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) { + if (!CondConstant) + return; + } else { + auto *ThenBlock = CGF.createBasicBlock("taskloop.if.then"); + ContBlock = CGF.createBasicBlock("taskloop.if.end"); + emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock, + CGF.getProfileCount(&S)); + CGF.EmitBlock(ThenBlock); + CGF.incrementProfileCounter(&S); + } + + OMPPrivateScope LoopScope(CGF); + // Emit helper vars inits. + enum { LowerBound = 5, UpperBound, Stride, LastIter }; + auto *I = CS->getCapturedDecl()->param_begin(); + auto *LBP = std::next(I, LowerBound); + auto *UBP = std::next(I, UpperBound); + auto *STP = std::next(I, Stride); + auto *LIP = std::next(I, LastIter); + mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP, + LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope); + mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP, + LoopScope); + CGF.EmitOMPPrivateLoopCounters(S, LoopScope); + (void)LoopScope.Privatize(); + // Emit the loop iteration variable. + const Expr *IVExpr = S.getIterationVariable(); + const VarDecl *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl()); + CGF.EmitVarDecl(*IVDecl); + CGF.EmitIgnoredExpr(S.getInit()); + + // Emit the iterations count variable. + // If it is not a variable, Sema decided to calculate iterations count on + // each iteration (e.g., it is foldable into a constant). 
+ if (auto LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) { + CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl())); + // Emit calculation of the iterations count. + CGF.EmitIgnoredExpr(S.getCalcLastIteration()); + } + + CGF.EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), + S.getInc(), + [&S](CodeGenFunction &CGF) { + CGF.EmitOMPLoopBody(S, JumpDest()); + CGF.EmitStopPoint(&S); + }, + [](CodeGenFunction &) {}); + // Emit: if (PreCond) - end. + if (ContBlock) { + CGF.EmitBranch(ContBlock); + CGF.EmitBlock(ContBlock, true); + } + }; + auto &&TaskGen = [&S, SharedsTy, CapturedStruct, IfCond, &Final, + Nogroup](CodeGenFunction &CGF, llvm::Value *OutlinedFn, + const OMPPrivateDataTy &Data) { + auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &) { + OMPLoopScope PreInitScope(CGF, S); + CGF.CGM.getOpenMPRuntime().emitTaskLoopCall( + CGF, S.getLocStart(), S, Data.Tied, Final, Nogroup, + Data.NumberOfParts, OutlinedFn, SharedsTy, CapturedStruct, IfCond, + Data.PrivateVars, Data.PrivateCopies, Data.FirstprivateVars, + Data.FirstprivateCopies, Data.FirstprivateInits); + }; + CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, + CodeGen); + }; + EmitOMPTaskBasedDirective(S, BodyGen, TaskGen, /*Tied=*/true); +} + void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { - // emit the code inside the construct for now - OMPLexicalScope Scope(*this, S); - CGM.getOpenMPRuntime().emitInlinedDirective( - *this, OMPD_taskloop, [&S](CodeGenFunction &CGF, PrePostActionTy &) { - OMPLoopScope PreInitScope(CGF, S); - CGF.EmitStmt( - cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt()); - }); + EmitOMPTaskLoopBasedDirective(S); } void CodeGenFunction::EmitOMPTaskLoopSimdDirective( diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 17b7fcaa255afea3d29d2508e29ad150477f802c..148a9453f6cfbf014462d9232edfd0c467242c4b 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ 
b/lib/CodeGen/CodeGenFunction.h @@ -85,6 +85,7 @@ class BlockByrefHelpers; class BlockByrefInfo; class BlockFlags; class BlockFieldFlags; +class RegionCodeGenTy; class TargetCodeGenInfo; /// The kind of evaluation to perform on values of a particular @@ -2340,6 +2341,24 @@ public: /// \param D Directive (possibly) with the 'linear' clause. void EmitOMPLinearClauseInit(const OMPLoopDirective &D); + struct OMPPrivateDataTy { + bool Tied; + unsigned NumberOfParts; + SmallVector<const Expr *, 4> PrivateVars; + SmallVector<const Expr *, 4> PrivateCopies; + SmallVector<const Expr *, 4> FirstprivateVars; + SmallVector<const Expr *, 4> FirstprivateCopies; + SmallVector<const Expr *, 4> FirstprivateInits; + SmallVector<std::pair<OpenMPDependClauseKind, const Expr *>, 4> Dependences; + }; + typedef const llvm::function_ref<void(CodeGenFunction & /*CGF*/, + llvm::Value * /*OutlinedFn*/, + const OMPPrivateDataTy & /*Data*/)> + TaskGenTy; + void EmitOMPTaskBasedDirective(const OMPExecutableDirective &S, + const RegionCodeGenTy &BodyGen, + const TaskGenTy &TaskGen, bool Tied); + void EmitOMPParallelDirective(const OMPParallelDirective &S); void EmitOMPSimdDirective(const OMPSimdDirective &S); void EmitOMPForDirective(const OMPForDirective &S); @@ -2371,6 +2390,7 @@ public: void EmitOMPCancellationPointDirective(const OMPCancellationPointDirective &S); void EmitOMPCancelDirective(const OMPCancelDirective &S); + void EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S); void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S); void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S); void EmitOMPDistributeDirective(const OMPDistributeDirective &S); diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 97647c3f99c2d8a23adbe7c5411b2c9359a017cd..d2c08ca97288bd72b2b2a41a8bdf04f2db258320 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1675,11 +1675,37 @@ void Sema::ActOnOpenMPRegionStart(OpenMPDirectiveKind DKind, Scope 
*CurScope) { break; } case OMPD_taskloop: { + QualType KmpInt32Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); + QualType KmpUInt64Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + QualType KmpInt64Ty = + Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + QualType Args[] = {Context.VoidPtrTy.withConst().withRestrict()}; + FunctionProtoType::ExtProtoInfo EPI; + EPI.Variadic = true; + QualType CopyFnType = Context.getFunctionType(Context.VoidTy, Args, EPI); Sema::CapturedParamNameType Params[] = { + std::make_pair(".global_tid.", KmpInt32Ty), + std::make_pair(".part_id.", Context.getPointerType(KmpInt32Ty)), + std::make_pair(".privates.", + Context.VoidPtrTy.withConst().withRestrict()), + std::make_pair( + ".copy_fn.", + Context.getPointerType(CopyFnType).withConst().withRestrict()), + std::make_pair(".task_t.", Context.VoidPtrTy.withConst()), + std::make_pair(".lb.", KmpUInt64Ty), + std::make_pair(".ub.", KmpUInt64Ty), std::make_pair(".st.", KmpInt64Ty), + std::make_pair(".liter.", KmpInt32Ty), std::make_pair(StringRef(), QualType()) // __context with shared vars }; ActOnCapturedRegionStart(DSAStack->getConstructLoc(), CurScope, CR_OpenMP, Params); + // Mark this captured region as inlined, because we don't use outlined + // function directly. 
+ getCurCapturedRegion()->TheCapturedDecl->addAttr( + AlwaysInlineAttr::CreateImplicit( + Context, AlwaysInlineAttr::Keyword_forceinline, SourceRange())); break; } case OMPD_taskloop_simd: { @@ -4614,6 +4640,15 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, LastIteration32.get()->getType()->hasSignedIntegerRepresentation(), LastIteration64.get(), SemaRef))) LastIteration = LastIteration32; + QualType VType = LastIteration.get()->getType(); + QualType RealVType = VType; + QualType StrideVType = VType; + if (isOpenMPTaskLoopDirective(DKind)) { + VType = + SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); + StrideVType = + SemaRef.Context.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); + } if (!LastIteration.isUsable()) return 0; @@ -4649,7 +4684,6 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, SourceLocation InitLoc = IterSpaces[0].InitSrcRange.getBegin(); - QualType VType = LastIteration.get()->getType(); // Build variables passed into runtime, nesessary for worksharing directives. ExprResult LB, UB, IL, ST, EUB; if (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || @@ -4678,8 +4712,9 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, /*DirectInit*/ false, /*TypeMayContainAuto*/ false); // Stride variable returned by runtime (we initialize it to 1 by default). 
- VarDecl *STDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.stride"); - ST = buildDeclRefExpr(SemaRef, STDecl, VType, InitLoc); + VarDecl *STDecl = + buildVarDecl(SemaRef, InitLoc, StrideVType, ".omp.stride"); + ST = buildDeclRefExpr(SemaRef, STDecl, StrideVType, InitLoc); SemaRef.AddInitializerToDecl( STDecl, SemaRef.ActOnIntegerConstant(InitLoc, 1).get(), /*DirectInit*/ false, /*TypeMayContainAuto*/ false); @@ -4699,8 +4734,8 @@ CheckOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, ExprResult IV; ExprResult Init; { - VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, VType, ".omp.iv"); - IV = buildDeclRefExpr(SemaRef, IVDecl, VType, InitLoc); + VarDecl *IVDecl = buildVarDecl(SemaRef, InitLoc, RealVType, ".omp.iv"); + IV = buildDeclRefExpr(SemaRef, IVDecl, RealVType, InitLoc); Expr *RHS = (isOpenMPWorksharingDirective(DKind) || isOpenMPTaskLoopDirective(DKind) || isOpenMPDistributeDirective(DKind)) diff --git a/test/OpenMP/taskloop_codegen.cpp b/test/OpenMP/taskloop_codegen.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3ae1b122089302cb4cee9f8e95b69e4fd7f5089 --- /dev/null +++ b/test/OpenMP/taskloop_codegen.cpp @@ -0,0 +1,198 @@ +// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp -x c++ -emit-llvm %s -o - -femit-all-decls | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -x c++ -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - -femit-all-decls | FileCheck %s +// expected-no-diagnostics +// REQUIRES: x86-registered-target +#ifndef HEADER +#define HEADER + +// CHECK-LABEL: @main +int main(int argc, char **argv) { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK1:@.+]] to i32 (i32, i8*)*)) +// 
CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +#pragma omp taskloop + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK2:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 9, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void 
@__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 1, i32 0, i64 0, i8* null) +#pragma omp taskloop nogroup + for (int i = 0; i < 10; ++i) + ; +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 24, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK3:@.+]] to i32 (i32, i8*)*)) +// CHECK: [[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[IF:%.+]] = icmp ne i32 %{{.+}}, 0 +// CHECK: [[IF_INT:%.+]] = sext i1 [[IF]] to i32 +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 [[IF_INT]], i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) + int i; +#pragma omp taskloop if(argc) shared(argc, argv) collapse(2) + for (i = 0; i < argc; ++i) + for (int j = argc; j < argv[argc][argc]; ++j) + ; +} + +// CHECK: define internal i32 [[TASK1]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = 
getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK2]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: 
[[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 [[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +// CHECK: define internal i32 [[TASK3]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: store i64 [[LB_VAL]], i64* [[CNT:%.+]], +// CHECK: br label +// CHECK: ret i32 0 + +// CHECK-LABEL: @_ZN1SC2Ei +struct S { + int a; + S(int c) { +// CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num(%ident_t* [[DEFLOC:@.+]]) +// CHECK: [[TASKV:%.+]] = call i8* @__kmpc_omp_task_alloc(%ident_t* [[DEFLOC]], i32 [[GTID]], i32 1, i64 64, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[TDP_TY:%.+]]*)* [[TASK4:@.+]] to i32 (i32, i8*)*)) +// CHECK: 
[[TASK:%.+]] = bitcast i8* [[TASKV]] to [[TDP_TY]]* +// CHECK: [[TASK_DATA:%.+]] = getelementptr inbounds [[TDP_TY]], [[TDP_TY]]* [[TASK]], i32 0, i32 0 +// CHECK: getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 3 +// CHECK: store i32 (i32, i8*)* null, i32 (i32, i8*)** %{{.+}} +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 4 +// CHECK: store i64 0, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 5 +// CHECK: store i64 %{{.+}}, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* [[TASK_DATA]], i32 0, i32 6 +// CHECK: store i64 1, i64* [[ST]], +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: call void @__kmpc_taskloop(%ident_t* [[DEFLOC]], i32 [[GTID]], i8* [[TASKV]], i32 1, i64* [[DOWN]], i64* [[UP]], i64 [[ST_VAL]], i32 0, i32 0, i64 0, i8* null) +#pragma omp taskloop shared(c) + for (a = 0; a < c; ++a) + ; + } +} s(1); + +// CHECK: define internal i32 [[TASK4]]( +// CHECK: [[DOWN:%.+]] = getelementptr inbounds [[TD_TY:%.+]], [[TD_TY]]* %{{.+}}, i32 0, i32 4 +// CHECK: [[DOWN_VAL:%.+]] = load i64, i64* [[DOWN]], +// CHECK: [[UP:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 5 +// CHECK: [[UP_VAL:%.+]] = load i64, i64* [[UP]], +// CHECK: [[ST:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 6 +// CHECK: [[ST_VAL:%.+]] = load i64, i64* [[ST]], +// CHECK: [[LITER:%.+]] = getelementptr inbounds [[TD_TY]], [[TD_TY]]* %{{.+}}, i32 0, i32 7 +// CHECK: [[LITER_VAL:%.+]] = load i32, i32* [[LITER]], +// CHECK: store i64 [[DOWN_VAL]], i64* [[LB:%[^,]+]], +// CHECK: store i64 [[UP_VAL]], i64* [[UB:%[^,]+]], +// CHECK: store i64 [[ST_VAL]], i64* [[ST:%[^,]+]], +// CHECK: store i32 [[LITER_VAL]], i32* [[LITER:%[^,]+]], +// CHECK: [[LB_VAL:%.+]] = load i64, i64* [[LB]], +// CHECK: [[LB_I32:%.+]] = trunc i64 [[LB_VAL]] to i32 +// CHECK: store i32 
[[LB_I32]], i32* [[CNT:%.+]], +// CHECK: br label +// CHECK: [[VAL:%.+]] = load i32, i32* [[CNT]], +// CHECK: [[VAL_I64:%.+]] = sext i32 [[VAL]] to i64 +// CHECK: [[UB_VAL:%.+]] = load i64, i64* [[UB]], +// CHECK: [[CMP:%.+]] = icmp ule i64 [[VAL_I64]], [[UB_VAL]] +// CHECK: br i1 [[CMP]], label %{{.+}}, label %{{.+}} +// CHECK: load i32, i32* % +// CHECK: store i32 % +// CHECK: load i32, i32* % +// CHECK: add nsw i32 %{{.+}}, 1 +// CHECK: store i32 %{{.+}}, i32* % +// CHECK: br label % +// CHECK: ret i32 0 + +#endif