//===- SPIRVToOCL20.cpp - Transform SPIR-V builtins to OCL20 builtins-------===// // // The LLVM/SPIRV Translator // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // // Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a // copy of this software and associated documentation files (the "Software"), // to deal with the Software without restriction, including without limitation // the rights to use, copy, modify, merge, publish, distribute, sublicense, // and/or sell copies of the Software, and to permit persons to whom the // Software is furnished to do so, subject to the following conditions: // // Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimers. // Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimers in the documentation // and/or other materials provided with the distribution. // Neither the names of Advanced Micro Devices, Inc., nor the names of its // contributors may be used to endorse or promote products derived from this // Software without specific prior written permission. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE // CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH // THE SOFTWARE. // //===----------------------------------------------------------------------===// // // This file implements transform SPIR-V builtins to OCL 2.0 builtins. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "spvtocl20" #include "SPIRVInternal.h" #include "OCLUtil.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/IR/InstVisitor.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/PassSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include <cstring> using namespace llvm; using namespace SPIRV; using namespace OCLUtil; namespace SPIRV { static cl::opt<std::string> MangledAtomicTypeNamePrefix("spirv-atomic-prefix", cl::desc("Mangled atomic type name prefix"), cl::init("U7_Atomic")); class SPIRVToOCL20: public ModulePass, public InstVisitor<SPIRVToOCL20> { public: SPIRVToOCL20():ModulePass(ID), M(nullptr), Ctx(nullptr) { initializeSPIRVToOCL20Pass(*PassRegistry::getPassRegistry()); } virtual bool runOnModule(Module &M); void visitCallInst(CallInst &CI); // SPIR-V reader should translate vector casts into OCL built-ins because // such conversions are not defined neither by OpenCL C/C++ nor // by SPIR 1.2/2.0 standards. So, it is safer to convert such casts into // appropriate calls to conversion built-ins defined by the standards. void visitCastInst(CastInst &CI); /// Transform __spirv_ImageQuerySize[Lod] into vector of the same lenght /// containing {[get_image_width | get_image_dim], get_image_array_size} /// for all images except image1d_t which is always converted into /// get_image_width returning scalar result. void visitCallSPRIVImageQuerySize(CallInst *CI); /// Transform __spirv_Atomic* to atomic_*. /// __spirv_Atomic*(atomic_op, scope, sema, ops, ...) => /// atomic_*(atomic_op, ops, ..., order(sema), map(scope)) void visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC); /// Transform __spirv_Group* to {work_group|sub_group}_*. /// /// Special handling of work_group_broadcast. /// __spirv_GroupBroadcast(a, vec3(x, y, z)) /// => /// work_group_broadcast(a, x, y, z) /// /// Transform OpenCL group builtin function names from group_ /// to workgroup_ and sub_group_. /// Insert group operation part: reduce_/inclusive_scan_/exclusive_scan_ /// Transform the operation part: /// fadd/iadd/sadd => add /// fmax/smax => max /// fmin/smin => min /// Keep umax/umin unchanged. void visitCallSPIRVGroupBuiltin(CallInst *CI, Op OC); /// Transform __spirv_MemoryBarrier to atomic_work_item_fence. /// __spirv_MemoryBarrier(scope, sema) => /// atomic_work_item_fence(flag(sema), order(sema), map(scope)) void visitCallSPIRVMemoryBarrier(CallInst *CI); /// Transform __spirv_{PipeOpName} to OCL pipe builtin functions. void visitCallSPIRVPipeBuiltin(CallInst *CI, Op OC); /// Transform __spirv_* builtins to OCL 2.0 builtins. /// No change with arguments. void visitCallSPIRVBuiltin(CallInst *CI, Op OC); /// Translate mangled atomic type name: "atomic_" => /// MangledAtomicTypeNamePrefix void translateMangledAtomicTypeName(); /// Get prefix work_/sub_ for OCL group builtin functions. /// Assuming the first argument of \param CI is a constant integer for /// workgroup/subgroup scope enums. std::string getGroupBuiltinPrefix(CallInst *CI); static char ID; private: Module *M; LLVMContext *Ctx; }; char SPIRVToOCL20::ID = 0; bool SPIRVToOCL20::runOnModule(Module& Module) { M = &Module; Ctx = &M->getContext(); visit(*M); translateMangledAtomicTypeName(); eraseUselessFunctions(&Module); DEBUG(dbgs() << "After SPIRVToOCL20:\n" << *M); std::string Err; raw_string_ostream ErrorOS(Err); if (verifyModule(*M, &ErrorOS)){ DEBUG(errs() << "Fails to verify module: " << ErrorOS.str()); } return true; } void SPIRVToOCL20::visitCallInst(CallInst& CI) { DEBUG(dbgs() << "[visistCallInst] " << CI << '\n'); auto F = CI.getCalledFunction(); if (!F) return; auto MangledName = F->getName(); std::string DemangledName; Op OC = OpNop; if (!oclIsBuiltin(MangledName, &DemangledName) || (OC = getSPIRVFuncOC(DemangledName)) == OpNop) return; DEBUG(dbgs() << "DemangledName = " << DemangledName.c_str() << '\n' << "OpCode = " << OC << '\n'); if (OC == OpImageQuerySize || OC == OpImageQuerySizeLod) { visitCallSPRIVImageQuerySize(&CI); return; } if (OC == OpMemoryBarrier) { visitCallSPIRVMemoryBarrier(&CI); return; } if (isAtomicOpCode(OC)) { visitCallSPIRVAtomicBuiltin(&CI, OC); return; } if (isGroupOpCode(OC)) { visitCallSPIRVGroupBuiltin(&CI, OC); return; } if (isPipeOpCode(OC)) { visitCallSPIRVPipeBuiltin(&CI, OC); return; } if (OCLSPIRVBuiltinMap::rfind(OC)) visitCallSPIRVBuiltin(&CI, OC); } void SPIRVToOCL20::visitCallSPIRVMemoryBarrier(CallInst* CI) { AttributeSet Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args){ auto getArg = [=](unsigned I){ return cast<ConstantInt>(Args[I])->getZExtValue(); }; auto MScope = static_cast<Scope>(getArg(0)); auto Sema = mapSPIRVMemSemanticToOCL(getArg(1)); Args.resize(3); Args[0] = getInt32(M, Sema.first); Args[1] = getInt32(M, Sema.second); Args[2] = getInt32(M, rmap<OCLScopeKind>(MScope)); return kOCLBuiltinName::AtomicWorkItemFence; }, &Attrs); } void SPIRVToOCL20::visitCallSPRIVImageQuerySize(CallInst *CI) { Function * func = CI->getCalledFunction(); // Get image type Type * argTy = func->getFunctionType()->getParamType(0); assert(argTy->isPointerTy() && "argument must be a pointer to opaque structure"); StructType * imgTy = cast<StructType>(argTy->getPointerElementType()); assert(imgTy->isOpaque() && "image type must be an opaque structure"); StringRef imgTyName = imgTy->getName(); assert(imgTyName.startswith("opencl.image") && "not an OCL image type"); unsigned imgDim = 0; bool imgArray = false; if (imgTyName.startswith("opencl.image1d")) { imgDim = 1; } else if (imgTyName.startswith("opencl.image2d")) { imgDim = 2; } else if (imgTyName.startswith("opencl.image3d")) { imgDim = 3; } assert(imgDim != 0 && "unexpected image dimensionality"); if (imgTyName.count("_array_") != 0) { imgArray = true; } AttributeSet attributes = CI->getCalledFunction()->getAttributes(); BuiltinFuncMangleInfo mangle; Type * int32Ty = Type::getInt32Ty(*Ctx); Instruction * getImageSize = nullptr; if (imgDim == 1) { // OpImageQuerySize from non-arrayed 1d image is always translated // into get_image_width returning scalar argument getImageSize = addCallInst(M, kOCLBuiltinName::GetImageWidth, int32Ty, CI->getArgOperand(0), &attributes, CI, &mangle, CI->getName(), false); // The width of integer type returning by OpImageQuerySize[Lod] may // differ from i32 if (CI->getType()->getScalarType() != int32Ty) { getImageSize = CastInst::CreateIntegerCast(getImageSize, CI->getType()->getScalarType(), false, CI->getName(), CI); } } else { assert((imgDim == 2 || imgDim == 3) && "invalid image type"); assert(CI->getType()->isVectorTy() && "this code can handle vector result type only"); // get_image_dim returns int2 and int4 for 2d and 3d images respecitvely. const unsigned imgDimRetEls = imgDim == 2 ? 2 : 4; VectorType * retTy = VectorType::get(int32Ty, imgDimRetEls); getImageSize = addCallInst(M, kOCLBuiltinName::GetImageDim, retTy, CI->getArgOperand(0), &attributes, CI, &mangle, CI->getName(), false); // The width of integer type returning by OpImageQuerySize[Lod] may // differ from i32 if (CI->getType()->getScalarType() != int32Ty) { getImageSize = CastInst::CreateIntegerCast(getImageSize, VectorType::get(CI->getType()->getScalarType(), getImageSize->getType()->getVectorNumElements()), false, CI->getName(), CI); } } if (imgArray || imgDim == 3) { assert(CI->getType()->isVectorTy() && "OpImageQuerySize[Lod] must return vector for arrayed and 3d images"); const unsigned imgQuerySizeRetEls = CI->getType()->getVectorNumElements(); if (imgDim == 1) { // get_image_width returns scalar result while OpImageQuerySize // for image1d_array_t returns <2 x i32> vector. assert(imgQuerySizeRetEls == 2 && "OpImageQuerySize[Lod] must return <2 x iN> vector type"); getImageSize = InsertElementInst::Create(UndefValue::get(CI->getType()), getImageSize, ConstantInt::get(int32Ty, 0), CI->getName(), CI); } else { // get_image_dim and OpImageQuerySize returns different vector // types for arrayed and 3d images. SmallVector<Constant*, 4> maskEls; for(unsigned idx = 0; idx < imgQuerySizeRetEls; ++idx) maskEls.push_back(ConstantInt::get(int32Ty, idx)); Constant * mask = ConstantVector::get(maskEls); getImageSize = new ShuffleVectorInst(getImageSize, UndefValue::get(getImageSize->getType()), mask, CI->getName(), CI); } } if (imgArray) { assert((imgDim == 1 || imgDim == 2) && "invalid image array type"); // Insert get_image_array_size to the last position of the resulting vector. Type * sizeTy = Type::getIntNTy(*Ctx, M->getDataLayout().getPointerSizeInBits(0)); Instruction * getImageArraySize = addCallInst(M, kOCLBuiltinName::GetImageArraySize, sizeTy, CI->getArgOperand(0), &attributes, CI, &mangle, CI->getName(), false); // The width of integer type returning by OpImageQuerySize[Lod] may // differ from size_t which is returned by get_image_array_size if (getImageArraySize->getType() != CI->getType()->getScalarType()) { getImageArraySize = CastInst::CreateIntegerCast(getImageArraySize, CI->getType()->getScalarType(), false, CI->getName(), CI); } getImageSize = InsertElementInst::Create(getImageSize, getImageArraySize, ConstantInt::get(int32Ty, CI->getType()->getVectorNumElements() - 1), CI->getName(), CI); } assert(getImageSize && "must not be null"); CI->replaceAllUsesWith(getImageSize); CI->eraseFromParent(); } void SPIRVToOCL20::visitCallSPIRVAtomicBuiltin(CallInst* CI, Op OC) { AttributeSet Attrs = CI->getCalledFunction()->getAttributes(); Instruction * pInsertBefore = CI; mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args, Type *& RetTy){ auto Ptr = findFirstPtr(Args); auto Name = OCLSPIRVBuiltinMap::rmap(OC); auto NumOrder = getAtomicBuiltinNumMemoryOrderArgs(Name); auto ScopeIdx = Ptr + 1; auto OrderIdx = Ptr + 2; if (OC == OpAtomicIIncrement || OC == OpAtomicIDecrement) { // Since OpenCL 1.2 atomic_inc and atomic_dec builtins don't have, memory // scope and memory order syntax, and OpenCL 2.0 doesn't have such // builtins, therefore we translate these instructions to // atomic_fetch_add_explicit and atomic_fetch_sub_explicit OpenCL 2.0 // builtins with "operand" argument = 1. Name = OCLSPIRVBuiltinMap::rmap(OC == OpAtomicIIncrement ? OpAtomicIAdd: OpAtomicISub); Type* ValueTy = cast<PointerType>(Args[Ptr]->getType())->getElementType(); assert(ValueTy->isIntegerTy()); Args.push_back(llvm::ConstantInt::get(ValueTy, 1)); } Args[ScopeIdx] = mapUInt(M, cast<ConstantInt>(Args[ScopeIdx]), [](unsigned I) { return rmap<OCLScopeKind>(static_cast<Scope>(I));}); for (size_t I = 0; I < NumOrder; ++I) Args[OrderIdx + I] = mapUInt(M, cast<ConstantInt>(Args[OrderIdx + I]), [](unsigned Ord) { return mapSPIRVMemOrderToOCL(Ord); }); std::swap(Args[ScopeIdx], Args.back()); if(OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak) { // OpAtomicCompareExchange[Weak] semantics is different from // atomic_compare_exchange_[strong|weak] semantics as well as // arguments order. // OCL built-ins returns boolean value and stores a new/original // value by pointer passed as 2nd argument (aka expected) while SPIR-V // instructions returns this new/original value as a resulting value. AllocaInst *pExpected = new AllocaInst(CI->getType(), "expected", static_cast<Instruction*>(pInsertBefore->getParent()->getParent()->getEntryBlock().getFirstInsertionPt())); pExpected->setAlignment(CI->getType()->getScalarSizeInBits() / 8); new StoreInst(Args[1], pExpected, pInsertBefore); Args[1] = pExpected; std::swap(Args[3], Args[4]); std::swap(Args[2], Args[3]); RetTy = Type::getInt1Ty(*Ctx); } return Name; }, [=](CallInst * CI) -> Instruction * { if(OC == OpAtomicCompareExchange || OC == OpAtomicCompareExchangeWeak) { // OCL built-ins atomic_compare_exchange_[strong|weak] return boolean value. So, // to obtain the same value as SPIR-V instruction is returning it has to be loaded // from the memory where 'expected' value is stored. This memory must contain the // needed value after a call to OCL built-in is completed. LoadInst * pOriginal = new LoadInst(CI->getArgOperand(1), "original", pInsertBefore); return pOriginal; } // For other built-ins the return values match. return CI; }, &Attrs); } void SPIRVToOCL20::visitCallSPIRVBuiltin(CallInst* CI, Op OC) { AttributeSet Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args){ return OCLSPIRVBuiltinMap::rmap(OC); }, &Attrs); } void SPIRVToOCL20::visitCallSPIRVGroupBuiltin(CallInst* CI, Op OC) { auto DemangledName = OCLSPIRVBuiltinMap::rmap(OC); assert(DemangledName.find(kSPIRVName::GroupPrefix) == 0); std::string Prefix = getGroupBuiltinPrefix(CI); bool HasGroupOperation = hasGroupOperation(OC); if (!HasGroupOperation) { DemangledName = Prefix + DemangledName; } else { auto GO = getArgAs<spv::GroupOperation>(CI, 1); StringRef Op = DemangledName; Op = Op.drop_front(strlen(kSPIRVName::GroupPrefix)); bool Unsigned = Op.front() == 'u'; if (!Unsigned) Op = Op.drop_front(1); DemangledName = Prefix + kSPIRVName::GroupPrefix + SPIRSPIRVGroupOperationMap::rmap(GO) + '_' + Op.str(); } AttributeSet Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args){ Args.erase(Args.begin(), Args.begin() + (HasGroupOperation ? 2 : 1)); if (OC == OpGroupBroadcast) expandVector(CI, Args, 1); return DemangledName; }, &Attrs); } void SPIRVToOCL20::visitCallSPIRVPipeBuiltin(CallInst* CI, Op OC) { switch(OC) { case OpReservedReadPipe: OC = OpReadPipe; break; case OpReservedWritePipe: OC = OpWritePipe; break; default: // Do nothing. break; } auto DemangledName = OCLSPIRVBuiltinMap::rmap(OC); bool HasScope = DemangledName.find(kSPIRVName::GroupPrefix) == 0; if (HasScope) DemangledName = getGroupBuiltinPrefix(CI) + DemangledName; AttributeSet Attrs = CI->getCalledFunction()->getAttributes(); mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args){ if (HasScope) Args.erase(Args.begin(), Args.begin() + 1); if (!(OC == OpReadPipe || OC == OpWritePipe || OC == OpReservedReadPipe || OC == OpReservedWritePipe)) return DemangledName; auto &P = Args[Args.size() - 3]; auto T = P->getType(); assert(isa<PointerType>(T)); auto ET = T->getPointerElementType(); if (!ET->isIntegerTy(8) || T->getPointerAddressSpace() != SPIRAS_Generic) { auto NewTy = PointerType::getInt8PtrTy(*Ctx, SPIRAS_Generic); P = CastInst::CreatePointerBitCastOrAddrSpaceCast(P, NewTy, "", CI); } return DemangledName; }, &Attrs); } void SPIRVToOCL20::translateMangledAtomicTypeName() { for (auto &I:M->functions()) { if (!I.hasName()) continue; std::string MangledName = I.getName(); std::string DemangledName; if (!oclIsBuiltin(MangledName, &DemangledName) || DemangledName.find(kOCLBuiltinName::AtomPrefix) != 0) continue; auto Loc = MangledName.find(kOCLBuiltinName::AtomPrefix); Loc = MangledName.find(kMangledName::AtomicPrefixInternal, Loc); MangledName.replace(Loc, strlen(kMangledName::AtomicPrefixInternal), MangledAtomicTypeNamePrefix); I.setName(MangledName); } } std::string SPIRVToOCL20::getGroupBuiltinPrefix(CallInst* CI) { std::string Prefix; auto ES = getArgAsScope(CI, 0); switch(ES) { case ScopeWorkgroup: Prefix = kOCLBuiltinName::WorkPrefix; break; case ScopeSubgroup: Prefix = kOCLBuiltinName::SubPrefix; break; default: llvm_unreachable("Invalid execution scope"); } return Prefix; } void SPIRVToOCL20::visitCastInst(CastInst &Cast) { if(!isa<ZExtInst>(Cast) && !isa<SExtInst>(Cast) && !isa<TruncInst>(Cast) && !isa<FPTruncInst>(Cast) && !isa<FPExtInst>(Cast) && !isa<FPToUIInst>(Cast) && !isa<FPToSIInst>(Cast) && !isa<UIToFPInst>(Cast) && !isa<SIToFPInst>(Cast)) return; Type const* srcTy = Cast.getSrcTy(); Type * dstVecTy = Cast.getDestTy(); // Leave scalar casts as is. Skip boolean vector casts becase there // are no suitable OCL built-ins. if(!dstVecTy->isVectorTy() || srcTy->getScalarSizeInBits() == 1 || dstVecTy->getScalarSizeInBits() == 1) return; // Assemble built-in name -> convert_gentypeN std::string castBuiltInName(kOCLBuiltinName::ConvertPrefix); // Check if this is 'floating point -> unsigned integer' cast castBuiltInName += mapLLVMTypeToOCLType(dstVecTy, !isa<FPToUIInst>(Cast)); // Replace LLVM conversion instruction with call to conversion built-in BuiltinFuncMangleInfo mangle; // It does matter if the source is unsigned integer or not. SExt is for // signed source, ZExt and UIToFPInst are for unsigned source. if(isa<ZExtInst>(Cast) || isa<UIToFPInst>(Cast)) mangle.addUnsignedArg(0); AttributeSet attributes; CallInst *call = addCallInst(M, castBuiltInName, dstVecTy, Cast.getOperand(0), &attributes, &Cast, &mangle, Cast.getName(), false); Cast.replaceAllUsesWith(call); Cast.eraseFromParent(); } } // namespace SPIRV INITIALIZE_PASS(SPIRVToOCL20, "spvtoocl20", "Translate SPIR-V builtins to OCL 2.0 builtins", false, false) ModulePass *llvm::createSPIRVToOCL20() { return new SPIRVToOCL20(); }