//===- SPIRVToOCL20.cpp - Transform SPIR-V builtins to OCL20 builtins-------===//
//
// The LLVM/SPIRV Translator
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
// Copyright (c) 2014 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimers in the documentation
// and/or other materials provided with the distribution.
// Neither the names of Advanced Micro Devices, Inc., nor the names of its
// contributors may be used to endorse or promote products derived from this
// Software without specific prior written permission.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH
// THE SOFTWARE.
//
//===----------------------------------------------------------------------===//
//
// This file implements the transformation of SPIR-V builtins into OCL 2.0
// builtins.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "spvtocl20"
#include "SPIRVInternal.h"
#include "OCLUtil.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <cstring>
using namespace llvm;
using namespace SPIRV;
using namespace OCLUtil;
namespace SPIRV {
// Command-line option "spirv-atomic-prefix". Its value is the text that
// translateMangledAtomicTypeName() substitutes for
// kMangledName::AtomicPrefixInternal in mangled atomic builtin names.
// Defaults to "U7_Atomic".
static cl::opt<std::string>
MangledAtomicTypeNamePrefix("spirv-atomic-prefix",
cl::desc("Mangled atomic type name prefix"), cl::init("U7_Atomic"));
/// Module pass that rewrites calls to SPIR-V builtins (__spirv_*) into calls
/// to OpenCL 2.0 builtins and replaces vector cast instructions with calls to
/// OpenCL conversion builtins. The module is traversed through InstVisitor;
/// the visit* members below are the per-instruction handlers.
class SPIRVToOCL20 : public ModulePass, public InstVisitor<SPIRVToOCL20> {
public:
  SPIRVToOCL20() : ModulePass(ID), M(nullptr), Ctx(nullptr) {
    initializeSPIRVToOCL20Pass(*PassRegistry::getPassRegistry());
  }

  /// Pass entry point. Marked `override` so that any drift from the
  /// ModulePass signature is diagnosed at compile time.
  bool runOnModule(Module &M) override;

  /// Dispatch a call instruction to the matching visitCallSPIRV* handler.
  void visitCallInst(CallInst &CI);

  // The SPIR-V reader should translate vector casts into OCL built-ins
  // because such conversions are defined neither by OpenCL C/C++ nor by the
  // SPIR 1.2/2.0 standards. So it is safer to convert such casts into calls
  // to the conversion built-ins defined by the standards.
  void visitCastInst(CastInst &CI);

  /// Transform __spirv_ImageQuerySize[Lod] into a vector of the same length
  /// containing {[get_image_width | get_image_dim], get_image_array_size}
  /// for all images except image1d_t, which is always converted into
  /// get_image_width returning a scalar result.
  void visitCallSPRIVImageQuerySize(CallInst *CI);

  /// Transform __spirv_Atomic* to atomic_*.
  /// __spirv_Atomic*(atomic_op, scope, sema, ops, ...) =>
  ///      atomic_*(atomic_op, ops, ..., order(sema), map(scope))
  void visitCallSPIRVAtomicBuiltin(CallInst *CI, Op OC);

  /// Transform __spirv_Group* to {work_group|sub_group}_*.
  ///
  /// Special handling of work_group_broadcast.
  ///   __spirv_GroupBroadcast(a, vec3(x, y, z))
  ///     =>
  ///   work_group_broadcast(a, x, y, z)
  ///
  /// Transform OpenCL group builtin function names from group_
  /// to workgroup_ and sub_group_.
  /// Insert group operation part: reduce_/inclusive_scan_/exclusive_scan_
  /// Transform the operation part:
  ///    fadd/iadd/sadd => add
  ///    fmax/smax => max
  ///    fmin/smin => min
  /// Keep umax/umin unchanged.
  void visitCallSPIRVGroupBuiltin(CallInst *CI, Op OC);

  /// Transform __spirv_MemoryBarrier to atomic_work_item_fence.
  ///   __spirv_MemoryBarrier(scope, sema) =>
  ///       atomic_work_item_fence(flag(sema), order(sema), map(scope))
  void visitCallSPIRVMemoryBarrier(CallInst *CI);

  /// Transform __spirv_{PipeOpName} to OCL pipe builtin functions.
  void visitCallSPIRVPipeBuiltin(CallInst *CI, Op OC);

  /// Transform __spirv_* builtins to OCL 2.0 builtins.
  /// The arguments are left unchanged.
  void visitCallSPIRVBuiltin(CallInst *CI, Op OC);

  /// Translate mangled atomic type name: "atomic_" =>
  ///   MangledAtomicTypeNamePrefix
  void translateMangledAtomicTypeName();

  /// Get prefix work_/sub_ for OCL group builtin functions.
  /// Assumes the first argument of \param CI is a constant integer holding a
  /// workgroup/subgroup scope enum value.
  std::string getGroupBuiltinPrefix(CallInst *CI);

  /// Pass identifier handed to the ModulePass constructor.
  static char ID;

private:
  Module *M;        ///< Module being transformed; set by runOnModule.
  LLVMContext *Ctx; ///< Context of the module; set by runOnModule.
};
// Storage for the static pass identifier that the SPIRVToOCL20 constructor
// hands to the ModulePass base class.
char SPIRVToOCL20::ID = 0;
/// Pass entry point: remembers the module and its context, visits every
/// instruction, fixes up mangled atomic type names and erases useless
/// functions. The module is verified afterwards, but a verification failure
/// is only reported through the debug stream. Always returns true.
bool SPIRVToOCL20::runOnModule(Module &Module) {
  M = &Module;
  Ctx = &Module.getContext();

  visit(*M);
  translateMangledAtomicTypeName();
  eraseUselessFunctions(M);

  DEBUG(dbgs() << "After SPIRVToOCL20:\n" << *M);

  // Collect verifier diagnostics into a string so they can be printed in one
  // piece when debugging is enabled.
  std::string VerifierMsg;
  raw_string_ostream VerifierOS(VerifierMsg);
  const bool IsBroken = verifyModule(*M, &VerifierOS);
  if (IsBroken) {
    DEBUG(errs() << "Fails to verify module: " << VerifierOS.str());
  }
  return true;
}
/// InstVisitor hook for call instructions. Ignores indirect calls and calls
/// whose callee is not an OCL-style builtin name that demangles to a known
/// SPIR-V opcode; everything else is routed to the handler for its opcode
/// category.
void SPIRVToOCL20::visitCallInst(CallInst &CI) {
  DEBUG(dbgs() << "[visistCallInst] " << CI << '\n');

  Function *Callee = CI.getCalledFunction();
  if (!Callee)
    return;

  std::string DemangledName;
  if (!oclIsBuiltin(Callee->getName(), &DemangledName))
    return;

  const Op OC = getSPIRVFuncOC(DemangledName);
  if (OC == OpNop)
    return;

  DEBUG(dbgs() << "DemangledName = " << DemangledName.c_str() << '\n'
               << "OpCode = " << OC << '\n');

  // Opcodes with a dedicated handler are checked first, in the same order as
  // the category predicates below would otherwise see them.
  switch (OC) {
  case OpImageQuerySize:
  case OpImageQuerySizeLod:
    visitCallSPRIVImageQuerySize(&CI);
    return;
  case OpMemoryBarrier:
    visitCallSPIRVMemoryBarrier(&CI);
    return;
  default:
    break;
  }

  if (isAtomicOpCode(OC))
    visitCallSPIRVAtomicBuiltin(&CI, OC);
  else if (isGroupOpCode(OC))
    visitCallSPIRVGroupBuiltin(&CI, OC);
  else if (isPipeOpCode(OC))
    visitCallSPIRVPipeBuiltin(&CI, OC);
  else if (OCLSPIRVBuiltinMap::rfind(OC))
    visitCallSPIRVBuiltin(&CI, OC);
}
/// Rewrite __spirv_MemoryBarrier(scope, sema) into
/// atomic_work_item_fence(flag(sema), order(sema), map(scope)).
/// Both incoming arguments are read as constant integers.
void SPIRVToOCL20::visitCallSPIRVMemoryBarrier(CallInst *CI) {
  AttributeSet Attrs = CI->getCalledFunction()->getAttributes();
  mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args) {
    // Capture Args by reference: a by-value capture would copy the whole
    // argument vector into the helper closure just to read two elements.
    auto getArg = [&Args](unsigned I) {
      return cast<ConstantInt>(Args[I])->getZExtValue();
    };
    // Read both inputs before the argument list is resized and overwritten.
    auto MScope = static_cast<Scope>(getArg(0));
    auto Sema = mapSPIRVMemSemanticToOCL(getArg(1));
    Args.resize(3);
    Args[0] = getInt32(M, Sema.first);
    Args[1] = getInt32(M, Sema.second);
    Args[2] = getInt32(M, rmap<OCLScopeKind>(MScope));
    return kOCLBuiltinName::AtomicWorkItemFence;
  }, &Attrs);
}
/// Replace a call to __spirv_ImageQuerySize[Lod] with calls to
/// get_image_width / get_image_dim and, for arrayed images, an additional
/// get_image_array_size whose result is inserted as the last vector element.
/// The image kind is derived from the name of the opaque struct type that the
/// callee's first parameter points to ("opencl.image1d*", "opencl.image2d*",
/// "opencl.image3d*", optionally containing "_array_").
/// All new instructions are inserted before \p CI; \p CI is then replaced by
/// the final value and erased.
void SPIRVToOCL20::visitCallSPRIVImageQuerySize(CallInst *CI) {
Function * func = CI->getCalledFunction();
// Get image type
Type * argTy = func->getFunctionType()->getParamType(0);
assert(argTy->isPointerTy() && "argument must be a pointer to opaque structure");
StructType * imgTy = cast<StructType>(argTy->getPointerElementType());
assert(imgTy->isOpaque() && "image type must be an opaque structure");
StringRef imgTyName = imgTy->getName();
assert(imgTyName.startswith("opencl.image") && "not an OCL image type");
// Classify the image by dimensionality and arrayness using its type name.
unsigned imgDim = 0;
bool imgArray = false;
if (imgTyName.startswith("opencl.image1d")) {
imgDim = 1;
} else if (imgTyName.startswith("opencl.image2d")) {
imgDim = 2;
} else if (imgTyName.startswith("opencl.image3d")) {
imgDim = 3;
}
assert(imgDim != 0 && "unexpected image dimensionality");
if (imgTyName.count("_array_") != 0) {
imgArray = true;
}
AttributeSet attributes = CI->getCalledFunction()->getAttributes();
BuiltinFuncMangleInfo mangle;
Type * int32Ty = Type::getInt32Ty(*Ctx);
// Running result; each step below wraps the previous value.
Instruction * getImageSize = nullptr;
if (imgDim == 1) {
// OpImageQuerySize from non-arrayed 1d image is always translated
// into get_image_width returning scalar argument
getImageSize =
addCallInst(M, kOCLBuiltinName::GetImageWidth, int32Ty,
CI->getArgOperand(0), &attributes,
CI, &mangle, CI->getName(), false);
// The width of the integer type returned by OpImageQuerySize[Lod] may
// differ from i32
if (CI->getType()->getScalarType() != int32Ty) {
getImageSize =
CastInst::CreateIntegerCast(getImageSize, CI->getType()->getScalarType(), false,
CI->getName(), CI);
}
} else {
assert((imgDim == 2 || imgDim == 3) && "invalid image type");
assert(CI->getType()->isVectorTy() && "this code can handle vector result type only");
// get_image_dim returns int2 and int4 for 2d and 3d images respectively.
const unsigned imgDimRetEls = imgDim == 2 ? 2 : 4;
VectorType * retTy = VectorType::get(int32Ty, imgDimRetEls);
getImageSize =
addCallInst(M, kOCLBuiltinName::GetImageDim, retTy,
CI->getArgOperand(0), &attributes,
CI, &mangle, CI->getName(), false);
// The width of the integer type returned by OpImageQuerySize[Lod] may
// differ from i32
if (CI->getType()->getScalarType() != int32Ty) {
getImageSize =
CastInst::CreateIntegerCast(getImageSize,
VectorType::get(CI->getType()->getScalarType(),
getImageSize->getType()->getVectorNumElements()),
false, CI->getName(), CI);
}
}
// For arrayed and 3d images the value computed so far does not have the
// element count of the original result; reshape it here.
if (imgArray || imgDim == 3) {
assert(CI->getType()->isVectorTy() &&
"OpImageQuerySize[Lod] must return vector for arrayed and 3d images");
const unsigned imgQuerySizeRetEls = CI->getType()->getVectorNumElements();
if (imgDim == 1) {
// get_image_width returns scalar result while OpImageQuerySize
// for image1d_array_t returns <2 x i32> vector.
assert(imgQuerySizeRetEls == 2 &&
"OpImageQuerySize[Lod] must return <2 x iN> vector type");
getImageSize =
InsertElementInst::Create(UndefValue::get(CI->getType()), getImageSize,
ConstantInt::get(int32Ty, 0), CI->getName(), CI);
} else {
// get_image_dim and OpImageQuerySize return different vector
// types for arrayed and 3d images.
// The shuffle mask is 0..N-1, where N is the element count of the
// original result type.
SmallVector<Constant*, 4> maskEls;
for(unsigned idx = 0; idx < imgQuerySizeRetEls; ++idx)
maskEls.push_back(ConstantInt::get(int32Ty, idx));
Constant * mask = ConstantVector::get(maskEls);
getImageSize =
new ShuffleVectorInst(getImageSize, UndefValue::get(getImageSize->getType()),
mask, CI->getName(), CI);
}
}
if (imgArray) {
assert((imgDim == 1 || imgDim == 2) && "invalid image array type");
// Insert get_image_array_size to the last position of the resulting vector.
// The call's return type is an integer as wide as a pointer in address
// space 0 according to the module's data layout.
Type * sizeTy = Type::getIntNTy(*Ctx, M->getDataLayout().getPointerSizeInBits(0));
Instruction * getImageArraySize =
addCallInst(M, kOCLBuiltinName::GetImageArraySize, sizeTy,
CI->getArgOperand(0), &attributes,
CI, &mangle, CI->getName(), false);
// The width of the integer type returned by OpImageQuerySize[Lod] may
// differ from size_t which is returned by get_image_array_size
if (getImageArraySize->getType() != CI->getType()->getScalarType()) {
getImageArraySize =
CastInst::CreateIntegerCast(getImageArraySize, CI->getType()->getScalarType(),
false, CI->getName(), CI);
}
getImageSize =
InsertElementInst::Create(getImageSize, getImageArraySize,
ConstantInt::get(int32Ty,
CI->getType()->getVectorNumElements() - 1),
CI->getName(), CI);
}
assert(getImageSize && "must not be null");
// Swap the original call for the computed value.
CI->replaceAllUsesWith(getImageSize);
CI->eraseFromParent();
}
/// Rewrite a __spirv_Atomic* call into the matching OpenCL 2.0 atomic_*
/// builtin. The scope and memory-order operands (which must be constant
/// integers) are mapped to their OCL values and the scope is moved to the
/// end of the argument list. OpAtomicIIncrement/IDecrement are emitted as
/// the builtins for OpAtomicIAdd/ISub with a constant operand of 1, and
/// OpAtomicCompareExchange[Weak] gets an "expected" stack slot because the
/// OCL builtin returns a boolean rather than the original value.
/// NOTE(review): the roles of the two callbacks (argument mutator and
/// return-value mutator) are inferred from how they are used here — confirm
/// against mutateCallInstOCL.
void SPIRVToOCL20::visitCallSPIRVAtomicBuiltin(CallInst* CI, Op OC) {
AttributeSet Attrs = CI->getCalledFunction()->getAttributes();
// Keep the original call as the insertion point for the store/load that the
// compare-exchange translation emits.
Instruction * pInsertBefore = CI;
mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args, Type *& RetTy){
auto Ptr = findFirstPtr(Args);
auto Name = OCLSPIRVBuiltinMap::rmap(OC);
// The number of memory-order operands is looked up for the name mapped
// from the original opcode, before any inc/dec renaming below.
auto NumOrder = getAtomicBuiltinNumMemoryOrderArgs(Name);
// Scope and memory order(s) directly follow the pointer operand.
auto ScopeIdx = Ptr + 1;
auto OrderIdx = Ptr + 2;
if (OC == OpAtomicIIncrement ||
OC == OpAtomicIDecrement) {
// The OpenCL 1.2 atomic_inc and atomic_dec builtins have no memory scope
// or memory order arguments, and OpenCL 2.0 does not have such builtins
// at all. Therefore these instructions are translated to the
// atomic_fetch_add_explicit and atomic_fetch_sub_explicit OpenCL 2.0
// builtins with "operand" argument = 1.
Name = OCLSPIRVBuiltinMap::rmap(OC == OpAtomicIIncrement ?
OpAtomicIAdd: OpAtomicISub);
Type* ValueTy = cast<PointerType>(Args[Ptr]->getType())->getElementType();
assert(ValueTy->isIntegerTy());
Args.push_back(llvm::ConstantInt::get(ValueTy, 1));
}
Args[ScopeIdx] = mapUInt(M, cast<ConstantInt>(Args[ScopeIdx]),
[](unsigned I) { return rmap<OCLScopeKind>(static_cast<Scope>(I));});
for (size_t I = 0; I < NumOrder; ++I)
Args[OrderIdx + I] = mapUInt(M, cast<ConstantInt>(Args[OrderIdx + I]),
[](unsigned Ord) { return mapSPIRVMemOrderToOCL(Ord); });
// Move the scope to the last position; the argument that was last takes
// the scope's former slot.
std::swap(Args[ScopeIdx], Args.back());
if(OC == OpAtomicCompareExchange ||
OC == OpAtomicCompareExchangeWeak) {
// OpAtomicCompareExchange[Weak] semantics differ from
// atomic_compare_exchange_[strong|weak] semantics, as does the
// argument order.
// The OCL built-ins return a boolean value and store the new/original
// value through the pointer passed as 2nd argument (aka expected), while
// the SPIR-V instructions return this new/original value as their result.
// The slot is allocated at the first insertion point of the function's
// entry block.
AllocaInst *pExpected = new AllocaInst(CI->getType(), "expected",
static_cast<Instruction*>(pInsertBefore->getParent()->getParent()->getEntryBlock().getFirstInsertionPt()));
pExpected->setAlignment(CI->getType()->getScalarSizeInBits() / 8);
// Seed the slot with the value currently in Args[1], then pass the slot
// in its place.
new StoreInst(Args[1], pExpected, pInsertBefore);
Args[1] = pExpected;
// Two swaps move the argument at index 4 to index 2 and shift the
// arguments at indices 2 and 3 one position to the right.
std::swap(Args[3], Args[4]);
std::swap(Args[2], Args[3]);
RetTy = Type::getInt1Ty(*Ctx);
}
return Name;
},
[=](CallInst * CI) -> Instruction * {
if(OC == OpAtomicCompareExchange ||
OC == OpAtomicCompareExchangeWeak) {
// The OCL built-ins atomic_compare_exchange_[strong|weak] return a boolean
// value. So, to obtain the same value the SPIR-V instruction returns, it has
// to be loaded from the memory where the 'expected' value is stored. This
// memory must contain the needed value after the call to the OCL built-in
// has completed.
LoadInst * pOriginal = new LoadInst(CI->getArgOperand(1), "original", pInsertBefore);
return pOriginal;
}
// For other built-ins the return values match.
return CI;
},
&Attrs);
}
/// Rename a __spirv_* builtin call to the OCL builtin mapped from \p OC.
/// The argument list is left untouched.
void SPIRVToOCL20::visitCallSPIRVBuiltin(CallInst *CI, Op OC) {
  AttributeSet CalleeAttrs = CI->getCalledFunction()->getAttributes();
  // Only the callee name changes, so the mutator ignores both parameters.
  auto RenameOnly = [=](CallInst *, std::vector<Value *> &) {
    return OCLSPIRVBuiltinMap::rmap(OC);
  };
  mutateCallInstOCL(M, CI, RenameOnly, &CalleeAttrs);
}
/// Rewrite a __spirv_Group* call into the {work_|sub_}group_* OCL builtin.
/// The leading scope argument (and the group-operation argument, when the
/// opcode has one) is removed from the call; for opcodes with a group
/// operation the name becomes
///   <prefix> + group prefix + <group operation> + '_' + <operation>
/// where the operation keeps its leading character only when it is 'u'.
void SPIRVToOCL20::visitCallSPIRVGroupBuiltin(CallInst *CI, Op OC) {
  auto DemangledName = OCLSPIRVBuiltinMap::rmap(OC);
  assert(DemangledName.find(kSPIRVName::GroupPrefix) == 0);

  const std::string ScopePrefix = getGroupBuiltinPrefix(CI);
  const bool HasGroupOperation = hasGroupOperation(OC);

  if (HasGroupOperation) {
    auto GroupOp = getArgAs<spv::GroupOperation>(CI, 1);
    // Strip the group prefix, then the one-letter type tag unless it is 'u'.
    StringRef OpName(DemangledName);
    OpName = OpName.drop_front(strlen(kSPIRVName::GroupPrefix));
    if (OpName.front() != 'u')
      OpName = OpName.drop_front(1);
    DemangledName = ScopePrefix + kSPIRVName::GroupPrefix +
                    SPIRSPIRVGroupOperationMap::rmap(GroupOp) + '_' +
                    OpName.str();
  } else {
    DemangledName = ScopePrefix + DemangledName;
  }

  // Number of leading arguments that do not appear in the OCL call.
  const unsigned NumDropped = HasGroupOperation ? 2 : 1;
  AttributeSet CalleeAttrs = CI->getCalledFunction()->getAttributes();
  mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args) {
    Args.erase(Args.begin(), Args.begin() + NumDropped);
    if (OC == OpGroupBroadcast)
      expandVector(CI, Args, 1);
    return DemangledName;
  }, &CalleeAttrs);
}
/// Rewrite a __spirv_{PipeOpName} call into the OCL pipe builtin.
/// Reserved read/write opcodes are named like their plain counterparts.
/// Names starting with the group prefix get the work_/sub_ prefix and lose
/// their leading scope argument. For read/write pipe calls, the third
/// argument from the end is cast to a generic-address-space i8 pointer
/// unless it already has that type.
void SPIRVToOCL20::visitCallSPIRVPipeBuiltin(CallInst *CI, Op OC) {
  if (OC == OpReservedReadPipe)
    OC = OpReadPipe;
  else if (OC == OpReservedWritePipe)
    OC = OpWritePipe;

  auto BuiltinName = OCLSPIRVBuiltinMap::rmap(OC);
  const bool IsScoped = BuiltinName.find(kSPIRVName::GroupPrefix) == 0;
  if (IsScoped)
    BuiltinName = getGroupBuiltinPrefix(CI) + BuiltinName;

  // The reserved opcodes were folded into the plain ones above, so only the
  // plain opcodes need to be checked here.
  const bool IsPacketTransfer = OC == OpReadPipe || OC == OpWritePipe;

  AttributeSet CalleeAttrs = CI->getCalledFunction()->getAttributes();
  mutateCallInstOCL(M, CI, [=](CallInst *, std::vector<Value *> &Args) {
    if (IsScoped)
      Args.erase(Args.begin());

    if (IsPacketTransfer) {
      Value *&PacketPtr = Args[Args.size() - 3];
      Type *PtrTy = PacketPtr->getType();
      assert(isa<PointerType>(PtrTy));
      const bool IsGenericI8Ptr =
          PtrTy->getPointerElementType()->isIntegerTy(8) &&
          PtrTy->getPointerAddressSpace() == SPIRAS_Generic;
      if (!IsGenericI8Ptr) {
        auto GenericI8PtrTy = PointerType::getInt8PtrTy(*Ctx, SPIRAS_Generic);
        PacketPtr = CastInst::CreatePointerBitCastOrAddrSpaceCast(
            PacketPtr, GenericI8PtrTy, "", CI);
      }
    }
    return BuiltinName;
  }, &CalleeAttrs);
}
/// For every named function in the module that is an OCL builtin whose
/// demangled name starts with kOCLBuiltinName::AtomPrefix, replace the first
/// occurrence of kMangledName::AtomicPrefixInternal located at or after the
/// builtin name inside the mangled name with MangledAtomicTypeNamePrefix,
/// and rename the function accordingly. Functions whose mangled name does
/// not contain the internal prefix are left unchanged.
void SPIRVToOCL20::translateMangledAtomicTypeName() {
  for (auto &F : M->functions()) {
    if (!F.hasName())
      continue;
    // Explicit .str(): does not rely on an implicit StringRef -> std::string
    // conversion.
    std::string MangledName = F.getName().str();
    std::string DemangledName;
    if (!oclIsBuiltin(MangledName, &DemangledName) ||
        DemangledName.find(kOCLBuiltinName::AtomPrefix) != 0)
      continue;
    auto Loc = MangledName.find(kOCLBuiltinName::AtomPrefix);
    Loc = MangledName.find(kMangledName::AtomicPrefixInternal, Loc);
    // std::string::replace throws std::out_of_range for a position past the
    // end, so a name without the internal prefix must be skipped instead of
    // being passed on with Loc == npos.
    if (Loc == std::string::npos)
      continue;
    MangledName.replace(Loc, strlen(kMangledName::AtomicPrefixInternal),
                        MangledAtomicTypeNamePrefix);
    F.setName(MangledName);
  }
}
/// Return the OCL group builtin name prefix selected by the execution scope
/// held in argument 0 of \p CI: kOCLBuiltinName::WorkPrefix for
/// ScopeWorkgroup, kOCLBuiltinName::SubPrefix for ScopeSubgroup. Any other
/// scope is treated as unreachable.
std::string SPIRVToOCL20::getGroupBuiltinPrefix(CallInst *CI) {
  std::string Result;
  const auto ExecScope = getArgAsScope(CI, 0);
  if (ExecScope == ScopeWorkgroup)
    Result = kOCLBuiltinName::WorkPrefix;
  else if (ExecScope == ScopeSubgroup)
    Result = kOCLBuiltinName::SubPrefix;
  else
    llvm_unreachable("Invalid execution scope");
  return Result;
}
/// InstVisitor hook for cast instructions. Integer/floating-point conversion
/// casts that produce a vector are replaced by a call to the OCL
/// convert_<type> builtin. Scalar casts, casts whose source or destination
/// element is 1 bit wide, and all other cast kinds are left alone.
void SPIRVToOCL20::visitCastInst(CastInst &Cast) {
  // Classify the cast: which kinds are handled at all, whether the source
  // operand must be mangled as unsigned, and whether the destination type
  // name is the signed one.
  bool SrcIsUnsigned = false;
  bool DstIsSigned = true;
  switch (Cast.getOpcode()) {
  case Instruction::ZExt:
  case Instruction::UIToFP:
    // ZExt and UIToFP take an unsigned source; SExt takes a signed one.
    SrcIsUnsigned = true;
    break;
  case Instruction::FPToUI:
    // 'floating point -> unsigned integer' is the only unsigned destination.
    DstIsSigned = false;
    break;
  case Instruction::SExt:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::FPExt:
  case Instruction::FPToSI:
  case Instruction::SIToFP:
    break;
  default:
    return;
  }

  Type *DstTy = Cast.getDestTy();
  const Type *SrcTy = Cast.getSrcTy();
  // Leave scalar casts as is. Skip boolean vector casts because there are no
  // suitable OCL built-ins.
  if (!DstTy->isVectorTy())
    return;
  if (SrcTy->getScalarSizeInBits() == 1 || DstTy->getScalarSizeInBits() == 1)
    return;

  // Assemble built-in name -> convert_gentypeN
  std::string BuiltinName(kOCLBuiltinName::ConvertPrefix);
  BuiltinName += mapLLVMTypeToOCLType(DstTy, DstIsSigned);

  BuiltinFuncMangleInfo MangleInfo;
  if (SrcIsUnsigned)
    MangleInfo.addUnsignedArg(0);

  // Replace the LLVM conversion instruction with a call to the built-in.
  AttributeSet NoAttrs;
  CallInst *Conversion =
      addCallInst(M, BuiltinName, DstTy, Cast.getOperand(0), &NoAttrs, &Cast,
                  &MangleInfo, Cast.getName(), false);
  Cast.replaceAllUsesWith(Conversion);
  Cast.eraseFromParent();
}
} // namespace SPIRV
// Pass registration for SPIRVToOCL20 with the argument string "spvtoocl20"
// and the description below.
// NOTE(review): the two trailing `false` flags are presumably "CFG only" and
// "is analysis" — confirm against llvm/PassSupport.h.
// NOTE(review): DEBUG_TYPE at the top of this file is spelled "spvtocl20"
// (single 'o' after 't'), which differs from the argument string used here —
// confirm whether that is intentional.
INITIALIZE_PASS(SPIRVToOCL20, "spvtoocl20",
"Translate SPIR-V builtins to OCL 2.0 builtins", false, false)
/// Factory for the SPIR-V to OCL 2.0 builtin translation pass; returns a
/// newly heap-allocated SPIRVToOCL20 instance through its ModulePass base.
ModulePass *llvm::createSPIRVToOCL20() {
  ModulePass *Pass = new SPIRVToOCL20();
  return Pass;
}