/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"
#include "mirror/array-inl.h"

namespace art {
namespace x86_64 {

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

// Chooses operand locations for replicating a scalar into every vector lane.
// Integral scalars arrive in a general-purpose register and produce a fresh
// FPU register; floating-point scalars are replicated in place.
void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* summary = new (GetGraph()->GetArena()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimLong:
      summary->SetInAt(0, Location::RequiresRegister());
      summary->SetOut(Location::RequiresFpuRegister());
      break;
    case Primitive::kPrimFloat:
    case Primitive::kPrimDouble:
      summary->SetInAt(0, Location::RequiresFpuRegister());
      summary->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Emits the lane-broadcast sequence for the packed type of `instruction`.
void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* summary = instruction->GetLocations();
  XmmRegister vec = summary->Out().AsFpuRegister<XmmRegister>();
  const Location scalar = summary->InAt(0);
  switch (instruction->GetPackedType()) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      // Move the scalar in, then widen byte -> word -> dword before the final shuffle.
      __ movd(vec, scalar.AsRegister<CpuRegister>());
      __ punpcklbw(vec, vec);
      __ punpcklwd(vec, vec);
      __ pshufd(vec, vec, Immediate(0));
      break;
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ movd(vec, scalar.AsRegister<CpuRegister>());
      __ punpcklwd(vec, vec);
      __ pshufd(vec, vec, Immediate(0));
      break;
    case Primitive::kPrimInt:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ movd(vec, scalar.AsRegister<CpuRegister>());
      __ pshufd(vec, vec, Immediate(0));
      break;
    case Primitive::kPrimLong:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ movd(vec, scalar.AsRegister<CpuRegister>());  // is 64-bit
      __ punpcklqdq(vec, vec);
      break;
    case Primitive::kPrimFloat:
      // The scalar already sits in the output register (same-as-first-input).
      DCHECK(scalar.Equals(summary->Out()));
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ shufps(vec, vec, Immediate(0));
      break;
    case Primitive::kPrimDouble:
      DCHECK(scalar.Equals(summary->Out()));
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ shufpd(vec, vec, Immediate(0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Not supported on this backend: reaching this visitor is a fatal error.
void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

// Not supported on this backend: reaching this visitor is a fatal error.
void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

// Not supported on this backend: reaching this visitor is a fatal error.
void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

// Not supported on this backend: reaching this visitor is a fatal error.
void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
  LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); Primitive::Type from = instruction->GetInputType(); Primitive::Type to = instruction->GetResultType(); if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { LOG(FATAL) << "Unsupported SIMD type"; } } void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubb(dst, src); break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubw(dst, src); break; case 
Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubd(dst, src); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubq(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, dst); __ subps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, dst); __ subpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); if (instruction->GetPackedType() == Primitive::kPrimInt) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimInt: { DCHECK_EQ(4u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movaps(dst, src); __ pxor(tmp, tmp); __ pcmpgtd(tmp, dst); __ pxor(dst, tmp); __ psubd(dst, tmp); break; } case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrld(dst, Immediate(1)); __ andps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrlq(dst, Immediate(1)); __ andpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); // Boolean-not requires a temporary to construct the 16 x one. 
if (instruction->GetPackedType() == Primitive::kPrimBoolean) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: { // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ pxor(dst, dst); __ pcmpeqb(tmp, tmp); // all ones __ psubb(dst, tmp); // 16 x one __ pxor(dst, src); break; } case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pcmpeqb(dst, dst); // all ones __ pxor(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } // Helper to set up locations for vector binary operations. 
static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); __ paddb(dst, src); break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ paddw(dst, src); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ paddd(dst, src); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); __ paddq(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ addps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ addpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void 
InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pavgb(dst, src); return; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, src); return; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimByte: DCHECK_EQ(16u, instruction->GetVectorLength()); __ psubb(dst, src); break; case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psubw(dst, src); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psubd(dst, src); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psubq(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ subps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ subpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void 
InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmullw(dst, src); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pmulld(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ mulps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ mulpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ divps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ divpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } 
void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { LOG(FATAL) << "No SIMD for " << instruction->GetId(); } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pand(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ andps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pandn(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, 
instruction->GetVectorLength()); __ andnps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andnpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ por(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ orps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ orpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); 
DCHECK_LE(instruction->GetVectorLength(), 16u); __ pxor(dst, src); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, src); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, src); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } // Helper to set up locations for vector shift operations. static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { CreateVecShiftLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psllw(dst, Immediate(static_cast<int8_t>(value))); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pslld(dst, Immediate(static_cast<int8_t>(value))); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void 
LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { CreateVecShiftLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psraw(dst, Immediate(static_cast<int8_t>(value))); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { CreateVecShiftLocations(GetGraph()->GetArena(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { LocationSummary* locations = instruction->GetLocations(); DCHECK(locations->InAt(0).Equals(locations->Out())); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { case Primitive::kPrimChar: case Primitive::kPrimShort: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psrlw(dst, Immediate(static_cast<int8_t>(value))); break; case Primitive::kPrimInt: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrld(dst, Immediate(static_cast<int8_t>(value))); break; case Primitive::kPrimLong: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LOG(FATAL) << "No SIMD for " << instr->GetId(); } void 
InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { LOG(FATAL) << "No SIMD for " << instr->GetId(); } // Helper to set up locations for vector memory operations. static void CreateVecMemLocations(ArenaAllocator* arena, HVecMemoryOperation* instruction, bool is_load) { LocationSummary* locations = new (arena) LocationSummary(instruction); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: case Primitive::kPrimFloat: case Primitive::kPrimDouble: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresFpuRegister()); } break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } // Helper to set up registers and address for vector memory operations. static Address CreateVecMemRegisters(HVecMemoryOperation* instruction, Location* reg_loc, bool is_load) { LocationSummary* locations = instruction->GetLocations(); Location base = locations->InAt(0); Location index = locations->InAt(1); *reg_loc = is_load ? 
locations->Out() : locations->InAt(2); size_t size = Primitive::ComponentSize(instruction->GetPackedType()); uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); ScaleFactor scale = TIMES_1; switch (size) { case 2: scale = TIMES_2; break; case 4: scale = TIMES_4; break; case 8: scale = TIMES_8; break; default: break; } return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset); } void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); } void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { Location reg_loc = Location::NoLocation(); Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ true); XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? 
__ movapd(reg, address) : __ movupd(reg, address); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { Location reg_loc = Location::NoLocation(); Address address = CreateVecMemRegisters(instruction, ®_loc, /*is_load*/ false); XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { case Primitive::kPrimBoolean: case Primitive::kPrimByte: case Primitive::kPrimChar: case Primitive::kPrimShort: case Primitive::kPrimInt: case Primitive::kPrimLong: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); break; case Primitive::kPrimFloat: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); break; case Primitive::kPrimDouble: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg); break; default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); } } #undef __ } // namespace x86_64 } // namespace art