/*
* Copyright (C) 2011 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "codegen_arm64.h"
#include "arm64_lir.h"
#include "base/logging.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
namespace art {
/* This file contains codegen for the A64 ISA. */
int32_t Arm64Mir2Lir::EncodeImmSingle(uint32_t bits) {
/*
* Valid values will have the form:
*
* aBbb.bbbc.defg.h000.0000.0000.0000.0000
*
* where B = not(b). In other words, if b == 1, then B == 0 and vice versa.
*/
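// For example, 2.0f (raw bits 0x40000000) encodes to imm8 0x00 and 0.5f (0x3f000000) to
// imm8 0x60; a value such as 0.1f (raw bits 0x3dcccccd) cannot be encoded and yields -1.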
// bits[18..0] are cleared.
if ((bits & 0x0007ffff) != 0)
return -1;
// bits[29..25] are all set or all cleared.
uint32_t b_pattern = (bits >> 16) & 0x3e00;
if (b_pattern != 0 && b_pattern != 0x3e00)
return -1;
// bit[30] and bit[29] are opposite.
if (((bits ^ (bits << 1)) & 0x40000000) == 0)
return -1;
// bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
// bit7: a000.0000
uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
// bit6: 0b00.0000
uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
// bit5_to_0: 00cd.efgh
uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
return (bit7 | bit6 | bit5_to_0);
}
int32_t Arm64Mir2Lir::EncodeImmDouble(uint64_t bits) {
/*
* Valid values will have the form:
*
* aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
* 0000.0000.0000.0000.0000.0000.0000.0000
*
* where B = not(b).
*/
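// For example, 2.0 (raw bits 0x4000000000000000) encodes to imm8 0x00 and 0.5
// (0x3fe0000000000000) to imm8 0x60.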
// bits[47..0] are cleared.
if ((bits & UINT64_C(0xffffffffffff)) != 0)
return -1;
// bits[61..54] are all set or all cleared.
uint32_t b_pattern = (bits >> 48) & 0x3fc0;
if (b_pattern != 0 && b_pattern != 0x3fc0)
return -1;
// bit[62] and bit[61] are opposite.
if (((bits ^ (bits << 1)) & UINT64_C(0x4000000000000000)) == 0)
return -1;
// bit7: a000.0000
uint32_t bit7 = ((bits >> 63) & 0x1) << 7;
// bit6: 0b00.0000
uint32_t bit6 = ((bits >> 61) & 0x1) << 6;
// bit5_to_0: 00cd.efgh
uint32_t bit5_to_0 = (bits >> 48) & 0x3f;
return (bit7 | bit6 | bit5_to_0);
}
size_t Arm64Mir2Lir::GetLoadStoreSize(LIR* lir) {
bool opcode_is_wide = IS_WIDE(lir->opcode);
A64Opcode opcode = UNWIDE(lir->opcode);
DCHECK(!IsPseudoLirOp(opcode));
const A64EncodingMap *encoder = &EncodingMap[opcode];
uint32_t bits = opcode_is_wide ? encoder->xskeleton : encoder->wskeleton;
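// For the load/store forms used here, bits [31:30] of the instruction skeleton hold log2 of the
// access size in bytes, so the size can be read straight out of the encoding.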
return (bits >> 30);
}
size_t Arm64Mir2Lir::GetInstructionOffset(LIR* lir) {
size_t offset = lir->operands[2];
uint64_t check_flags = GetTargetInstFlags(lir->opcode);
DCHECK((check_flags & IS_LOAD) || (check_flags & IS_STORE));
if (check_flags & SCALED_OFFSET_X0) {
DCHECK(check_flags & IS_TERTIARY_OP);
offset = offset * (1 << GetLoadStoreSize(lir));
}
return offset;
}
LIR* Arm64Mir2Lir::LoadFPConstantValue(RegStorage r_dest, int32_t value) {
DCHECK(r_dest.IsSingle());
if (value == 0) {
return NewLIR2(kA64Fmov2sw, r_dest.GetReg(), rwzr);
} else {
int32_t encoded_imm = EncodeImmSingle((uint32_t)value);
if (encoded_imm >= 0) {
return NewLIR2(kA64Fmov2fI, r_dest.GetReg(), encoded_imm);
}
}
LIR* data_target = ScanLiteralPool(literal_list_, value, 0);
if (data_target == nullptr) {
// Wide, as we need 8B alignment.
data_target = AddWideData(&literal_list_, value, 0);
}
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
LIR* load_pc_rel = RawLIR(current_dalvik_offset_, kA64Ldr2fp,
r_dest.GetReg(), 0, 0, 0, 0, data_target);
AppendLIR(load_pc_rel);
return load_pc_rel;
}
LIR* Arm64Mir2Lir::LoadFPConstantValueWide(RegStorage r_dest, int64_t value) {
DCHECK(r_dest.IsDouble());
if (value == 0) {
return NewLIR2(kA64Fmov2Sx, r_dest.GetReg(), rxzr);
} else {
int32_t encoded_imm = EncodeImmDouble(value);
if (encoded_imm >= 0) {
return NewLIR2(WIDE(kA64Fmov2fI), r_dest.GetReg(), encoded_imm);
}
}
// No short form - load from the literal pool.
int32_t val_lo = Low32Bits(value);
int32_t val_hi = High32Bits(value);
LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
if (data_target == nullptr) {
data_target = AddWideData(&literal_list_, val_lo, val_hi);
}
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
LIR* load_pc_rel = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2fp),
r_dest.GetReg(), 0, 0, 0, 0, data_target);
AppendLIR(load_pc_rel);
return load_pc_rel;
}
static int CountLeadingZeros(bool is_wide, uint64_t value) {
return (is_wide) ? __builtin_clzll(value) : __builtin_clz((uint32_t)value);
}
static int CountTrailingZeros(bool is_wide, uint64_t value) {
return (is_wide) ? __builtin_ctzll(value) : __builtin_ctz((uint32_t)value);
}
static int CountSetBits(bool is_wide, uint64_t value) {
return ((is_wide) ?
__builtin_popcountll(value) : __builtin_popcount((uint32_t)value));
}
/**
* @brief Try encoding an immediate in the form required by logical instructions.
*
* @param is_wide Whether @p value is a 64-bit (as opposed to 32-bit) value.
* @param value An integer to be encoded. This is interpreted as 64-bit if @p is_wide is true and as
* 32-bit if @p is_wide is false.
* @return A non-negative integer containing the encoded immediate or -1 if the encoding failed.
* @note This is the inverse of Arm64Mir2Lir::DecodeLogicalImmediate().
*/
int Arm64Mir2Lir::EncodeLogicalImmediate(bool is_wide, uint64_t value) {
unsigned n, imm_s, imm_r;
// Logical immediates are encoded using parameters n, imm_s and imm_r using
// the following table:
//
// N imms immr size S R
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
// (s bits must not be all set)
//
// A pattern is constructed of size bits, where the least significant S+1
// bits are set. The pattern is rotated right by R, and repeated across a
// 32 or 64-bit value, depending on destination register width.
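//
// For example, 0x00ff00ff00ff00ff is the 16-bit element 0x00ff replicated: a run of eight set
// bits (S = 7) with no rotation (R = 0). It encodes as N = 0, imms = 0b100111, immr = 0b000000,
// so this function returns 0x27.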
//
// To test if an arbitrary immediate can be encoded using this scheme, an
// iterative algorithm is used.
//
// 1. If the value has all set or all clear bits, it can't be encoded.
if (value == 0 || value == ~UINT64_C(0) ||
(!is_wide && (uint32_t)value == ~UINT32_C(0))) {
return -1;
}
unsigned lead_zero = CountLeadingZeros(is_wide, value);
unsigned lead_one = CountLeadingZeros(is_wide, ~value);
unsigned trail_zero = CountTrailingZeros(is_wide, value);
unsigned trail_one = CountTrailingZeros(is_wide, ~value);
unsigned set_bits = CountSetBits(is_wide, value);
// The fixed bits in the immediate s field.
// If width == 64 (X reg), start at 0xFFFFFF80.
// If width == 32 (W reg), start at 0xFFFFFFC0, as the iteration for 64-bit
// widths won't be executed.
unsigned width = (is_wide) ? 64 : 32;
int imm_s_fixed = (is_wide) ? -128 : -64;
int imm_s_mask = 0x3f;
for (;;) {
// 2. If the value is two bits wide, it can be encoded.
if (width == 2) {
n = 0;
imm_s = 0x3C;
imm_r = (value & 3) - 1;
break;
}
n = (width == 64) ? 1 : 0;
imm_s = ((imm_s_fixed | (set_bits - 1)) & imm_s_mask);
if ((lead_zero + set_bits) == width) {
imm_r = 0;
} else {
imm_r = (lead_zero > 0) ? (width - trail_zero) : lead_one;
}
// 3. If the sum of leading zeros, trailing zeros and set bits is
// equal to the bit width of the value, it can be encoded.
if (lead_zero + trail_zero + set_bits == width) {
break;
}
// 4. If the sum of leading ones, trailing ones and unset bits in the
// value is equal to the bit width of the value, it can be encoded.
if (lead_one + trail_one + (width - set_bits) == width) {
break;
}
// 5. If the most-significant half of the bitwise value is equal to
// the least-significant half, return to step 2 using the
// least-significant half of the value.
uint64_t mask = (UINT64_C(1) << (width >> 1)) - 1;
if ((value & mask) == ((value >> (width >> 1)) & mask)) {
width >>= 1;
set_bits >>= 1;
imm_s_fixed >>= 1;
continue;
}
// 6. Otherwise, the value can't be encoded.
return -1;
}
return (n << 12 | imm_r << 6 | imm_s);
}
// Maximum number of instructions to use for encoding the immediate.
static const int max_num_ops_per_const_load = 2;
/**
* @brief Return the number of fast halfwords in the given uint64_t integer.
* @details The input integer is split into 4 halfwords (bits 0-15, 16-31, 32-47, 48-63). The
* number of fast halfwords (halfwords that are either 0 or 0xffff) is returned. See below for
* a more accurate description.
* @param value The input 64-bit integer.
* @return A value @c retval such that (retval & 0x7) is the maximum of n and m, where n is
* the number of halfwords with all bits unset (0) and m is the number of halfwords with all bits
* set (0xffff). Additionally (retval & 0x8) is set when m > n.
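* For example, GetNumFastHalfWords(0xffffffffffff1234) returns 0xb: three halfwords are 0xffff
* (m = 3) and none are 0x0000 (n = 0), so the result is 3 | 0x8.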
*/
static int GetNumFastHalfWords(uint64_t value) {
unsigned int num_0000_halfwords = 0;
unsigned int num_ffff_halfwords = 0;
for (int shift = 0; shift < 64; shift += 16) {
uint16_t halfword = static_cast<uint16_t>(value >> shift);
if (halfword == 0)
num_0000_halfwords++;
else if (halfword == UINT16_C(0xffff))
num_ffff_halfwords++;
}
if (num_0000_halfwords >= num_ffff_halfwords) {
DCHECK_LE(num_0000_halfwords, 4U);
return num_0000_halfwords;
} else {
DCHECK_LE(num_ffff_halfwords, 4U);
return num_ffff_halfwords | 0x8;
}
}
// The InexpensiveConstantXXX variants below are used in the promotion algorithm to determine how a
// constant is considered for promotion. If the constant is "inexpensive" then the promotion
// algorithm will give it a low priority for promotion, even when it is referenced many times in
// the code.
bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value ATTRIBUTE_UNUSED) {
// A 32-bit int can always be loaded with 2 instructions (and without using the literal pool).
// We therefore return true and give it a low priority for promotion.
return true;
}
bool Arm64Mir2Lir::InexpensiveConstantFloat(int32_t value) {
return EncodeImmSingle(value) >= 0;
}
bool Arm64Mir2Lir::InexpensiveConstantLong(int64_t value) {
int num_slow_halfwords = 4 - (GetNumFastHalfWords(value) & 0x7);
if (num_slow_halfwords <= max_num_ops_per_const_load) {
return true;
}
return (EncodeLogicalImmediate(/*is_wide=*/true, value) >= 0);
}
bool Arm64Mir2Lir::InexpensiveConstantDouble(int64_t value) {
return EncodeImmDouble(value) >= 0;
}
// The InexpensiveConstantXXX variants below are used to determine which A64 instructions to use
// when one of the operands is an immediate (e.g. register version or immediate version of add).
bool Arm64Mir2Lir::InexpensiveConstantInt(int32_t value, Instruction::Code opcode) {
switch (opcode) {
case Instruction::IF_EQ:
case Instruction::IF_NE:
case Instruction::IF_LT:
case Instruction::IF_GE:
case Instruction::IF_GT:
case Instruction::IF_LE:
case Instruction::ADD_INT:
case Instruction::ADD_INT_2ADDR:
case Instruction::SUB_INT:
case Instruction::SUB_INT_2ADDR:
// The code below is consistent with the implementation of OpRegRegImm().
{
uint32_t abs_value = (value == INT_MIN) ? value : std::abs(value);
if (abs_value < 0x1000) {
return true;
} else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
return true;
}
return false;
}
case Instruction::SHL_INT:
case Instruction::SHL_INT_2ADDR:
case Instruction::SHR_INT:
case Instruction::SHR_INT_2ADDR:
case Instruction::USHR_INT:
case Instruction::USHR_INT_2ADDR:
return true;
case Instruction::AND_INT:
case Instruction::AND_INT_2ADDR:
case Instruction::AND_INT_LIT16:
case Instruction::AND_INT_LIT8:
case Instruction::OR_INT:
case Instruction::OR_INT_2ADDR:
case Instruction::OR_INT_LIT16:
case Instruction::OR_INT_LIT8:
case Instruction::XOR_INT:
case Instruction::XOR_INT_2ADDR:
case Instruction::XOR_INT_LIT16:
case Instruction::XOR_INT_LIT8:
if (value == 0 || value == INT32_C(-1)) {
return true;
}
return (EncodeLogicalImmediate(/*is_wide=*/false, value) >= 0);
default:
return false;
}
}
/*
* Load an immediate using a single instruction when possible; otherwise
* use a pair of movz and movk instructions.
*
* No additional register clobbering operations are performed. Use this version when
* 1) r_dest is freshly returned from AllocTemp or
* 2) The codegen is under fixed register usage
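*
* For example, 0xffff1234 fits in a single "movn wN, #0xedcb", whereas 0x12345678 needs
* "movz wN, #0x5678" followed by "movk wN, #0x1234, lsl #16".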
*/
LIR* Arm64Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) {
LIR* res;
if (r_dest.IsFloat()) {
return LoadFPConstantValue(r_dest, value);
}
if (r_dest.Is64Bit()) {
return LoadConstantWide(r_dest, value);
}
// Loading SP/ZR with an immediate is not supported.
DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
// Compute how many movz/movk instructions are needed to load the value.
uint16_t high_bits = High16Bits(value);
uint16_t low_bits = Low16Bits(value);
bool low_fast = ((uint16_t)(low_bits + 1) <= 1);
bool high_fast = ((uint16_t)(high_bits + 1) <= 1);
if (LIKELY(low_fast || high_fast)) {
// 1 instruction is enough to load the immediate.
if (LIKELY(low_bits == high_bits)) {
// Value is either 0 or -1: we can just use wzr.
A64Opcode opcode = LIKELY(low_bits == 0) ? kA64Mov2rr : kA64Mvn2rr;
res = NewLIR2(opcode, r_dest.GetReg(), rwzr);
} else {
uint16_t uniform_bits, useful_bits;
int shift;
if (LIKELY(high_fast)) {
shift = 0;
uniform_bits = high_bits;
useful_bits = low_bits;
} else {
shift = 1;
uniform_bits = low_bits;
useful_bits = high_bits;
}
if (UNLIKELY(uniform_bits != 0)) {
res = NewLIR3(kA64Movn3rdM, r_dest.GetReg(), ~useful_bits, shift);
} else {
res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), useful_bits, shift);
}
}
} else {
// A movz/movk pair would take 2 instructions. Try a logical immediate first.
int log_imm = EncodeLogicalImmediate(/*is_wide=*/false, value);
if (log_imm >= 0) {
res = NewLIR3(kA64Orr3Rrl, r_dest.GetReg(), rwzr, log_imm);
} else {
// Use 2 instructions.
res = NewLIR3(kA64Movz3rdM, r_dest.GetReg(), low_bits, 0);
NewLIR3(kA64Movk3rdM, r_dest.GetReg(), high_bits, 1);
}
}
return res;
}
// TODO: clean up the names. LoadConstantWide() should really be LoadConstantNoClobberWide().
LIR* Arm64Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) {
if (r_dest.IsFloat()) {
return LoadFPConstantValueWide(r_dest, value);
}
DCHECK(r_dest.Is64Bit());
// Loading SP/ZR with an immediate is not supported.
DCHECK(!A64_REG_IS_SP(r_dest.GetReg()));
DCHECK(!A64_REG_IS_ZR(r_dest.GetReg()));
if (LIKELY(value == INT64_C(0) || value == INT64_C(-1))) {
// value is either 0 or -1: we can just use xzr.
A64Opcode opcode = LIKELY(value == 0) ? WIDE(kA64Mov2rr) : WIDE(kA64Mvn2rr);
return NewLIR2(opcode, r_dest.GetReg(), rxzr);
}
// At least one of value's halfwords is neither 0x0 nor 0xffff: find out how many.
uint64_t uvalue = static_cast<uint64_t>(value);
int num_fast_halfwords = GetNumFastHalfWords(uvalue);
int num_slow_halfwords = 4 - (num_fast_halfwords & 0x7);
bool more_ffff_halfwords = (num_fast_halfwords & 0x8) != 0;
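// For example, 0x0000123400005678 has two slow halfwords and is not a logical immediate, so it
// is materialized as "movz xN, #0x5678" followed by "movk xN, #0x1234, lsl #32".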
if (num_slow_halfwords > 1) {
// A single movz/movn is not enough. Try the logical immediate route.
int log_imm = EncodeLogicalImmediate(/*is_wide=*/true, value);
if (log_imm >= 0) {
return NewLIR3(WIDE(kA64Orr3Rrl), r_dest.GetReg(), rxzr, log_imm);
}
}
if (num_slow_halfwords <= max_num_ops_per_const_load) {
// We can encode the number using a movz/movn followed by one or more movk.
A64Opcode op;
uint16_t background;
LIR* res = nullptr;
// Decide whether to use a movz or a movn.
if (more_ffff_halfwords) {
op = WIDE(kA64Movn3rdM);
background = 0xffff;
} else {
op = WIDE(kA64Movz3rdM);
background = 0;
}
// Emit the first instruction (movz, movn).
int shift;
for (shift = 0; shift < 4; shift++) {
uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
if (halfword != background) {
res = NewLIR3(op, r_dest.GetReg(), halfword ^ background, shift);
break;
}
}
// Emit the movk instructions.
for (shift++; shift < 4; shift++) {
uint16_t halfword = static_cast<uint16_t>(uvalue >> (shift << 4));
if (halfword != background) {
NewLIR3(WIDE(kA64Movk3rdM), r_dest.GetReg(), halfword, shift);
}
}
return res;
}
// Use the literal pool.
int32_t val_lo = Low32Bits(value);
int32_t val_hi = High32Bits(value);
LIR* data_target = ScanLiteralPoolWide(literal_list_, val_lo, val_hi);
if (data_target == nullptr) {
data_target = AddWideData(&literal_list_, val_lo, val_hi);
}
ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
LIR *res = RawLIR(current_dalvik_offset_, WIDE(kA64Ldr2rp),
r_dest.GetReg(), 0, 0, 0, 0, data_target);
AppendLIR(res);
return res;
}
LIR* Arm64Mir2Lir::OpUnconditionalBranch(LIR* target) {
LIR* res = NewLIR1(kA64B1t, 0 /* offset to be patched during assembly */);
res->target = target;
return res;
}
LIR* Arm64Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) {
LIR* branch = NewLIR2(kA64B2ct, ArmConditionEncoding(cc),
0 /* offset to be patched */);
branch->target = target;
return branch;
}
LIR* Arm64Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) {
A64Opcode opcode = kA64Brk1d;
switch (op) {
case kOpBlx:
opcode = kA64Blr1x;
break;
default:
LOG(FATAL) << "Bad opcode " << op;
}
return NewLIR1(opcode, r_dest_src.GetReg());
}
LIR* Arm64Mir2Lir::OpRegRegShift(OpKind op, RegStorage r_dest_src1, RegStorage r_src2, int shift) {
A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
CHECK_EQ(r_dest_src1.Is64Bit(), r_src2.Is64Bit());
A64Opcode opcode = kA64Brk1d;
switch (op) {
case kOpCmn:
opcode = kA64Cmn3rro;
break;
case kOpCmp:
opcode = kA64Cmp3rro;
break;
case kOpMov:
opcode = kA64Mov2rr;
break;
case kOpMvn:
opcode = kA64Mvn2rr;
break;
case kOpNeg:
opcode = kA64Neg3rro;
break;
case kOpTst:
opcode = kA64Tst3rro;
break;
case kOpRev:
DCHECK_EQ(shift, 0);
// Binary, but rm is encoded twice.
return NewLIR2(kA64Rev2rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
case kOpRevsh:
// Binary, but rm is encoded twice.
NewLIR2(kA64Rev162rr | wide, r_dest_src1.GetReg(), r_src2.GetReg());
// "sxth r1, r2" is "sbfm r1, r2, #0, #15"
return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), 0, 15);
case kOp2Byte:
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
// "sbfx r1, r2, #imm1, #imm2" is "sbfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
// For now we use sbfm directly.
return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 7);
case kOp2Short:
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
// For now we use sbfm rather than its alias, sbfx.
return NewLIR4(kA64Sbfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
case kOp2Char:
// "ubfx r1, r2, #imm1, #imm2" is "ubfm r1, r2, #imm1, #(imm1 + imm2 - 1)".
// For now we use ubfm directly.
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
return NewLIR4(kA64Ubfm4rrdd | wide, r_dest_src1.GetReg(), r_src2.GetReg(), 0, 15);
default:
return OpRegRegRegShift(op, r_dest_src1, r_dest_src1, r_src2, shift);
}
DCHECK(!IsPseudoLirOp(opcode));
if (EncodingMap[opcode].flags & IS_BINARY_OP) {
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
return NewLIR2(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg());
} else if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
if (kind == kFmtShift) {
return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(), shift);
}
}
LOG(FATAL) << "Unexpected encoding operand count";
return nullptr;
}
LIR* Arm64Mir2Lir::OpRegRegExtend(OpKind op, RegStorage r_dest_src1, RegStorage r_src2,
A64RegExtEncodings ext, uint8_t amount) {
A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
A64Opcode opcode = kA64Brk1d;
switch (op) {
case kOpCmn:
opcode = kA64Cmn3Rre;
break;
case kOpCmp:
opcode = kA64Cmp3Rre;
break;
case kOpAdd:
// Note: intentional fallthrough
case kOpSub:
return OpRegRegRegExtend(op, r_dest_src1, r_dest_src1, r_src2, ext, amount);
default:
LOG(FATAL) << "Bad Opcode: " << opcode;
UNREACHABLE();
}
DCHECK(!IsPseudoLirOp(opcode));
if (EncodingMap[opcode].flags & IS_TERTIARY_OP) {
A64EncodingKind kind = EncodingMap[opcode].field_loc[2].kind;
if (kind == kFmtExtend) {
return NewLIR3(opcode | wide, r_dest_src1.GetReg(), r_src2.GetReg(),
EncodeExtend(ext, amount));
}
}
LOG(FATAL) << "Unexpected encoding operand count";
return nullptr;
}
LIR* Arm64Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) {
/* RegReg operations with SP in first parameter need extended register instruction form.
* Only CMN, CMP, ADD & SUB instructions are implemented.
*/
if (r_dest_src1 == rs_sp) {
return OpRegRegExtend(op, r_dest_src1, r_src2, kA64Uxtx, 0);
} else {
return OpRegRegShift(op, r_dest_src1, r_src2, ENCODE_NO_SHIFT);
}
}
LIR* Arm64Mir2Lir::OpMovRegMem(RegStorage r_dest, RegStorage r_base, int offset,
MoveType move_type) {
UNUSED(r_dest, r_base, offset, move_type);
UNIMPLEMENTED(FATAL);
UNREACHABLE();
}
LIR* Arm64Mir2Lir::OpMovMemReg(RegStorage r_base, int offset, RegStorage r_src,
MoveType move_type) {
UNUSED(r_base, offset, r_src, move_type);
UNIMPLEMENTED(FATAL);
return nullptr;
}
LIR* Arm64Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, RegStorage r_src) {
UNUSED(op, cc, r_dest, r_src);
LOG(FATAL) << "Unexpected use of OpCondRegReg for Arm64";
UNREACHABLE();
}
LIR* Arm64Mir2Lir::OpRegRegRegShift(OpKind op, RegStorage r_dest, RegStorage r_src1,
RegStorage r_src2, int shift) {
A64Opcode opcode = kA64Brk1d;
switch (op) {
case kOpAdd:
opcode = kA64Add4rrro;
break;
case kOpSub:
opcode = kA64Sub4rrro;
break;
// case kOpRsub:
// opcode = kA64RsubWWW;
// break;
case kOpAdc:
opcode = kA64Adc3rrr;
break;
case kOpAnd:
opcode = kA64And4rrro;
break;
case kOpXor:
opcode = kA64Eor4rrro;
break;
case kOpMul:
opcode = kA64Mul3rrr;
break;
case kOpDiv:
opcode = kA64Sdiv3rrr;
break;
case kOpOr:
opcode = kA64Orr4rrro;
break;
case kOpSbc:
opcode = kA64Sbc3rrr;
break;
case kOpLsl:
opcode = kA64Lsl3rrr;
break;
case kOpLsr:
opcode = kA64Lsr3rrr;
break;
case kOpAsr:
opcode = kA64Asr3rrr;
break;
case kOpRor:
opcode = kA64Ror3rrr;
break;
default:
LOG(FATAL) << "Bad opcode: " << op;
break;
}
// The instructions above belong to two kinds:
// - 4-operands instructions, where the last operand is a shift/extend immediate,
// - 3-operands instructions with no shift/extend.
A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
CHECK_EQ(r_dest.Is64Bit(), r_src1.Is64Bit());
CHECK_EQ(r_dest.Is64Bit(), r_src2.Is64Bit());
if (EncodingMap[opcode].flags & IS_QUAD_OP) {
DCHECK(!IsExtendEncoding(shift));
return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(), shift);
} else {
DCHECK(EncodingMap[opcode].flags & IS_TERTIARY_OP);
DCHECK_EQ(shift, ENCODE_NO_SHIFT);
return NewLIR3(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg());
}
}
LIR* Arm64Mir2Lir::OpRegRegRegExtend(OpKind op, RegStorage r_dest, RegStorage r_src1,
RegStorage r_src2, A64RegExtEncodings ext, uint8_t amount) {
A64Opcode opcode = kA64Brk1d;
switch (op) {
case kOpAdd:
opcode = kA64Add4RRre;
break;
case kOpSub:
opcode = kA64Sub4RRre;
break;
default:
UNIMPLEMENTED(FATAL) << "Unimplemented opcode: " << op;
UNREACHABLE();
}
A64Opcode widened_opcode = r_dest.Is64Bit() ? WIDE(opcode) : opcode;
if (r_dest.Is64Bit()) {
CHECK(r_src1.Is64Bit());
// dest determines whether the op is wide or not. Up-convert src2 when necessary.
// Note: this does not follow the AArch64 specification, but matches our encoding.
if (!r_src2.Is64Bit()) {
r_src2 = As64BitReg(r_src2);
}
} else {
CHECK(!r_src1.Is64Bit());
CHECK(!r_src2.Is64Bit());
}
// Sanity checks.
// 1) Amount is in the range 0..4
CHECK_LE(amount, 4);
return NewLIR4(widened_opcode, r_dest.GetReg(), r_src1.GetReg(), r_src2.GetReg(),
EncodeExtend(ext, amount));
}
LIR* Arm64Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) {
return OpRegRegRegShift(op, r_dest, r_src1, r_src2, ENCODE_NO_SHIFT);
}
LIR* Arm64Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src1, int value) {
return OpRegRegImm64(op, r_dest, r_src1, static_cast<int64_t>(value));
}
LIR* Arm64Mir2Lir::OpRegRegImm64(OpKind op, RegStorage r_dest, RegStorage r_src1, int64_t value) {
LIR* res;
bool neg = (value < 0);
uint64_t abs_value = (neg && value != LLONG_MIN) ? -value : value;
A64Opcode opcode = kA64Brk1d;
A64Opcode alt_opcode = kA64Brk1d;
bool is_logical = false;
bool is_wide = r_dest.Is64Bit();
A64Opcode wide = (is_wide) ? WIDE(0) : UNWIDE(0);
int info = 0;
switch (op) {
case kOpLsl: {
// "lsl w1, w2, #imm" is an alias of "ubfm w1, w2, #(-imm MOD 32), #(31-imm)"
// and "lsl x1, x2, #imm" of "ubfm x1, x2, #(-imm MOD 64), #(63-imm)".
// For now, we just use ubfm directly.
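// For example, "lsl w1, w2, #4" becomes "ubfm w1, w2, #28, #27".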
int max_value = (is_wide) ? 63 : 31;
return NewLIR4(kA64Ubfm4rrdd | wide, r_dest.GetReg(), r_src1.GetReg(),
(-value) & max_value, max_value - value);
}
case kOpLsr:
return NewLIR3(kA64Lsr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
case kOpAsr:
return NewLIR3(kA64Asr3rrd | wide, r_dest.GetReg(), r_src1.GetReg(), value);
case kOpRor:
// "ror r1, r2, #imm" is an alias of "extr r1, r2, r2, #imm".
// For now, we just use extr directly.
return NewLIR4(kA64Extr4rrrd | wide, r_dest.GetReg(), r_src1.GetReg(), r_src1.GetReg(),
value);
case kOpAdd:
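// Flip "neg" so that, below, neg == true selects the ADD form and neg == false the SUB form:
// adding a non-negative value and subtracting a negative one both emit an ADD of abs_value.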
neg = !neg;
FALLTHROUGH_INTENDED;
case kOpSub:
// Add and sub below read/write sp rather than xzr.
if (abs_value < 0x1000) {
opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value, 0);
} else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
opcode = (neg) ? kA64Add4RRdT : kA64Sub4RRdT;
return NewLIR4(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), abs_value >> 12, 1);
} else {
alt_opcode = (op == kOpAdd) ? kA64Add4RRre : kA64Sub4RRre;
info = EncodeExtend(is_wide ? kA64Uxtx : kA64Uxtw, 0);
}
break;
case kOpAdc:
alt_opcode = kA64Adc3rrr;
break;
case kOpSbc:
alt_opcode = kA64Sbc3rrr;
break;
case kOpOr:
is_logical = true;
opcode = kA64Orr3Rrl;
alt_opcode = kA64Orr4rrro;
break;
case kOpAnd:
is_logical = true;
opcode = kA64And3Rrl;
alt_opcode = kA64And4rrro;
break;
case kOpXor:
is_logical = true;
opcode = kA64Eor3Rrl;
alt_opcode = kA64Eor4rrro;
break;
case kOpMul:
// TUNING: power of 2, shift & add
alt_opcode = kA64Mul3rrr;
break;
default:
LOG(FATAL) << "Bad opcode: " << op;
}
if (is_logical) {
int log_imm = EncodeLogicalImmediate(is_wide, value);
if (log_imm >= 0) {
return NewLIR3(opcode | wide, r_dest.GetReg(), r_src1.GetReg(), log_imm);
} else {
// When the immediate is either 0 or ~0, the logical operation can be trivially reduced
// to a (possibly negated) assignment.
if (value == 0) {
switch (op) {
case kOpOr:
case kOpXor:
// Or/Xor by zero reduces to an assignment.
return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
default:
// And by zero reduces to a `mov rdest, xzr'.
DCHECK(op == kOpAnd);
return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
}
} else if (value == INT64_C(-1)
|| (!is_wide && static_cast<uint32_t>(value) == ~UINT32_C(0))) {
switch (op) {
case kOpAnd:
// And by -1 reduces to an assignment.
return NewLIR2(kA64Mov2rr | wide, r_dest.GetReg(), r_src1.GetReg());
case kOpXor:
// Xor by -1 reduces to an `mvn rdest, rsrc'.
return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), r_src1.GetReg());
default:
// Or by -1 reduces to a `mvn rdest, xzr'.
DCHECK(op == kOpOr);
return NewLIR2(kA64Mvn2rr | wide, r_dest.GetReg(), (is_wide) ? rxzr : rwzr);
}
}
}
}
RegStorage r_scratch;
if (is_wide) {
r_scratch = AllocTempWide();
LoadConstantWide(r_scratch, value);
} else {
r_scratch = AllocTemp();
LoadConstant(r_scratch, value);
}
if (EncodingMap[alt_opcode].flags & IS_QUAD_OP)
res = NewLIR4(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg(), info);
else
res = NewLIR3(alt_opcode | wide, r_dest.GetReg(), r_src1.GetReg(), r_scratch.GetReg());
FreeTemp(r_scratch);
return res;
}
LIR* Arm64Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) {
return OpRegImm64(op, r_dest_src1, static_cast<int64_t>(value));
}
LIR* Arm64Mir2Lir::OpRegImm64(OpKind op, RegStorage r_dest_src1, int64_t value) {
A64Opcode wide = (r_dest_src1.Is64Bit()) ? WIDE(0) : UNWIDE(0);
A64Opcode opcode = kA64Brk1d;
A64Opcode neg_opcode = kA64Brk1d;
bool shift;
bool neg = (value < 0);
uint64_t abs_value = (neg && value != LLONG_MIN) ? -value : value;
if (LIKELY(abs_value < 0x1000)) {
// abs_value is a 12-bit immediate.
shift = false;
} else if ((abs_value & UINT64_C(0xfff)) == 0 && ((abs_value >> 12) < 0x1000)) {
// abs_value is a shifted 12-bit immediate.
shift = true;
abs_value >>= 12;
} else if (LIKELY(abs_value < 0x1000000 && (op == kOpAdd || op == kOpSub))) {
// Note: It is better to use two ADD/SUB instructions than to load the value into a temp register.
// This works for both normal registers and SP.
// For a frame size == 0x2468, it will be encoded as:
// sub sp, #0x2000
// sub sp, #0x468
if (neg) {
op = (op == kOpAdd) ? kOpSub : kOpAdd;
}
OpRegImm64(op, r_dest_src1, abs_value & (~INT64_C(0xfff)));
return OpRegImm64(op, r_dest_src1, abs_value & 0xfff);
} else {
RegStorage r_tmp;
LIR* res;
if (IS_WIDE(wide)) {
r_tmp = AllocTempWide();
res = LoadConstantWide(r_tmp, value);
} else {
r_tmp = AllocTemp();
res = LoadConstant(r_tmp, value);
}
OpRegReg(op, r_dest_src1, r_tmp);
FreeTemp(r_tmp);
return res;
}
switch (op) {
case kOpAdd:
neg_opcode = kA64Sub4RRdT;
opcode = kA64Add4RRdT;
break;
case kOpSub:
neg_opcode = kA64Add4RRdT;
opcode = kA64Sub4RRdT;
break;
case kOpCmp:
neg_opcode = kA64Cmn3RdT;
opcode = kA64Cmp3RdT;
break;
default:
LOG(FATAL) << "Bad op-kind in OpRegImm: " << op;
break;
}
if (UNLIKELY(neg))
opcode = neg_opcode;
if (EncodingMap[opcode].flags & IS_QUAD_OP)
return NewLIR4(opcode | wide, r_dest_src1.GetReg(), r_dest_src1.GetReg(), abs_value,
(shift) ? 1 : 0);
else
return NewLIR3(opcode | wide, r_dest_src1.GetReg(), abs_value, (shift) ? 1 : 0);
}
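// Shift operands are packed as (shift_type << 7) | amount; extend operands additionally set
// bit 6, which is what IsExtendEncoding() tests to tell the two encodings apart.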
int Arm64Mir2Lir::EncodeShift(int shift_type, int amount) {
DCHECK_EQ(shift_type & 0x3, shift_type);
DCHECK_EQ(amount & 0x3f, amount);
return ((shift_type & 0x3) << 7) | (amount & 0x3f);
}
int Arm64Mir2Lir::EncodeExtend(int extend_type, int amount) {
DCHECK_EQ(extend_type & 0x7, extend_type);
DCHECK_EQ(amount & 0x7, amount);
return (1 << 6) | ((extend_type & 0x7) << 3) | (amount & 0x7);
}
bool Arm64Mir2Lir::IsExtendEncoding(int encoded_value) {
return ((1 << 6) & encoded_value) != 0;
}
LIR* Arm64Mir2Lir::LoadBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_dest,
int scale, OpSize size) {
LIR* load;
int expected_scale = 0;
A64Opcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
// TODO(Arm64): The sign extension of r_index should be carried out by using an extended
// register offset load (rather than doing the sign extension in a separate instruction).
if (r_index.Is32Bit()) {
// Assemble: ``sxtw xN, wN''.
r_index = As64BitReg(r_index);
NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
}
if (r_dest.IsFloat()) {
if (r_dest.IsDouble()) {
DCHECK(size == k64 || size == kDouble);
expected_scale = 3;
opcode = WIDE(kA64Ldr4fXxG);
} else {
DCHECK(r_dest.IsSingle());
DCHECK(size == k32 || size == kSingle);
expected_scale = 2;
opcode = kA64Ldr4fXxG;
}
DCHECK(scale == 0 || scale == expected_scale);
return NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
(scale != 0) ? 1 : 0);
}
switch (size) {
case kDouble:
case kWord:
case k64:
r_dest = Check64BitReg(r_dest);
opcode = WIDE(kA64Ldr4rXxG);
expected_scale = 3;
break;
case kReference:
r_dest = As32BitReg(r_dest);
FALLTHROUGH_INTENDED;
case kSingle: // Intentional fall-through.
case k32:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldr4rXxG;
expected_scale = 2;
break;
case kUnsignedHalf:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldrh4wXxd;
expected_scale = 1;
break;
case kSignedHalf:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldrsh4rXxd;
expected_scale = 1;
break;
case kUnsignedByte:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldrb3wXx;
break;
case kSignedByte:
r_dest = Check32BitReg(r_dest);
opcode = kA64Ldrsb3rXx;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
if (UNLIKELY(expected_scale == 0)) {
// This is a tertiary op (e.g. ldrb, ldrsb); it does not support a scale.
DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
DCHECK_EQ(scale, 0);
load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg());
} else {
DCHECK(scale == 0 || scale == expected_scale);
load = NewLIR4(opcode, r_dest.GetReg(), r_base.GetReg(), r_index.GetReg(),
(scale != 0) ? 1 : 0);
}
return load;
}
LIR* Arm64Mir2Lir::StoreBaseIndexed(RegStorage r_base, RegStorage r_index, RegStorage r_src,
int scale, OpSize size) {
LIR* store;
int expected_scale = 0;
A64Opcode opcode = kA64Brk1d;
r_base = Check64BitReg(r_base);
// TODO(Arm64): The sign extension of r_index should be carried out by using an extended
// register offset store (rather than doing the sign extension in a separate instruction).
if (r_index.Is32Bit()) {
// Assemble: ``sxtw xN, wN''.
r_index = As64BitReg(r_index);
NewLIR4(WIDE(kA64Sbfm4rrdd), r_index.GetReg(), r_index.GetReg(), 0, 31);
}
if (r_src.IsFloat()) {
if (r_src.IsDouble()) {
DCHECK(size == k64 || size == kDouble);
expected_scale = 3;
opcode = WIDE(kA64Str4fXxG);
} else {
DCHECK(r_src.IsSingle());
DCHECK(size == k32 || size == kSingle);
expected_scale = 2;
opcode = kA64Str4fXxG;
}
DCHECK(scale == 0 || scale == expected_scale);
return NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
(scale != 0) ? 1 : 0);
}
switch (size) {
case kDouble: // Intentional fall-through.
case kWord: // Intentional fall-through.
case k64:
r_src = Check64BitReg(r_src);
opcode = WIDE(kA64Str4rXxG);
expected_scale = 3;
break;
case kReference:
r_src = As32BitReg(r_src);
FALLTHROUGH_INTENDED;
case kSingle: // Intentional fall-through.
case k32:
r_src = Check32BitReg(r_src);
opcode = kA64Str4rXxG;
expected_scale = 2;
break;
case kUnsignedHalf:
case kSignedHalf:
r_src = Check32BitReg(r_src);
opcode = kA64Strh4wXxd;
expected_scale = 1;
break;
case kUnsignedByte:
case kSignedByte:
r_src = Check32BitReg(r_src);
opcode = kA64Strb3wXx;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
if (UNLIKELY(expected_scale == 0)) {
// This is a tertiary op (e.g. strb); it does not support a scale.
DCHECK_NE(EncodingMap[UNWIDE(opcode)].flags & IS_TERTIARY_OP, 0U);
DCHECK_EQ(scale, 0);
store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg());
} else {
store = NewLIR4(opcode, r_src.GetReg(), r_base.GetReg(), r_index.GetReg(),
(scale != 0) ? 1 : 0);
}
return store;
}
/*
* Load value from base + displacement. Optionally perform null check
* on base (which must have an associated s_reg and MIR). If not
* performing null check, incoming MIR can be null.
*/
LIR* Arm64Mir2Lir::LoadBaseDispBody(RegStorage r_base, int displacement, RegStorage r_dest,
OpSize size) {
LIR* load = nullptr;
A64Opcode opcode = kA64Brk1d;
A64Opcode alt_opcode = kA64Brk1d;
int scale = 0;
switch (size) {
case kDouble: // Intentional fall-through.
case kWord: // Intentional fall-through.
case k64:
r_dest = Check64BitReg(r_dest);
scale = 3;
if (r_dest.IsFloat()) {
DCHECK(r_dest.IsDouble());
opcode = WIDE(kA64Ldr3fXD);
alt_opcode = WIDE(kA64Ldur3fXd);
} else {
opcode = WIDE(kA64Ldr3rXD);
alt_opcode = WIDE(kA64Ldur3rXd);
}
break;
case kReference:
r_dest = As32BitReg(r_dest);
FALLTHROUGH_INTENDED;
case kSingle: // Intentional fall-through.
case k32:
r_dest = Check32BitReg(r_dest);
scale = 2;
if (r_dest.IsFloat()) {
DCHECK(r_dest.IsSingle());
opcode = kA64Ldr3fXD;
} else {
opcode = kA64Ldr3rXD;
}
break;
case kUnsignedHalf:
scale = 1;
opcode = kA64Ldrh3wXF;
break;
case kSignedHalf:
scale = 1;
opcode = kA64Ldrsh3rXF;
break;
case kUnsignedByte:
opcode = kA64Ldrb3wXd;
break;
case kSignedByte:
opcode = kA64Ldrsb3rXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
int scaled_disp = displacement >> scale;
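// For example, a 64-bit load with displacement 24 uses the scaled form (scaled_disp == 3, i.e.
// "ldr xN, [xB, #24]"), while displacement -8 falls back to the unscaled "ldur xN, [xB, #-8]".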
if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
// Can use scaled load.
load = NewLIR3(opcode, r_dest.GetReg(), r_base.GetReg(), scaled_disp);
} else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
// Can use unscaled load.
load = NewLIR3(alt_opcode, r_dest.GetReg(), r_base.GetReg(), displacement);
} else {
// Use long sequence.
// TODO: cleaner support for index/displacement registers? Not a reference, but must match width.
RegStorage r_scratch = AllocTempWide();
LoadConstantWide(r_scratch, displacement);
load = LoadBaseIndexed(r_base, r_scratch,
(size == kReference) ? As64BitReg(r_dest) : r_dest,
0, size);
FreeTemp(r_scratch);
}
// TODO: in future may need to differentiate Dalvik accesses w/ spills
if (mem_ref_type_ == ResourceMask::kDalvikReg) {
DCHECK_EQ(r_base, rs_sp);
AnnotateDalvikRegAccess(load, displacement >> 2, true /* is_load */, r_dest.Is64Bit());
}
return load;
}
LIR* Arm64Mir2Lir::LoadBaseDisp(RegStorage r_base, int displacement, RegStorage r_dest,
OpSize size, VolatileKind is_volatile) {
// LoadBaseDisp() will emit correct insn for atomic load on arm64
// assuming r_dest is correctly prepared using RegClassForFieldLoadStore().
LIR* load = LoadBaseDispBody(r_base, displacement, r_dest, size);
if (UNLIKELY(is_volatile == kVolatile)) {
// TODO: This should generate an acquire load instead of the barrier.
GenMemBarrier(kLoadAny);
}
return load;
}
LIR* Arm64Mir2Lir::StoreBaseDispBody(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size) {
LIR* store = nullptr;
A64Opcode opcode = kA64Brk1d;
A64Opcode alt_opcode = kA64Brk1d;
int scale = 0;
switch (size) {
case kDouble: // Intentional fall-through.
case kWord: // Intentional fall-through.
case k64:
r_src = Check64BitReg(r_src);
scale = 3;
if (r_src.IsFloat()) {
DCHECK(r_src.IsDouble());
opcode = WIDE(kA64Str3fXD);
alt_opcode = WIDE(kA64Stur3fXd);
} else {
opcode = WIDE(kA64Str3rXD);
alt_opcode = WIDE(kA64Stur3rXd);
}
break;
case kReference:
r_src = As32BitReg(r_src);
FALLTHROUGH_INTENDED;
case kSingle: // Intentional fall-through.
case k32:
r_src = Check32BitReg(r_src);
scale = 2;
if (r_src.IsFloat()) {
DCHECK(r_src.IsSingle());
opcode = kA64Str3fXD;
} else {
opcode = kA64Str3rXD;
}
break;
case kUnsignedHalf:
case kSignedHalf:
scale = 1;
opcode = kA64Strh3wXF;
break;
case kUnsignedByte:
case kSignedByte:
opcode = kA64Strb3wXd;
break;
default:
LOG(FATAL) << "Bad size: " << size;
}
bool displacement_is_aligned = (displacement & ((1 << scale) - 1)) == 0;
int scaled_disp = displacement >> scale;
if (displacement_is_aligned && scaled_disp >= 0 && scaled_disp < 4096) {
// Can use scaled store.
store = NewLIR3(opcode, r_src.GetReg(), r_base.GetReg(), scaled_disp);
} else if (alt_opcode != kA64Brk1d && IS_SIGNED_IMM9(displacement)) {
// Can use unscaled store.
store = NewLIR3(alt_opcode, r_src.GetReg(), r_base.GetReg(), displacement);
} else {
// Use long sequence.
RegStorage r_scratch = AllocTempWide();
LoadConstantWide(r_scratch, displacement);
store = StoreBaseIndexed(r_base, r_scratch,
(size == kReference) ? As64BitReg(r_src) : r_src,
0, size);
FreeTemp(r_scratch);
}
// TODO: In future, may need to differentiate Dalvik & spill accesses.
if (mem_ref_type_ == ResourceMask::kDalvikReg) {
DCHECK_EQ(r_base, rs_sp);
AnnotateDalvikRegAccess(store, displacement >> 2, false /* is_load */, r_src.Is64Bit());
}
return store;
}
LIR* Arm64Mir2Lir::StoreBaseDisp(RegStorage r_base, int displacement, RegStorage r_src,
OpSize size, VolatileKind is_volatile) {
// TODO: This should generate a release store and no barriers.
if (UNLIKELY(is_volatile == kVolatile)) {
// Ensure that prior accesses become visible to other threads first.
GenMemBarrier(kAnyStore);
}
// StoreBaseDisp() will emit correct insn for atomic store on arm64
// assuming r_src is correctly prepared using RegClassForFieldLoadStore().
LIR* store = StoreBaseDispBody(r_base, displacement, r_src, size);
if (UNLIKELY(is_volatile == kVolatile)) {
// Preserve order with respect to any subsequent volatile loads.
// We need StoreLoad, but that generally requires the most expensive barrier.
GenMemBarrier(kAnyAny);
}
return store;
}
LIR* Arm64Mir2Lir::OpFpRegCopy(RegStorage r_dest, RegStorage r_src) {
UNUSED(r_dest, r_src);
LOG(FATAL) << "Unexpected use of OpFpRegCopy for Arm64";
UNREACHABLE();
}
LIR* Arm64Mir2Lir::OpMem(OpKind op, RegStorage r_base, int disp) {
UNUSED(op, r_base, disp);
LOG(FATAL) << "Unexpected use of OpMem for Arm64";
UNREACHABLE();
}
LIR* Arm64Mir2Lir::InvokeTrampoline(OpKind op, RegStorage r_tgt,
QuickEntrypointEnum trampoline ATTRIBUTE_UNUSED) {
// The address of the trampoline is already loaded into r_tgt.
return OpReg(op, r_tgt);
}
} // namespace art