/*
     * Long integer arithmetic shift right: vAA <- vBB >> (vCC & 63).
     *
     * This is different from the generic 32/64-bit binary operations
     * because vAA/vBB are 64-bit register pairs but vCC (the shift
     * distance) is a single 32-bit register.  Also, Dalvik requires that
     * only the low 6 bits of the shift distance be used, so the distance
     * is masked with 0x3f (giving 0..63) before shifting.
     *
     * The 64-bit value is handled as r0 = low word, r1 = high word.  The
     * sequence is branch-free and relies on ARM register-specified shift
     * semantics (the amount is taken from the bottom byte of the register;
     * LSL/LSR by 32 or more yield 0; ASR by 32 or more yields all sign
     * bits):
     *
     *   distance == 0 : r3 = 32, so (r1 << 32) == 0 and r0 is unchanged.
     *   0 < distance < 32 : r0 = (r0 >> d, logical) | (r1 << (32 - d)).
     *   distance >= 32 : the value built above is discarded and replaced
     *                    with r1 >> (d - 32), arithmetic (the movpl).
     *
     * In every case the high word becomes r1 >> d (arithmetic), which for
     * d >= 32 is just the sign fill.
     *
     * Register use in this handler: r0/r1 value, r2 shift distance,
     * r3 scratch (&fp[BB], then 32 - d), r9 AA then &fp[AA], ip scratch.
     * lr and ip are handed to CLEAR_SHADOW_PAIR; nothing here reads what
     * it leaves in them (presumably they are temporaries -- confirm
     * against the macro definition).
     */
    /* shr-long vAA, vBB, vCC */
    FETCH r0, 1                         @ r0<- CCBB
    mov     r9, rINST, lsr #8           @ r9<- AA
    and     r3, r0, #255                @ r3<- BB
    mov     r0, r0, lsr #8              @ r0<- CC
    VREG_INDEX_TO_ADDR r3, r3           @ r3<- &fp[BB]
    GET_VREG r2, r0                     @ r2<- vCC
    ldmia   r3, {r0-r1}                 @ r0/r1<- vBB/vBB+1 (low/high word)
    CLEAR_SHADOW_PAIR r9, lr, ip        @ Zero out the shadow regs (r9 is still the index AA here)
    and     r2, r2, #63                 @ r2<- r2 & 0x3f (shift distance, 0..63)
    VREG_INDEX_TO_ADDR r9, r9           @ r9<- &fp[AA]
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2 (logical; 0 when r2 >= 32)
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2)); asl is a synonym for lsl
    subs    ip, r2, #32                 @ ip<- r2 - 32, setting flags for the movpl below
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<-r1 >> (r2-32)
    FETCH_ADVANCE_INST 2                @ advance rPC, load rINST
    mov     r1, r1, asr r2              @ r1<- r1 >> r2 (arithmetic; sign fill when r2 >= 32)
    GET_INST_OPCODE ip                  @ extract opcode from rINST
    stmia   r9, {r0-r1}                 @ vAA/vAA+1<- r0/r1
    GOTO_OPCODE ip                      @ jump to next instruction