1
0
Fork 0
mirror of https://github.com/tobast/libunwind-eh_elf.git synced 2024-11-26 17:17:39 +01:00

(GR): New macro.

(BR): Likewise.
(FR): Likewise.
(_Uia64_getcontext): Tweak for slightly better cold-cache performance.

(Logical change 1.190)
This commit is contained in:
mostang.com!davidm 2004-03-27 09:25:58 +00:00
parent 1fdf615440
commit a77f17b0b7

View file

@ -25,6 +25,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
#include "ucontext_i.h"
/* Offsets of the save slots for general register n, branch register n
   and floating-point register n: the SC_GR / SC_BR / SC_FR base plus
   8, 8 and 16 per register respectively.  */
#define GR(n) (SC_GR + (n)*8)
#define BR(n) (SC_BR + (n)*8)
#define FR(n) (SC_FR + (n)*16)
/* This should be compatible with libc's getcontext(), except that
the sc->sc_mask field is always cleared and that the name is
prefixed with _Uia64_ so we don't step on the application's
@ -36,116 +40,118 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
// _Uia64_getcontext: stores register state into the context structure
// addressed by r32 (the one input register requested by `alloc`) and
// returns with r8 = 0.  Saved below: r1, r4-r7, sp, f2-f5, f16-f31,
// b0-b5, pr and several application registers (unat, rnat, bsp, pfs,
// fpsr, lc), plus the rotated NaT bits.
// NOTE(review): this listing appears to interleave the pre- and
// post-change lines of a diff (e.g. two `alloc` lines, stores repeated
// once with raw offsets and once with GR()/BR()/FR() offsets).  Confirm
// which lines are live before relying on instruction order.
.proc _Uia64_getcontext
_Uia64_getcontext:
.prologue
.body
alloc r11 = ar.pfs, 1, 0, 0, 0
alloc rPFS = ar.pfs, 1, 0, 0, 0
flushrs // save dirty partition on rbs
add r3 = SC_MASK, r32
add r2 = SC_MASK, r32
;;
st8 [r3] = r0 // clear sc->sc_mask
st8 [r2] = r0 // clear sc->sc_mask
mov.m rFPSR = ar.fpsr
mov.m rRSC = ar.rsc
add r2 = SC_GR+1*8, r32
add r2 = GR(1), r32
;;
mov.m rRSC = ar.rsc
mov.m rBSP = ar.bsp
.prologue
mov rPR = pr
.save ar.unat, rUNAT
mov.m rUNAT = ar.unat
.body
add r3 = SC_GR+4*8, r32
// Spill r1 into its slot; the post-increment moves r2 on to the r12 slot.
st8.spill [r2] = r1, (GR(12) - GR(1))
add r3 = SC_NAT, r32
;;
.mem.offset 0,0; st8.spill [r2] = r1, (5*8 - 1*8)
.mem.offset 8,0; st8.spill [r3] = r4, 16
mov rPFS = r11
// sp is spilled into the GR(12) slot; r2 then advances to SC_PR.
st8.spill [r2] = sp, (SC_PR - GR(12))
lfetch.fault.nt1 [r3] // prefetch nat...ar.lc
adds r3 = FR(2), r32
;;
.mem.offset 0,0; st8.spill [r2] = r5, 16
.mem.offset 8,0; st8.spill [r3] = r6, 48
st8 [r2] = rPR
stf.spill [r3] = f2, (FR(16) - FR(2))
add r2 = FR(24), r32
;;
stf.spill [r2] = f24, (FR(31) - FR(24))
stf.spill [r3] = f16
add r3 = GR(4), r32
;;
stf.spill [r2] = f31
st8.spill [r3] = r4, (GR(6) - GR(4))
add r2 = GR(5), r32
;;
.mem.offset 0,0; st8.spill [r2] = r5, (GR(7) - GR(5))
.mem.offset 8,0; st8.spill [r3] = r6
// rTMP = saved RSC value with its two low-order bits cleared.
and rTMP = ~0x3, rRSC
;;
.mem.offset 0,0; st8.spill [r2] = r7, (SC_FR+2*16-(SC_GR+7*8))
.mem.offset 8,0; st8.spill [r3] = sp, (SC_FR+3*16-(SC_GR+12*8))
;;
st8.spill [r2] = r7
mov.m ar.rsc = rTMP // put RSE into enforced lazy mode
mov.m rNAT = ar.unat
mov.i rLC = ar.lc
;;
mov.m rNAT = ar.unat
mov.m rRNAT = ar.rnat
add r2 = FR(3), r32
;;
mov.m ar.rsc = rRSC // restore RSE mode
mov rPR = pr
stf.spill [r2] = f3, (FR(4) - FR(3))
add r3 = FR(5), r32
;;
stf.spill [r2] = f4, (FR(17) - FR(4))
stf.spill [r3] = f5, (FR(18) - FR(5))
/*
* Rotate NaT bits by rPOS positions to the right:
*/
stf.spill [r2] = f2, 32
stf.spill [r3] = f3, 32
add rPOS = SC_GR, r32 // rPOS <- &sc_gr[0]
add rPOS = GR(0), r32 // rPOS <- &sc_gr[0]
;;
stf.spill [r2] = f4, (16*16-4*16)
stf.spill [r3] = f5, (17*16-5*16)
stf.spill [r2] = f17, (FR(19) - FR(17))
stf.spill [r3] = f18, (FR(20) - FR(18))
extr.u rPOS = rPOS, 3, 6 // get NaT bit number for r0
;;
stf.spill [r2] = f16, 32
stf.spill [r3] = f17, 32
stf.spill [r2] = f19, (FR(21) - FR(19))
stf.spill [r3] = f20, (FR(22) - FR(20))
// rCPOS = 64 - rPOS: left-shift count for the bits that wrap around.
sub rCPOS = 64, rPOS
;;
stf.spill [r2] = f18, 32
stf.spill [r3] = f19, 32
stf.spill [r2] = f21, (FR(23) - FR(21))
stf.spill [r3] = f22, (FR(25) - FR(22))
// rTMP = rNAT >> rPOS (low part of the rotated value).
shr.u rTMP = rNAT, rPOS
;;
stf.spill [r2] = f20, 32
stf.spill [r3] = f21, 32
stf.spill [r2] = f23, (FR(26) - FR(23))
stf.spill [r3] = f25, (FR(27) - FR(25))
// rNAT = rNAT << rCPOS (wrapped-around high part).
shl rNAT = rNAT, rCPOS
;;
stf.spill [r2] = f22, 32
stf.spill [r3] = f23, 32
stf.spill [r2] = f26, (FR(28) - FR(26))
stf.spill [r3] = f27, (FR(29) - FR(27))
// Merge both parts: rNAT now holds the NaT bits rotated right by rPOS.
or rNAT = rNAT, rTMP
;;
stf.spill [r2] = f24, 32
stf.spill [r3] = f25, 32
mov r8 = 0
;;
stf.spill [r2] = f26, 32
stf.spill [r3] = f27, 32
mov r9 = 1
;;
stf.spill [r2] = f28, 32
stf.spill [r3] = f29, 32
stf.spill [r2] = f28, (FR(30) - FR(28))
stf.spill [r3] = f29
mov rB0 = b0
;;
stf.spill [r2] = f30, 32
stf.spill [r3] = f31, 32
stf.spill [r2] = f30
mov ar.unat = rUNAT // done with integer regs; restore caller's UNaT
mov rB1 = b1
;;
mov ar.unat = rUNAT // done with integer regs; restore caller's UNaT
// From here r2 and r3 each walk a chain of context fields, every store
// post-incrementing its pointer to the next field to be written
// (r2 starts at SC_NAT, r3 at SC_BSP; both end in the BR() slots).
add r2 = SC_NAT, r32
add r3 = SC_BSP, r32
;;
st8 [r2] = rNAT, (SC_RNAT-SC_NAT)
st8 [r3] = rBSP, (SC_UNAT-SC_BSP)
mov rB2 = b2
;;
st8 [r2] = rRNAT, (SC_FPSR-SC_RNAT)
st8 [r3] = rUNAT, (SC_PFS-SC_UNAT)
st8 [r2] = rNAT, (SC_RNAT - SC_NAT)
st8 [r3] = rBSP, (SC_UNAT - SC_BSP)
mov rB3 = b3
;;
st8 [r2] = rFPSR, (SC_LC-SC_FPSR)
st8 [r3] = rPFS, (SC_PR-SC_PFS)
st8 [r2] = rRNAT, (SC_FPSR - SC_RNAT)
st8 [r3] = rUNAT, (SC_PFS - SC_UNAT)
mov rB4 = b4
;;
st8 [r2] = rLC, (SC_BR+0*8-SC_LC)
st8 [r3] = rPR, (SC_BR+1*8-SC_PR)
st8 [r2] = rFPSR, (SC_LC - SC_FPSR)
st8 [r3] = rPFS, (BR(0) - SC_PFS)
mov rB5 = b5
;;
st8 [r2] = rB0, 16
st8 [r3] = rB1, 16
st8 [r2] = rLC, (BR(1) - SC_LC)
st8 [r3] = rB0, (BR(2) - BR(0))
;;
st8 [r2] = rB2, 16
st8 [r3] = rB3, 16
st8 [r2] = rB1, (BR(3) - BR(1))
st8 [r3] = rB2, (BR(4) - BR(2))
;;
st8 [r2] = rB4
st8 [r3] = rB5
st8 [r2] = rB3, (BR(5) - BR(3))
st8 [r3] = rB4
;;
st8 [r2] = rB5
// r8 = 0 on return; presumably the function's return value (confirm
// against the IA-64 calling convention).
mov r8 = 0
br.ret.sptk.many rp
.endp _Uia64_getcontext