diff --git a/src/math/big/arith_riscv64.s b/src/math/big/arith_riscv64.s
index 67812dd646f7342db6571f100df6af0bbfa6eaa0..897d08229eef185c75287f6184b955b3fbed6dea 100644
--- a/src/math/big/arith_riscv64.s
+++ b/src/math/big/arith_riscv64.s
@@ -92,7 +92,86 @@ done:
 	RET
 
 TEXT ·subVV(SB),NOSPLIT,$0
-	JMP	·subVV_g(SB)
+	MOV	x+24(FP), X5	// X5 = x pointer; routine stores x[i] - y[i] - b into z[i] for z_len words and returns the last b in c
+	MOV	y+48(FP), X6	// X6 = y pointer
+	MOV	z+0(FP), X7	// X7 = z pointer (destination)
+	MOV	z_len+8(FP), X30	// X30 = n, number of words still to process
+
+	MOV	$4, X28		// X28 = 4, the unroll width that n is compared against
+	MOV	$0, X29		// b = 0 (X29 holds the borrow passed from word to word)
+
+	BEQZ	X30, done	// n == 0: store b = 0 and return
+	BLTU	X30, X28, loop1	// n < 4: skip the unrolled loop, use the one-word loop
+
+loop4:				// four words per pass; only reached while n >= 4
+	MOV	0(X5), X8	// x[0]
+	MOV	0(X6), X9	// y[0]
+	MOV	8(X5), X11	// x[1]
+	MOV	8(X6), X12	// y[1]
+	MOV	16(X5), X14	// x[2]
+	MOV	16(X6), X15	// y[2]
+	MOV	24(X5), X17	// x[3]
+	MOV	24(X6), X18	// y[3]
+
+	SUB	X9, X8, X21	// z[0] = x[0] - y[0]
+	SLTU	X21, X8, X22	// X22 = 1 if x[0] < x[0] - y[0], i.e. the subtraction wrapped
+	SUB	X29, X21, X10	// z[0] = x[0] - y[0] - b
+	SLTU	X10, X21, X23	// X23 = 1 if subtracting b wrapped
+	ADD	X22, X23, X29	// next b = sum of the two borrow flags (they cannot both be 1 while b <= 1)
+
+	SUB	X12, X11, X24	// z[1] = x[1] - y[1]
+	SLTU	X24, X11, X25	// X25 = 1 if x[1] - y[1] wrapped
+	SUB	X29, X24, X13	// z[1] = x[1] - y[1] - b
+	SLTU	X13, X24, X26	// X26 = 1 if subtracting b wrapped
+	ADD	X25, X26, X29	// next b
+
+	SUB	X15, X14, X21	// z[2] = x[2] - y[2]
+	SLTU	X21, X14, X22	// X22 = 1 if x[2] - y[2] wrapped
+	SUB	X29, X21, X16	// z[2] = x[2] - y[2] - b
+	SLTU	X16, X21, X23	// X23 = 1 if subtracting b wrapped
+	ADD	X22, X23, X29	// next b
+
+	SUB	X18, X17, X21	// z[3] = x[3] - y[3]
+	SLTU	X21, X17, X22	// X22 = 1 if x[3] - y[3] wrapped
+	SUB	X29, X21, X19	// z[3] = x[3] - y[3] - b
+	SLTU	X19, X21, X23	// X23 = 1 if subtracting b wrapped
+	ADD	X22, X23, X29	// next b, carried into the next pass or the tail loop
+
+	MOV	X10, 0(X7)	// z[0]
+	MOV	X13, 8(X7)	// z[1]
+	MOV	X16, 16(X7)	// z[2]
+	MOV	X19, 24(X7)	// z[3]
+
+	ADD	$32, X5		// advance x by 4 words (8 bytes each)
+	ADD	$32, X6		// advance y by 4 words
+	ADD	$32, X7		// advance z by 4 words
+	SUB	$4, X30		// n -= 4
+
+	BGEU	X30, X28, loop4	// n >= 4: run another unrolled pass
+	BEQZ	X30, done	// n == 0: no tail words remain
+
+loop1:				// one word per pass; only reached with n > 0
+	MOV	0(X5), X10	// x
+	MOV	0(X6), X11	// y
+
+	SUB	X11, X10, X12	// z = x - y
+	SLTU	X12, X10, X14	// X14 = 1 if x - y wrapped
+	SUB	X29, X12, X13	// z = x - y - b
+	SLTU	X13, X12, X15	// X15 = 1 if subtracting b wrapped
+	ADD	X14, X15, X29	// next b
+
+	MOV	X13, 0(X7)	// z
+
+	ADD	$8, X5		// advance x by one word
+	ADD	$8, X6		// advance y by one word
+	ADD	$8, X7		// advance z by one word
+	SUB	$1, X30		// n -= 1
+
+	BNEZ	X30, loop1	// repeat until n == 0
+
+done:
+	MOV	X29, c+72(FP)	// return b
+	RET
 
 TEXT ·addVW(SB),NOSPLIT,$0
 	JMP	·addVW_g(SB)