Skip to content
Snippets Groups Projects
Commit 49d6777d authored by Julian Zhu's avatar Julian Zhu Committed by Gopher Robot
Browse files

crypto/internal/fips140/subtle: add assembly implementation of xorBytes for mips64x

goos: linux
goarch: mips64le
pkg: crypto/subtle
                                     │  oldsubtle   │             newsubtle              │
                                     │    sec/op    │   sec/op     vs base               │
ConstantTimeByteEq-4                    5.011n ± 0%   5.014n ± 0%        ~ (p=0.110 n=8)
ConstantTimeEq-4                        3.342n ± 0%   3.342n ± 0%        ~ (p=0.993 n=8)
ConstantTimeLessOrEq-4                  4.455n ± 0%   4.458n ± 0%        ~ (p=0.182 n=8)
XORBytes/8Bytes-4                       36.48n ± 0%   26.73n ± 0%  -26.74% (p=0.000 n=8)
XORBytes/128Bytes-4                     70.21n ± 0%   50.14n ± 0%  -28.59% (p=0.000 n=8)
XORBytes/2048Bytes-4                    566.1n ± 0%   257.2n ± 0%  -54.58% (p=0.000 n=8)
XORBytes/8192Bytes-4                   2123.0n ± 0%   966.8n ± 0%  -54.46% (p=0.000 n=8)
XORBytes/32768Bytes-4                  13.740µ ± 0%   5.614µ ± 0%  -59.14% (p=0.000 n=8)
XORBytesAlignment/8Bytes0Offset-4       38.98n ± 0%   26.53n ± 0%  -31.95% (p=0.000 n=8)
XORBytesAlignment/8Bytes1Offset-4       43.27n ± 0%   26.54n ± 0%  -38.68% (p=0.000 n=8)
XORBytesAlignment/8Bytes2Offset-4       43.28n ± 0%   26.54n ± 0%  -38.69% (p=0.000 n=8)
XORBytesAlignment/8Bytes3Offset-4       43.32n ± 0%   26.54n ± 0%  -38.74% (p=0.000 n=8)
XORBytesAlignment/8Bytes4Offset-4       43.49n ± 0%   26.53n ± 0%  -38.99% (p=0.000 n=8)
XORBytesAlignment/8Bytes5Offset-4       43.53n ± 0%   26.54n ± 0%  -39.03% (p=0.000 n=8)
XORBytesAlignment/8Bytes6Offset-4       43.48n ± 0%   26.53n ± 0%  -38.98% (p=0.000 n=8)
XORBytesAlignment/8Bytes7Offset-4       43.46n ± 1%   26.53n ± 0%  -38.96% (p=0.000 n=8)
XORBytesAlignment/128Bytes0Offset-4     71.84n ± 0%   47.70n ± 1%  -33.60% (p=0.000 n=8)
XORBytesAlignment/128Bytes1Offset-4    260.60n ± 0%   59.87n ± 0%  -77.03% (p=0.000 n=8)
XORBytesAlignment/128Bytes2Offset-4    260.60n ± 0%   59.81n ± 0%  -77.05% (p=0.000 n=8)
XORBytesAlignment/128Bytes3Offset-4    260.55n ± 0%   59.89n ± 0%  -77.01% (p=0.000 n=8)
XORBytesAlignment/128Bytes4Offset-4    260.60n ± 0%   59.84n ± 0%  -77.04% (p=0.000 n=8)
XORBytesAlignment/128Bytes5Offset-4    260.70n ± 0%   59.82n ± 0%  -77.05% (p=0.000 n=8)
XORBytesAlignment/128Bytes6Offset-4    260.60n ± 0%   59.89n ± 0%  -77.02% (p=0.000 n=8)
XORBytesAlignment/128Bytes7Offset-4    260.70n ± 0%   59.85n ± 0%  -77.04% (p=0.000 n=8)
XORBytesAlignment/2048Bytes0Offset-4    552.2n ± 1%   250.0n ± 0%  -54.73% (p=0.000 n=8)
XORBytesAlignment/2048Bytes1Offset-4   3603.0n ± 0%   548.6n ± 0%  -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes2Offset-4   3602.0n ± 0%   548.6n ± 0%  -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes3Offset-4   3604.0n ± 0%   548.6n ± 0%  -84.78% (p=0.000 n=8)
XORBytesAlignment/2048Bytes4Offset-4   3603.5n ± 0%   548.9n ± 0%  -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes5Offset-4   3603.0n ± 0%   548.8n ± 0%  -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes6Offset-4   3602.0n ± 0%   548.6n ± 0%  -84.77% (p=0.000 n=8)
XORBytesAlignment/2048Bytes7Offset-4   3601.5n ± 0%   548.5n ± 0%  -84.77% (p=0.000 n=8)
geomean                                 220.0n        81.91n       -62.77%

                                     │  oldsubtle   │               newsubtle               │
                                     │     B/s      │      B/s       vs base                │
XORBytes/8Bytes-4                      209.1Mi ± 0%    285.5Mi ± 0%   +36.52% (p=0.000 n=8)
XORBytes/128Bytes-4                    1.698Gi ± 0%    2.378Gi ± 0%   +40.04% (p=0.000 n=8)
XORBytes/2048Bytes-4                   3.369Gi ± 0%    7.418Gi ± 0%  +120.17% (p=0.000 n=8)
XORBytes/8192Bytes-4                   3.594Gi ± 0%    7.892Gi ± 0%  +119.59% (p=0.000 n=8)
XORBytes/32768Bytes-4                  2.221Gi ± 0%    5.436Gi ± 0%  +144.76% (p=0.000 n=8)
XORBytesAlignment/8Bytes0Offset-4      195.7Mi ± 0%    287.6Mi ± 0%   +46.96% (p=0.000 n=8)
XORBytesAlignment/8Bytes1Offset-4      176.3Mi ± 0%    287.5Mi ± 0%   +63.06% (p=0.000 n=8)
XORBytesAlignment/8Bytes2Offset-4      176.3Mi ± 0%    287.4Mi ± 0%   +63.07% (p=0.000 n=8)
XORBytesAlignment/8Bytes3Offset-4      176.1Mi ± 0%    287.5Mi ± 0%   +63.25% (p=0.000 n=8)
XORBytesAlignment/8Bytes4Offset-4      175.5Mi ± 0%    287.6Mi ± 0%   +63.90% (p=0.000 n=8)
XORBytesAlignment/8Bytes5Offset-4      175.3Mi ± 0%    287.5Mi ± 0%   +64.02% (p=0.000 n=8)
XORBytesAlignment/8Bytes6Offset-4      175.5Mi ± 0%    287.6Mi ± 0%   +63.86% (p=0.000 n=8)
XORBytesAlignment/8Bytes7Offset-4      175.5Mi ± 0%    287.6Mi ± 0%   +63.85% (p=0.000 n=8)
XORBytesAlignment/128Bytes0Offset-4    1.659Gi ± 0%    2.499Gi ± 1%   +50.61% (p=0.000 n=8)
XORBytesAlignment/128Bytes1Offset-4    468.4Mi ± 0%   2039.0Mi ± 0%  +335.30% (p=0.000 n=8)
XORBytesAlignment/128Bytes2Offset-4    468.4Mi ± 0%   2040.9Mi ± 0%  +335.73% (p=0.000 n=8)
XORBytesAlignment/128Bytes3Offset-4    468.5Mi ± 0%   2038.1Mi ± 0%  +335.02% (p=0.000 n=8)
XORBytesAlignment/128Bytes4Offset-4    468.4Mi ± 0%   2040.0Mi ± 0%  +335.52% (p=0.000 n=8)
XORBytesAlignment/128Bytes5Offset-4    468.2Mi ± 0%   2040.5Mi ± 0%  +335.82% (p=0.000 n=8)
XORBytesAlignment/128Bytes6Offset-4    468.4Mi ± 0%   2038.2Mi ± 0%  +335.13% (p=0.000 n=8)
XORBytesAlignment/128Bytes7Offset-4    468.2Mi ± 0%   2039.4Mi ± 0%  +335.58% (p=0.000 n=8)
XORBytesAlignment/2048Bytes0Offset-4   3.454Gi ± 1%    7.629Gi ± 0%  +120.90% (p=0.000 n=8)
XORBytesAlignment/2048Bytes1Offset-4   542.1Mi ± 0%   3560.1Mi ± 0%  +556.68% (p=0.000 n=8)
XORBytesAlignment/2048Bytes2Offset-4   542.3Mi ± 0%   3560.1Mi ± 0%  +556.48% (p=0.000 n=8)
XORBytesAlignment/2048Bytes3Offset-4   541.9Mi ± 0%   3560.0Mi ± 0%  +556.93% (p=0.000 n=8)
XORBytesAlignment/2048Bytes4Offset-4   542.0Mi ± 0%   3558.8Mi ± 0%  +556.67% (p=0.000 n=8)
XORBytesAlignment/2048Bytes5Offset-4   542.1Mi ± 3%   3558.8Mi ± 0%  +556.53% (p=0.000 n=8)
XORBytesAlignment/2048Bytes6Offset-4   542.2Mi ± 0%   3560.2Mi ± 0%  +556.57% (p=0.000 n=8)
XORBytesAlignment/2048Bytes7Offset-4   542.3Mi ± 0%   3560.5Mi ± 0%  +556.56% (p=0.000 n=8)
geomean                                514.9Mi         1.496Gi       +197.56%

Change-Id: I649fa6bfca31296d65cccdf5fceb3dcfa0c588a1
Reviewed-on: https://go-review.googlesource.com/c/go/+/666255


Reviewed-by: default avatarKeith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: default avatarCherry Mui <cherryyz@google.com>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
parent 7bc714d6
No related branches found
No related tags found
No related merge requests found
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (amd64 || arm64 || loong64 || ppc64 || ppc64le || riscv64) && !purego //go:build (amd64 || arm64 || loong64 || mips64 || mips64le || ppc64 || ppc64le || riscv64) && !purego
package subtle package subtle
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
//go:build (!amd64 && !arm64 && !loong64 && !ppc64 && !ppc64le && !riscv64) || purego //go:build (!amd64 && !arm64 && !loong64 && !mips64 && !mips64le && !ppc64 && !ppc64le && !riscv64) || purego
package subtle package subtle
......
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (mips64 || mips64le) && !purego
#include "textflag.h"
// func xorBytes(dst, a, b *byte, n int)
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
MOVV dst+0(FP), R1
MOVV a+8(FP), R2
MOVV b+16(FP), R3
MOVV n+24(FP), R4
xor_64_check:
SGTU $64, R4, R5 // R5 = 1 if (64 > R4)
BNE R5, xor_32_check
xor_64:
MOVV (R2), R6
MOVV 8(R2), R7
MOVV 16(R2), R8
MOVV 24(R2), R9
MOVV (R3), R10
MOVV 8(R3), R11
MOVV 16(R3), R12
MOVV 24(R3), R13
XOR R6, R10
XOR R7, R11
XOR R8, R12
XOR R9, R13
MOVV R10, (R1)
MOVV R11, 8(R1)
MOVV R12, 16(R1)
MOVV R13, 24(R1)
MOVV 32(R2), R6
MOVV 40(R2), R7
MOVV 48(R2), R8
MOVV 56(R2), R9
MOVV 32(R3), R10
MOVV 40(R3), R11
MOVV 48(R3), R12
MOVV 56(R3), R13
XOR R6, R10
XOR R7, R11
XOR R8, R12
XOR R9, R13
MOVV R10, 32(R1)
MOVV R11, 40(R1)
MOVV R12, 48(R1)
MOVV R13, 56(R1)
ADDV $64, R2
ADDV $64, R3
ADDV $64, R1
SUBV $64, R4
SGTU $64, R4, R5
BEQ R0, R5, xor_64
BEQ R0, R4, end
xor_32_check:
SGTU $32, R4, R5
BNE R5, xor_16_check
xor_32:
MOVV (R2), R6
MOVV 8(R2), R7
MOVV 16(R2), R8
MOVV 24(R2), R9
MOVV (R3), R10
MOVV 8(R3), R11
MOVV 16(R3), R12
MOVV 24(R3), R13
XOR R6, R10
XOR R7, R11
XOR R8, R12
XOR R9, R13
MOVV R10, (R1)
MOVV R11, 8(R1)
MOVV R12, 16(R1)
MOVV R13, 24(R1)
ADDV $32, R2
ADDV $32, R3
ADDV $32, R1
SUBV $32, R4
BEQ R0, R4, end
xor_16_check:
SGTU $16, R4, R5
BNE R5, xor_8_check
xor_16:
MOVV (R2), R6
MOVV 8(R2), R7
MOVV (R3), R8
MOVV 8(R3), R9
XOR R6, R8
XOR R7, R9
MOVV R8, (R1)
MOVV R9, 8(R1)
ADDV $16, R2
ADDV $16, R3
ADDV $16, R1
SUBV $16, R4
BEQ R0, R4, end
xor_8_check:
SGTU $8, R4, R5
BNE R5, xor_4_check
xor_8:
MOVV (R2), R6
MOVV (R3), R7
XOR R6, R7
MOVV R7, (R1)
ADDV $8, R1
ADDV $8, R2
ADDV $8, R3
SUBV $8, R4
BEQ R0, R4, end
xor_4_check:
SGTU $4, R4, R5
BNE R5, xor_2_check
xor_4:
MOVW (R2), R6
MOVW (R3), R7
XOR R6, R7
MOVW R7, (R1)
ADDV $4, R2
ADDV $4, R3
ADDV $4, R1
SUBV $4, R4
BEQ R0, R4, end
xor_2_check:
SGTU $2, R4, R5
BNE R5, xor_1
xor_2:
MOVH (R2), R6
MOVH (R3), R7
XOR R6, R7
MOVH R7, (R1)
ADDV $2, R2
ADDV $2, R3
ADDV $2, R1
SUBV $2, R4
BEQ R0, R4, end
xor_1:
MOVB (R2), R6
MOVB (R3), R7
XOR R6, R7
MOVB R7, (R1)
end:
RET
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment