diff --git a/src/crypto/sha1/sha1block_decl.go b/src/crypto/sha1/sha1block_decl.go index 3edf5a43606d417e55b100f3b7aae7d410006fd5..46f41a1cc2634ad762b86475dc121a306a382b55 100644 --- a/src/crypto/sha1/sha1block_decl.go +++ b/src/crypto/sha1/sha1block_decl.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (arm || 386 || s390x) && !purego +//go:build (386 || arm || loong64 || s390x) && !purego package sha1 diff --git a/src/crypto/sha1/sha1block_generic.go b/src/crypto/sha1/sha1block_generic.go index 4c6f74d99d8f23eb273530277a3dd880a15448d9..5989a2434760b53fa3e9cc478f5adbc19ef4a5d4 100644 --- a/src/crypto/sha1/sha1block_generic.go +++ b/src/crypto/sha1/sha1block_generic.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build (!amd64 && !386 && !arm && !s390x && !arm64) || purego +//go:build (!386 && !amd64 && !arm && !arm64 && !loong64 && !s390x) || purego package sha1 diff --git a/src/crypto/sha1/sha1block_loong64.s b/src/crypto/sha1/sha1block_loong64.s new file mode 100644 index 0000000000000000000000000000000000000000..7e9d6e0933994e8abf5b2a1ccf8d4a5e71373ebd --- /dev/null +++ b/src/crypto/sha1/sha1block_loong64.s @@ -0,0 +1,226 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !purego + +#include "textflag.h" + +// SHA-1 block routine. See sha1block.go for Go equivalent. +// +// There are 80 rounds of 4 types: +// - rounds 0-15 are type 1 and load data (ROUND1 macro). +// - rounds 16-19 are type 1 and do not load data (ROUND1x macro). +// - rounds 20-39 are type 2 and do not load data (ROUND2 macro). +// - rounds 40-59 are type 3 and do not load data (ROUND3 macro). +// - rounds 60-79 are type 4 and do not load data (ROUND4 macro). +// +// Each round loads or shuffles the data, then computes a per-round +// function of b, c, d, and then mixes the result into and rotates the +// five registers a, b, c, d, e holding the intermediate results. +// +// The register rotation is implemented by rotating the arguments to +// the round macros instead of by explicit move instructions. 
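+//
+// As a rough Go sketch (mirroring the generic sha1block.go; the
+// variable names are illustrative), one type-1 round together with
+// the register rotation is:
+//
+//	f := d ^ (b & (c ^ d))
+//	t := bits.RotateLeft32(a, 5) + f + e + w[i&0xf] + 0x5A827999
+//	a, b, c, d, e = t, a, bits.RotateLeft32(b, 30), c, d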
+
+#define REGTMP	R30
+#define REGTMP1	R17
+#define REGTMP2	R18
+#define REGTMP3	R19
+
+// Load one big-endian message word into the 16-word window at 0(R3)
+// (R5 points at the current message block).
+#define LOAD1(index) \
+	MOVW	(index*4)(R5), REGTMP3; \
+	WORD	$0x3a73; \ // REVB2W REGTMP3, REGTMP3 (byte-swap the big-endian word)
+	MOVW	REGTMP3, (index*4)(R3)
+
+// Message schedule expansion for rounds 16..79; ROTR $31 is a rotate
+// left by 1. A Go sketch follows the macros below.
+#define LOAD(index) \
+	MOVW	(((index)&0xf)*4)(R3), REGTMP3; \
+	MOVW	(((index-3)&0xf)*4)(R3), REGTMP; \
+	MOVW	(((index-8)&0xf)*4)(R3), REGTMP1; \
+	MOVW	(((index-14)&0xf)*4)(R3), REGTMP2; \
+	XOR	REGTMP, REGTMP3; \
+	XOR	REGTMP1, REGTMP3; \
+	XOR	REGTMP2, REGTMP3; \
+	ROTR	$31, REGTMP3; \
+	MOVW	REGTMP3, (((index)&0xf)*4)(R3)
+
+// f = d ^ (b & (c ^ d))
+#define FUNC1(a, b, c, d, e) \
+	XOR	c, d, REGTMP1; \
+	AND	b, REGTMP1; \
+	XOR	d, REGTMP1
+
+// f = b ^ c ^ d
+#define FUNC2(a, b, c, d, e) \
+	XOR	b, c, REGTMP1; \
+	XOR	d, REGTMP1
+
+// f = (b & c) | ((b | c) & d)
+#define FUNC3(a, b, c, d, e) \
+	OR	b, c, REGTMP2; \
+	AND	b, c, REGTMP; \
+	AND	d, REGTMP2; \
+	OR	REGTMP, REGTMP2, REGTMP1
+
+#define FUNC4 FUNC2
+
+#define MIX(a, b, c, d, e, const) \
+	ROTR	$2, b; \ // b = b <<< 30 (rotate)
+	ADD	REGTMP1, e; \ // e = e + f
+	ROTR	$27, a, REGTMP2; \ // REGTMP2 = a <<< 5 (rotate)
+	ADD	REGTMP3, e; \ // e = e + w[i]
+	ADDV	$const, e; \ // e = e + k
+	ADD	REGTMP2, e // e = e + a<<<5
+
+#define ROUND1(a, b, c, d, e, index) \
+	LOAD1(index); \
+	FUNC1(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND1x(a, b, c, d, e, index) \
+	LOAD(index); \
+	FUNC1(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x5A827999)
+
+#define ROUND2(a, b, c, d, e, index) \
+	LOAD(index); \
+	FUNC2(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x6ED9EBA1)
+
+#define ROUND3(a, b, c, d, e, index) \
+	LOAD(index); \
+	FUNC3(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0x8F1BBCDC)
+
+#define ROUND4(a, b, c, d, e, index) \
+	LOAD(index); \
+	FUNC4(a, b, c, d, e); \
+	MIX(a, b, c, d, e, 0xCA62C1D6)
+
+// A 64-byte stack frame is required: the 16-word (16*4 = 64-byte)
+// circular buffer used for message expansion lives in this frame.
+// See the LOAD macro above and the local variable w in the generic
+// implementation (sha1block.go).
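+//
+// Rough Go equivalent of the expansion performed by LOAD above
+// (illustrative; compare the local variable w in sha1block.go):
+//
+//	tmp := w[i&0xf] ^ w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf]
+//	w[i&0xf] = bits.RotateLeft32(tmp, 1)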
+// func block(dig *digest, p []byte)
+TEXT ·block(SB),NOSPLIT,$64-32
+	MOVV	dig+0(FP), R4
+	MOVV	p_base+8(FP), R5
+	MOVV	p_len+16(FP), R6
+	AND	$~63, R6	// round p_len down to a multiple of the 64-byte block size
+	BEQ	R6, zero
+
+	// p_len >= 64
+	ADDV	R5, R6, R24	// R24 = end of input
+	MOVW	(0*4)(R4), R7	// load the current state h0..h4 into a..e
+	MOVW	(1*4)(R4), R8
+	MOVW	(2*4)(R4), R9
+	MOVW	(3*4)(R4), R10
+	MOVW	(4*4)(R4), R11
+
+loop:
+	// Save the incoming state; it is added back in after the 80 rounds.
+	MOVW	R7, R12
+	MOVW	R8, R13
+	MOVW	R9, R14
+	MOVW	R10, R15
+	MOVW	R11, R16
+
+	ROUND1(R7, R8, R9, R10, R11, 0)
+	ROUND1(R11, R7, R8, R9, R10, 1)
+	ROUND1(R10, R11, R7, R8, R9, 2)
+	ROUND1(R9, R10, R11, R7, R8, 3)
+	ROUND1(R8, R9, R10, R11, R7, 4)
+	ROUND1(R7, R8, R9, R10, R11, 5)
+	ROUND1(R11, R7, R8, R9, R10, 6)
+	ROUND1(R10, R11, R7, R8, R9, 7)
+	ROUND1(R9, R10, R11, R7, R8, 8)
+	ROUND1(R8, R9, R10, R11, R7, 9)
+	ROUND1(R7, R8, R9, R10, R11, 10)
+	ROUND1(R11, R7, R8, R9, R10, 11)
+	ROUND1(R10, R11, R7, R8, R9, 12)
+	ROUND1(R9, R10, R11, R7, R8, 13)
+	ROUND1(R8, R9, R10, R11, R7, 14)
+	ROUND1(R7, R8, R9, R10, R11, 15)
+
+	ROUND1x(R11, R7, R8, R9, R10, 16)
+	ROUND1x(R10, R11, R7, R8, R9, 17)
+	ROUND1x(R9, R10, R11, R7, R8, 18)
+	ROUND1x(R8, R9, R10, R11, R7, 19)
+
+	ROUND2(R7, R8, R9, R10, R11, 20)
+	ROUND2(R11, R7, R8, R9, R10, 21)
+	ROUND2(R10, R11, R7, R8, R9, 22)
+	ROUND2(R9, R10, R11, R7, R8, 23)
+	ROUND2(R8, R9, R10, R11, R7, 24)
+	ROUND2(R7, R8, R9, R10, R11, 25)
+	ROUND2(R11, R7, R8, R9, R10, 26)
+	ROUND2(R10, R11, R7, R8, R9, 27)
+	ROUND2(R9, R10, R11, R7, R8, 28)
+	ROUND2(R8, R9, R10, R11, R7, 29)
+	ROUND2(R7, R8, R9, R10, R11, 30)
+	ROUND2(R11, R7, R8, R9, R10, 31)
+	ROUND2(R10, R11, R7, R8, R9, 32)
+	ROUND2(R9, R10, R11, R7, R8, 33)
+	ROUND2(R8, R9, R10, R11, R7, 34)
+	ROUND2(R7, R8, R9, R10, R11, 35)
+	ROUND2(R11, R7, R8, R9, R10, 36)
+	ROUND2(R10, R11, R7, R8, R9, 37)
+	ROUND2(R9, R10, R11, R7, R8, 38)
+	ROUND2(R8, R9, R10, R11, R7, 39)
+
+	ROUND3(R7, R8, R9, R10, R11, 40)
+	ROUND3(R11, R7, R8, R9, R10, 41)
+	ROUND3(R10, R11, R7, R8, R9, 42)
+	ROUND3(R9, R10, R11, R7, R8, 43)
+	ROUND3(R8, R9, R10, R11, R7, 44)
+	ROUND3(R7, R8, R9, R10, R11, 45)
+	ROUND3(R11, R7, R8, R9, R10, 46)
+	ROUND3(R10, R11, R7, R8, R9, 47)
+	ROUND3(R9, R10, R11, R7, R8, 48)
+	ROUND3(R8, R9, R10, R11, R7, 49)
+	ROUND3(R7, R8, R9, R10, R11, 50)
+	ROUND3(R11, R7, R8, R9, R10, 51)
+	ROUND3(R10, R11, R7, R8, R9, 52)
+	ROUND3(R9, R10, R11, R7, R8, 53)
+	ROUND3(R8, R9, R10, R11, R7, 54)
+	ROUND3(R7, R8, R9, R10, R11, 55)
+	ROUND3(R11, R7, R8, R9, R10, 56)
+	ROUND3(R10, R11, R7, R8, R9, 57)
+	ROUND3(R9, R10, R11, R7, R8, 58)
+	ROUND3(R8, R9, R10, R11, R7, 59)
+
+	ROUND4(R7, R8, R9, R10, R11, 60)
+	ROUND4(R11, R7, R8, R9, R10, 61)
+	ROUND4(R10, R11, R7, R8, R9, 62)
+	ROUND4(R9, R10, R11, R7, R8, 63)
+	ROUND4(R8, R9, R10, R11, R7, 64)
+	ROUND4(R7, R8, R9, R10, R11, 65)
+	ROUND4(R11, R7, R8, R9, R10, 66)
+	ROUND4(R10, R11, R7, R8, R9, 67)
+	ROUND4(R9, R10, R11, R7, R8, 68)
+	ROUND4(R8, R9, R10, R11, R7, 69)
+	ROUND4(R7, R8, R9, R10, R11, 70)
+	ROUND4(R11, R7, R8, R9, R10, 71)
+	ROUND4(R10, R11, R7, R8, R9, 72)
+	ROUND4(R9, R10, R11, R7, R8, 73)
+	ROUND4(R8, R9, R10, R11, R7, 74)
+	ROUND4(R7, R8, R9, R10, R11, 75)
+	ROUND4(R11, R7, R8, R9, R10, 76)
+	ROUND4(R10, R11, R7, R8, R9, 77)
+	ROUND4(R9, R10, R11, R7, R8, 78)
+	ROUND4(R8, R9, R10, R11, R7, 79)
+
+	// Mix the result of this block into the running state.
+	ADD	R12, R7
+	ADD	R13, R8
+	ADD	R14, R9
+	ADD	R15, R10
+	ADD	R16, R11
+
+	ADDV	$64, R5	// advance to the next 64-byte block
+	BNE	R5, R24, loop
+
+end:
+	MOVW	R7, (0*4)(R4)	// store the updated state back into dig
+	MOVW	R8, (1*4)(R4)
+	MOVW	R9, (2*4)(R4)
+	MOVW	R10, (3*4)(R4)
+	MOVW	R11, (4*4)(R4)
+zero:
+	RET
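
A quick smoke test through the package's public API, which this patch
does not change (a usage sketch, not part of the patch; the expected
digest is the well-known SHA-1 of "hello world"):

	package main

	import (
		"crypto/sha1"
		"fmt"
	)

	func main() {
		sum := sha1.Sum([]byte("hello world"))
		// The same value must come out of the generic Go code and of the
		// loong64 assembly path selected by the build tags above.
		fmt.Printf("%x\n", sum) // 2aae6c35c94fcfb415dbe95f408b9ce91ee846ed
	}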