[3/5,V1] RISC-V: Implement intrinsics for Crypto extension

Message ID 20220223094418.3518-4-shihua@iscas.ac.cn
State Deferred, archived
Series RISC-V: Implement Crypto extension's instruction patterns and its intrinsics

Commit Message

Liao Shihua Feb. 23, 2022, 9:44 a.m. UTC
  From: LiaoShihua <shihua@iscas.ac.cn>

	These headers are taken from https://github.com/rvkrypto/rvkrypto-fips .

gcc/ChangeLog:

        * config.gcc: Add extra_headers.
        * config/riscv/riscv_crypto.h: New file.
        * config/riscv/riscv_crypto_scalar.h: New file.
        * config/riscv/rvk_asm_intrin.h: New file.
        * config/riscv/rvk_emu_intrin.h: New file.

Co-Authored-By: mjosaarinen <mjos@iki.fi>
---
 gcc/config.gcc                         |   1 +
 gcc/config/riscv/riscv_crypto.h        |  12 +
 gcc/config/riscv/riscv_crypto_scalar.h | 247 ++++++++++
 gcc/config/riscv/rvk_asm_intrin.h      | 187 ++++++++
 gcc/config/riscv/rvk_emu_intrin.h      | 594 +++++++++++++++++++++++++
 5 files changed, 1041 insertions(+)
 create mode 100644 gcc/config/riscv/riscv_crypto.h
 create mode 100644 gcc/config/riscv/riscv_crypto_scalar.h
 create mode 100644 gcc/config/riscv/rvk_asm_intrin.h
 create mode 100644 gcc/config/riscv/rvk_emu_intrin.h
  

Comments

Kito Cheng Feb. 28, 2022, 3:34 p.m. UTC | #1
Those header files have license issues and should be relicensed to GPL.
Also, don't install rvk_asm_intrin.h and rvk_emu_intrin.h, since they are
not very meaningful once we have compiler support.

General comments:
- Use /* */ rather than //, which gives much better compatibility; //
comments are illegal in C89. (A short sketch of both points follows below.)
- Add a newline at the end of each file, which prevents "\ No newline
at end of file" markers from showing up in the diff.
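
For illustration only, a minimal sketch of both cleanups on a
hypothetical declaration (_rv_example is a placeholder, not one of the
patch's intrinsics):

	/*	rvk example fragment -- C89-compatible block comments	*/

	static inline long _rv_example(long rs1)
		{ return rs1; }						/*	placeholder body	*/

	/*	...and the file ends with a final newline, so the diff does
		not show "\ No newline at end of file".	*/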

> --- /dev/null
> +++ b/gcc/config/riscv/riscv_crypto_scalar.h
> @@ -0,0 +1,247 @@
> +//     riscv_crypto_scalar.h
> +//     2021-11-08      Markku-Juhani O. Saarinen <mjos@pqshield.com>
> +//     Copyright (c) 2021, PQShield Ltd. All rights reserved.
> +
> +//     === Scalar crypto: General mapping from intrinsics to compiler builtins,
> +//             inline assembler, or to an (insecure) porting / emulation layer.
> +
> +/*
> + *     _rv_*(...)
> + *       RV32/64 intrinsics that return the "long" data type
> + *
> + *     _rv32_*(...)
> + *       RV32/64 intrinsics that return the "int32_t" data type
> + *
> + *     _rv64_*(...)
> + *       RV64-only intrinsics that return the "int64_t" data type
> + *
> + */
> +
> +#ifndef _RISCV_CRYPTO_SCALAR_H
> +#define _RISCV_CRYPTO_SCALAR_H
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#if !defined(__riscv_xlen) && !defined(RVKINTRIN_EMULATE)
> +#warning "Target is not RISC-V. Enabling insecure emulation."
> +#define RVKINTRIN_EMULATE 1
> +#endif
> +
> +#if defined(RVKINTRIN_EMULATE)
> +
> +//     intrinsics via emulation (insecure -- porting / debug option)
> +#include "rvk_emu_intrin.h"
> +#define _RVK_INTRIN_IMPL(s) _rvk_emu_##s
> +
> +#elif defined(RVKINTRIN_ASSEMBLER)
> +
> +//     intrinsics via inline assembler (builtins not available)
> +#include "rvk_asm_intrin.h"
> +#define _RVK_INTRIN_IMPL(s) _rvk_asm_##s
> +#else
> +
> +//     intrinsics via compiler builtins
> +#include <stdint.h>
> +#define _RVK_INTRIN_IMPL(s) __builtin_riscv_##s
> +
> +#endif

Drop rvk_emu_intrin.h and rvk_asm_intrin.h here.
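
i.e., once the compiler provides the builtins, the whole #if ladder
above could collapse to something like this (untested sketch, not part
of the submitted patch):

	#include <stdint.h>

	/*	intrinsics map directly to compiler builtins	*/
	#define _RVK_INTRIN_IMPL(s) __builtin_riscv_##s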

> +
> +//     set type if not already set
> +#if !defined(RVKINTRIN_RV32) && !defined(RVKINTRIN_RV64)
...
> +static inline long _rv_sm3p0(long rs1)
> +       { return _RVK_INTRIN_IMPL(sm3p0)(rs1); }                                //      SM3P0
> +
> +static inline long _rv_sm3p1(long rs1)
> +       { return _RVK_INTRIN_IMPL(sm3p1)(rs1); }                                //      SM3P1
> +
> +#ifdef __cplusplus
> +}
> +#endif
> +

#undef _RVK_INTRIN_IMPL before the end of this header to prevent
introducing unexpected symbols, e.g. as in the sketch below.
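
Roughly (sketch only, reusing the guards that are already in the header):

	#undef _RVK_INTRIN_IMPL		/*	keep the mapping macro internal	*/

	#ifdef __cplusplus
	}
	#endif

	#endif	/*	_RISCV_CRYPTO_SCALAR_H	*/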


> +#endif //      _RISCV_CRYPTO_SCALAR_H
> \ No newline at end of file
  

Patch

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2cc5aeec9e4..caf673f1cb0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -510,6 +510,7 @@  pru-*-*)
 riscv*)
 	cpu_type=riscv
 	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o"
+	extra_headers="riscv_crypto.h riscv_crypto_scalar.h rvk_asm_intrin.h rvk_emu_intrin.h"
 	d_target_objs="riscv-d.o"
 	;;
 rs6000*-*-*)
diff --git a/gcc/config/riscv/riscv_crypto.h b/gcc/config/riscv/riscv_crypto.h
new file mode 100644
index 00000000000..d06c777b7af
--- /dev/null
+++ b/gcc/config/riscv/riscv_crypto.h
@@ -0,0 +1,12 @@ 
+//	riscv_crypto.h
+//	2022-02-12	Markku-Juhani O. Saarinen <mjos@pqshield.com>
+//	Copyright (c) 2022, PQShield Ltd. All rights reserved.
+
+//	=== Master crypto intrinsics header. Currently Just includes scalar crypto.
+
+#ifndef _RISCV_CRYPTO_H
+#define _RISCV_CRYPTO_H
+
+#include "riscv_crypto_scalar.h"
+
+#endif	//	_RISCV_CRYPTO_H
\ No newline at end of file
diff --git a/gcc/config/riscv/riscv_crypto_scalar.h b/gcc/config/riscv/riscv_crypto_scalar.h
new file mode 100644
index 00000000000..0ed627856fd
--- /dev/null
+++ b/gcc/config/riscv/riscv_crypto_scalar.h
@@ -0,0 +1,247 @@ 
+//	riscv_crypto_scalar.h
+//	2021-11-08	Markku-Juhani O. Saarinen <mjos@pqshield.com>
+//	Copyright (c) 2021, PQShield Ltd. All rights reserved.
+
+//	=== Scalar crypto: General mapping from intrinsics to compiler builtins,
+//		inline assembler, or to an (insecure) porting / emulation layer.
+
+/*
+ *	_rv_*(...)
+ *	  RV32/64 intrinsics that return the "long" data type
+ *
+ *	_rv32_*(...)
+ *	  RV32/64 intrinsics that return the "int32_t" data type
+ *
+ *	_rv64_*(...)
+ *	  RV64-only intrinsics that return the "int64_t" data type
+ *
+ */
+
+#ifndef _RISCV_CRYPTO_SCALAR_H
+#define _RISCV_CRYPTO_SCALAR_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(__riscv_xlen) && !defined(RVKINTRIN_EMULATE)
+#warning "Target is not RISC-V. Enabling insecure emulation."
+#define RVKINTRIN_EMULATE 1
+#endif
+
+#if defined(RVKINTRIN_EMULATE)
+
+//	intrinsics via emulation (insecure -- porting / debug option)
+#include "rvk_emu_intrin.h"
+#define _RVK_INTRIN_IMPL(s) _rvk_emu_##s
+
+#elif defined(RVKINTRIN_ASSEMBLER)
+
+//	intrinsics via inline assembler (builtins not available)
+#include "rvk_asm_intrin.h"
+#define _RVK_INTRIN_IMPL(s) _rvk_asm_##s
+#else
+
+//	intrinsics via compiler builtins
+#include <stdint.h>
+#define _RVK_INTRIN_IMPL(s) __builtin_riscv_##s
+
+#endif
+
+//	set type if not already set
+#if !defined(RVKINTRIN_RV32) && !defined(RVKINTRIN_RV64)
+#if __riscv_xlen == 32
+#define RVKINTRIN_RV32
+#elif __riscv_xlen == 64
+#define RVKINTRIN_RV64
+#else
+#error "__riscv_xlen not valid."
+#endif
+#endif
+
+//	Mappings to implementation
+
+//	=== (mapping)	Zbkb:	Bitmanipulation instructions for Cryptography
+
+static inline int32_t _rv32_ror(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(ror_32)(rs1, rs2); }			//	ROR[W] ROR[W]I
+
+static inline int32_t _rv32_rol(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(rol_32)(rs1, rs2); }			//	ROL[W] ROR[W]I
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_ror(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(ror_64)(rs1, rs2); }			//	ROR or RORI
+
+static inline int64_t _rv64_rol(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(rol_64)(rs1, rs2); }			//	ROL or RORI
+#endif
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_brev8(int32_t rs1)
+	{ return _RVK_INTRIN_IMPL(brev8_32)(rs1); }				//	BREV8 (GREVI)
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_brev8(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(brev8_64)(rs1); }				//	BREV8 (GREVI)
+#endif
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_zip(int32_t rs1)
+	{ return _RVK_INTRIN_IMPL(zip_32)(rs1); }				//	ZIP (SHFLI)
+
+static inline int32_t _rv32_unzip(int32_t rs1)
+	{ return _RVK_INTRIN_IMPL(unzip_32)(rs1); }				//	UNZIP (UNSHFLI)
+#endif
+
+//	=== (mapping)	Zbkc:	Carry-less multiply instructions
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_clmul(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(clmul_32)(rs1, rs2); }		//	CLMUL
+
+static inline int32_t _rv32_clmulh(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(clmulh_32)(rs1, rs2); }		//	CLMULH
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_clmul(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(clmul_64)(rs1, rs2); }		//	CLMUL
+
+static inline int64_t _rv64_clmulh(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(clmulh_64)(rs1, rs2); }		//	CLMULH
+#endif
+
+//	=== (mapping)	Zbkx:	Crossbar permutation instructions
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_xperm8(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(xperm8_32)(rs1, rs2); }		//	XPERM8
+
+static inline int32_t _rv32_xperm4(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(xperm4_32)(rs1, rs2); }		//	XPERM4
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_xperm8(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(xperm8_64)(rs1, rs2); }		//	XPERM8
+
+static inline int64_t _rv64_xperm4(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(xperm4_64)(rs1, rs2); }		//	XPERM4
+#endif
+
+//	=== (mapping)	Zknd:	NIST Suite: AES Decryption
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_aes32dsi(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(aes32dsi)(rs1, rs2, bs); }	//	AES32DSI
+
+static inline int32_t _rv32_aes32dsmi(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(aes32dsmi)(rs1, rs2, bs); }	//	AES32DSMI
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_aes64ds(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(aes64ds)(rs1, rs2); }			//	AES64DS
+
+static inline int64_t _rv64_aes64dsm(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(aes64dsm)(rs1, rs2); }		//	AES64DSM
+
+static inline int64_t _rv64_aes64im(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(aes64im)(rs1); }				//	AES64IM
+
+static inline int64_t _rv64_aes64ks1i(int64_t rs1, int rnum)
+	{ return _RVK_INTRIN_IMPL(aes64ks1i)(rs1, rnum); }		//	AES64KS1I
+
+static inline int64_t _rv64_aes64ks2(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(aes64ks2)(rs1, rs2); }		//	AES64KS2
+#endif
+
+//	=== (mapping)	Zkne:	NIST Suite: AES Encryption
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_aes32esi(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(aes32esi)(rs1, rs2, bs); }	//	AES32ESI
+
+static inline int32_t _rv32_aes32esmi(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(aes32esmi)(rs1, rs2, bs); }	//	AES32ESMI
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_aes64es(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(aes64es)(rs1, rs2); }			//	AES64ES
+
+static inline int64_t _rv64_aes64esm(int64_t rs1, int64_t rs2)
+	{ return _RVK_INTRIN_IMPL(aes64esm)(rs1, rs2); }		//	AES64ESM
+#endif
+
+//	=== (mapping)	Zknh:	NIST Suite: Hash Function Instructions
+
+static inline long _rv_sha256sig0(long rs1)
+	{ return _RVK_INTRIN_IMPL(sha256sig0)(rs1); }			//	SHA256SIG0
+
+static inline long _rv_sha256sig1(long rs1)
+	{ return _RVK_INTRIN_IMPL(sha256sig1)(rs1); }			//	SHA256SIG1
+
+static inline long _rv_sha256sum0(long rs1)
+	{ return _RVK_INTRIN_IMPL(sha256sum0)(rs1); }			//	SHA256SUM0
+
+static inline long _rv_sha256sum1(long rs1)
+	{ return _RVK_INTRIN_IMPL(sha256sum1)(rs1); }			//	SHA256SUM1
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rv32_sha512sig0h(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sig0h)(rs1, rs2); }		//	SHA512SIG0H
+
+static inline int32_t _rv32_sha512sig0l(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sig0l)(rs1, rs2); }		//	SHA512SIG0L
+
+static inline int32_t _rv32_sha512sig1h(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sig1h)(rs1, rs2); }		//	SHA512SIG1H
+
+static inline int32_t _rv32_sha512sig1l(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sig1l)(rs1, rs2); }		//	SHA512SIG1L
+
+static inline int32_t _rv32_sha512sum0r(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sum0r)(rs1, rs2); }		//	SHA512SUM0R
+
+static inline int32_t _rv32_sha512sum1r(int32_t rs1, int32_t rs2)
+	{ return _RVK_INTRIN_IMPL(sha512sum1r)(rs1, rs2); }		//	SHA512SUM1R
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rv64_sha512sig0(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(sha512sig0)(rs1); }			//	SHA512SIG0
+
+static inline int64_t _rv64_sha512sig1(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(sha512sig1)(rs1); }			//	SHA512SIG1
+
+static inline int64_t _rv64_sha512sum0(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(sha512sum0)(rs1); }			//	SHA512SUM0
+
+static inline int64_t _rv64_sha512sum1(int64_t rs1)
+	{ return _RVK_INTRIN_IMPL(sha512sum1)(rs1); }			//	SHA512SUM1
+#endif
+
+//	=== (mapping)	Zksed:	ShangMi Suite: SM4 Block Cipher Instructions
+
+static inline long _rv_sm4ks(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(sm4ks)(rs1, rs2, bs); }		//	SM4KS
+
+static inline long _rv_sm4ed(int32_t rs1, int32_t rs2, int bs)
+	{ return _RVK_INTRIN_IMPL(sm4ed)(rs1, rs2, bs); }		//	SM4ED
+
+//	=== (mapping)	Zksh:	ShangMi Suite: SM3 Hash Function Instructions
+
+static inline long _rv_sm3p0(long rs1)
+	{ return _RVK_INTRIN_IMPL(sm3p0)(rs1); }				//	SM3P0
+
+static inline long _rv_sm3p1(long rs1)
+	{ return _RVK_INTRIN_IMPL(sm3p1)(rs1); }				//	SM3P1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	//	_RISCV_CRYPTO_SCALAR_H
\ No newline at end of file
diff --git a/gcc/config/riscv/rvk_asm_intrin.h b/gcc/config/riscv/rvk_asm_intrin.h
new file mode 100644
index 00000000000..a9a088d1fd6
--- /dev/null
+++ b/gcc/config/riscv/rvk_asm_intrin.h
@@ -0,0 +1,187 @@ 
+//	rvk_asm_intrin.h
+//	2021-11-08	Markku-Juhani O. Saarinen <mjos@pqshield.com>
+//	Copyright (c) 2021, PQShield Ltd. All rights reserved.
+
+//	=== Inline assembler definitions for scalar cryptography intrinsics.
+
+#ifndef _RVK_ASM_INTRIN_H
+#define _RVK_ASM_INTRIN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+#if __riscv_xlen == 32
+#define RVKINTRIN_RV32
+#elif __riscv_xlen == 64
+#define RVKINTRIN_RV64
+#else
+#error "__riscv_xlen not valid."
+#endif
+
+//	=== (inline)	Zbkb:	Bitmanipulation instructions for Cryptography
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_ror_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 &  rs2)); else __asm__ ("ror	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_rol_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & -rs2)); else __asm__ ("rol	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int32_t _rvk_asm_ror_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("roriw  %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 &  rs2)); else __asm__ ("rorw  %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_rol_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; if (__builtin_constant_p(rs2)) __asm__ ("roriw  %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(31 & -rs2)); else __asm__ ("rolw  %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_ror_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 &  rs2)); else __asm__ ("ror	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_rol_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; if (__builtin_constant_p(rs2)) __asm__ ("rori	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(63 & -rs2)); else __asm__ ("rol	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_brev8_32(int32_t rs1)
+	{ int32_t rd; __asm__ ("grevi  %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(7)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_brev8_64(int64_t rs1)
+	{ int64_t rd; __asm__ ("grevi  %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(7)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_zip_32(int32_t rs1)
+	{ int32_t rd; __asm__ ("shfli  %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15)); return rd; }
+static inline int32_t _rvk_asm_unzip_32(int32_t rs1)
+	{ int32_t rd; __asm__ ("unshfli	 %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(15)); return rd; }
+#endif
+
+//	=== (inline)	Zbkc:	Carry-less multiply instructions
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_clmul_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("clmul  %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_clmulh_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("clmulh	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_clmul_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__ ("clmul  %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_clmulh_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__ ("clmulh	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+//	=== (inline)	Zbkx:	Crossbar permutation instructions
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_xperm8_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("xperm8	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_xperm4_32(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("xperm4	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_xperm8_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__ ("xperm8	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_xperm4_64(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__ ("xperm4	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+//	=== (inline)	Zknd:	NIST Suite: AES Decryption
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_aes32dsi(int32_t rs1, int32_t rs2, int bs)
+	{ int32_t rd; __asm__("aes32dsi	 %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+static inline int32_t _rvk_asm_aes32dsmi(int32_t rs1, int32_t rs2, int bs)
+	{ int32_t rd; __asm__("aes32dsmi  %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_aes64ds(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__("aes64ds	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_aes64dsm(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__("aes64dsm %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_aes64im(int64_t rs1)
+	{ int64_t rd; __asm__("aes64im	%0, %1	 " : "=r"(rd) : "r"(rs1)); return rd; }
+static inline int64_t _rvk_asm_aes64ks1i(int64_t rs1, int rnum)
+	{ int64_t rd; __asm__("aes64ks1i %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rnum)); return rd; }
+static inline int64_t _rvk_asm_aes64ks2(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__("aes64ks2 %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+//	=== (inline)	Zkne:	NIST Suite: AES Encryption
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_aes32esi(int32_t rs1, int32_t rs2, int bs)
+	{ int32_t rd; __asm__("aes32esi	 %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+static inline int32_t _rvk_asm_aes32esmi(int32_t rs1, int32_t rs2, int bs)
+	{ int32_t rd; __asm__("aes32esmi %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_aes64es(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__("aes64es	%0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int64_t _rvk_asm_aes64esm(int64_t rs1, int64_t rs2)
+	{ int64_t rd; __asm__("aes64esm %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+//	=== (inline)	Zknh:	NIST Suite: Hash Function Instructions
+
+static inline long _rvk_asm_sha256sig0(long rs1)
+	{ long rd; __asm__ ("sha256sig0 %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline long _rvk_asm_sha256sig1(long rs1)
+	{ long rd; __asm__ ("sha256sig1 %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline long _rvk_asm_sha256sum0(long rs1)
+	{ long rd; __asm__ ("sha256sum0 %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline long _rvk_asm_sha256sum1(long rs1)
+	{ long rd; __asm__ ("sha256sum1 %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+
+#ifdef RVKINTRIN_RV32
+static inline int32_t _rvk_asm_sha512sig0h(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sig0h %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_sha512sig0l(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sig0l %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_sha512sig1h(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sig1h %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_sha512sig1l(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sig1l %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_sha512sum0r(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sum0r %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+static inline int32_t _rvk_asm_sha512sum1r(int32_t rs1, int32_t rs2)
+	{ int32_t rd; __asm__ ("sha512sum1r %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); return rd; }
+#endif
+
+#ifdef RVKINTRIN_RV64
+static inline int64_t _rvk_asm_sha512sig0(int64_t rs1)
+	{ int64_t rd; __asm__ ("sha512sig0	%0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline int64_t _rvk_asm_sha512sig1(int64_t rs1)
+	{ int64_t rd; __asm__ ("sha512sig1	%0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline int64_t _rvk_asm_sha512sum0(int64_t rs1)
+	{ int64_t rd; __asm__ ("sha512sum0	%0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline int64_t _rvk_asm_sha512sum1(int64_t rs1)
+	{ int64_t rd; __asm__ ("sha512sum1	%0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+#endif
+
+//	=== (inline)	Zksed:	ShangMi Suite: SM4 Block Cipher Instructions
+
+static inline long _rvk_asm_sm4ks(int32_t rs1, int32_t rs2, int bs)
+	{ long rd; __asm__("sm4ks %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+static inline long _rvk_asm_sm4ed(int32_t rs1, int32_t rs2, int bs)
+	{ long rd; __asm__("sm4ed %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(bs)); return rd; }
+
+//	=== (inline)	Zksh:	ShangMi Suite: SM3 Hash Function Instructions
+
+static inline long _rvk_asm_sm3p0(long rs1)
+	{ long rd; __asm__("sm3p0  %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+static inline long _rvk_asm_sm3p1(long rs1)
+	{ long rd; __asm__("sm3p1  %0, %1" : "=r"(rd) : "r"(rs1)); return rd; }
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	//	_RVK_ASM_INTRIN_H
\ No newline at end of file
diff --git a/gcc/config/riscv/rvk_emu_intrin.h b/gcc/config/riscv/rvk_emu_intrin.h
new file mode 100644
index 00000000000..9b6e874696a
--- /dev/null
+++ b/gcc/config/riscv/rvk_emu_intrin.h
@@ -0,0 +1,594 @@ 
+//	rvk_emu_intrin.h
+//	2021-02-13	Markku-Juhani O. Saarinen <mjos@pqshield.com>
+//	Copyright (c) 2021, PQShield Ltd. All rights reserved.
+
+//	=== Platform-independent emulation for scalar cryptography intrinsics.
+//	Requires tables in rvk_emu_intrin.c (prefix _rvk_emu)
+
+#ifndef _RVK_EMU_INTRIN_H
+#define _RVK_EMU_INTRIN_H
+
+#ifdef RVKINTRIN_EMULATE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <limits.h>
+#include <stdint.h>
+
+//	=== RVKINTRIN_EMULATE ==============================================
+
+#if UINT_MAX != 0xffffffffU
+#  error "<rvk_emu_intrin.h> supports systems with sizeof(int) = 4."
+#endif
+
+#if (ULLONG_MAX == 0xffffffffLLU) || (ULLONG_MAX != 0xffffffffffffffffLLU)
+#  error "<rvk_emu_intrin.h> supports systems with sizeof(long long) = 8."
+#endif
+
+#if !defined(RVKINTRIN_RV32) && !defined(RVKINTRIN_RV64)
+#if UINT_MAX == ULONG_MAX
+#  define RVKINTRIN_RV32
+#else
+#  define RVKINTRIN_RV64
+#endif
+#endif
+
+//	=== (emulated)	Zbkb:	Bitmanipulation instructions for Cryptography
+
+//	shift helpers (that mask/limit the amount of shift)
+
+static inline int32_t _rvk_emu_sll_32(int32_t rs1, int32_t rs2)
+	{ return rs1 << (rs2 & 31); }
+static inline int32_t _rvk_emu_srl_32(int32_t rs1, int32_t rs2)
+	{ return (uint32_t)rs1 >> (rs2 & 31); }
+static inline int64_t _rvk_emu_sll_64(int64_t rs1, int64_t rs2)
+	{ return rs1 << (rs2 & 63); }
+static inline int64_t _rvk_emu_srl_64(int64_t rs1, int64_t rs2)
+	{ return (uint64_t)rs1 >> (rs2 & 63); }
+
+//	rotate (a part of the extension). no separate intrinsic for rori
+
+static inline int32_t _rvk_emu_rol_32(int32_t rs1, int32_t rs2)
+	{ return _rvk_emu_sll_32(rs1, rs2) | _rvk_emu_srl_32(rs1, -rs2); }
+static inline int32_t _rvk_emu_ror_32(int32_t rs1, int32_t rs2)
+	{ return _rvk_emu_srl_32(rs1, rs2) | _rvk_emu_sll_32(rs1, -rs2); }
+
+static inline int64_t _rvk_emu_rol_64(int64_t rs1, int64_t rs2)
+	{ return _rvk_emu_sll_64(rs1, rs2) | _rvk_emu_srl_64(rs1, -rs2); }
+static inline int64_t _rvk_emu_ror_64(int64_t rs1, int64_t rs2)
+	{ return _rvk_emu_srl_64(rs1, rs2) | _rvk_emu_sll_64(rs1, -rs2); }
+
+//	brev8, rev8
+
+static inline int32_t _rvk_emu_grev_32(int32_t rs1, int32_t rs2)
+{
+	uint32_t x = rs1;
+	int shamt = rs2 & 31;
+	if (shamt &	 1) x = ((x & 0x55555555) <<  1) | ((x & 0xAAAAAAAA) >>	 1);
+	if (shamt &	 2) x = ((x & 0x33333333) <<  2) | ((x & 0xCCCCCCCC) >>	 2);
+	if (shamt &	 4) x = ((x & 0x0F0F0F0F) <<  4) | ((x & 0xF0F0F0F0) >>	 4);
+	if (shamt &	 8) x = ((x & 0x00FF00FF) <<  8) | ((x & 0xFF00FF00) >>	 8);
+	if (shamt & 16) x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
+	return x;
+}
+
+static inline int64_t _rvk_emu_grev_64(int64_t rs1, int64_t rs2)
+{
+	uint64_t x = rs1;
+	int shamt = rs2 & 63;
+	if (shamt &	 1)
+		x = ((x & 0x5555555555555555LL) <<	1) |
+			((x & 0xAAAAAAAAAAAAAAAALL) >>	1);
+	if (shamt &	 2)
+		x = ((x & 0x3333333333333333LL) <<	2) |
+			((x & 0xCCCCCCCCCCCCCCCCLL) >>	2);
+	if (shamt &	 4)
+		x = ((x & 0x0F0F0F0F0F0F0F0FLL) <<	4) |
+			((x & 0xF0F0F0F0F0F0F0F0LL) >>	4);
+	if (shamt &	 8)
+		x = ((x & 0x00FF00FF00FF00FFLL) <<	8) |
+			((x & 0xFF00FF00FF00FF00LL) >>	8);
+	if (shamt & 16)
+		x = ((x & 0x0000FFFF0000FFFFLL) << 16) |
+			((x & 0xFFFF0000FFFF0000LL) >> 16);
+	if (shamt & 32)
+		x = ((x & 0x00000000FFFFFFFFLL) << 32) |
+			((x & 0xFFFFFFFF00000000LL) >> 32);
+	return x;
+}
+
+static inline int32_t _rvk_emu_brev8_32(int32_t rs1)
+	{ return _rvk_emu_grev_32(rs1, 7); }
+
+static inline int64_t _rvk_emu_brev8_64(int64_t rs1)
+	{ return _rvk_emu_grev_64(rs1, 7); }
+
+//	shuffle (zip and unzip, RV32 only)
+
+static inline uint32_t _rvk_emu_shuffle32_stage(uint32_t src, uint32_t maskL, uint32_t maskR, int N)
+{
+	uint32_t x = src & ~(maskL | maskR);
+	x |= ((src <<  N) & maskL) | ((src >>  N) & maskR);
+	return x;
+}
+static inline int32_t _rvk_emu_shfl_32(int32_t rs1, int32_t rs2)
+{
+	uint32_t x = rs1;
+	int shamt = rs2 & 15;
+
+	if (shamt & 8) x = _rvk_emu_shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
+	if (shamt & 4) x = _rvk_emu_shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
+	if (shamt & 2) x = _rvk_emu_shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
+	if (shamt & 1) x = _rvk_emu_shuffle32_stage(x, 0x44444444, 0x22222222, 1);
+
+	return x;
+}
+
+static inline int32_t _rvk_emu_unshfl_32(int32_t rs1, int32_t rs2)
+{
+	uint32_t x = rs1;
+	int shamt = rs2 & 15;
+
+	if (shamt & 1) x = _rvk_emu_shuffle32_stage(x, 0x44444444, 0x22222222, 1);
+	if (shamt & 2) x = _rvk_emu_shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2);
+	if (shamt & 4) x = _rvk_emu_shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4);
+	if (shamt & 8) x = _rvk_emu_shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8);
+
+	return x;
+}
+
+static inline int32_t _rvk_emu_zip_32(int32_t rs1)
+	{ return _rvk_emu_shfl_32(rs1, 15); }
+static inline int32_t _rvk_emu_unzip_32(int32_t rs1)
+	{ return _rvk_emu_unshfl_32(rs1, 15); }
+
+//	=== (emulated)	Zbkc: Carry-less multiply instructions
+
+static inline int32_t _rvk_emu_clmul_32(int32_t rs1, int32_t rs2)
+{
+	uint32_t a = rs1, b = rs2, x = 0;
+	for (int i = 0; i < 32; i++) {
+		if ((b >> i) & 1)
+			x ^= a << i;
+	}
+	return x;
+}
+
+static inline int32_t _rvk_emu_clmulh_32(int32_t rs1, int32_t rs2)
+{
+	uint32_t a = rs1, b = rs2, x = 0;
+	for (int i = 1; i < 32; i++) {
+		if ((b >> i) & 1)
+			x ^= a >> (32-i);
+	}
+	return x;
+}
+
+static inline int64_t _rvk_emu_clmul_64(int64_t rs1, int64_t rs2)
+{
+	uint64_t a = rs1, b = rs2, x = 0;
+
+	for (int i = 0; i < 64; i++) {
+		if ((b >> i) & 1)
+			x ^= a << i;
+	}
+	return x;
+}
+
+static inline int64_t _rvk_emu_clmulh_64(int64_t rs1, int64_t rs2)
+{
+	uint64_t a = rs1, b = rs2, x = 0;
+
+	for (int i = 1; i < 64; i++) {
+		if ((b >> i) & 1)
+			x ^= a >> (64-i);
+	}
+	return x;
+}
+
+//	=== (emulated)	Zbkx: Crossbar permutation instructions
+
+static inline uint32_t _rvk_emu_xperm32(uint32_t rs1, uint32_t rs2, int sz_log2)
+{
+	uint32_t r = 0;
+	uint32_t sz = 1LL << sz_log2;
+	uint32_t mask = (1LL << sz) - 1;
+	for (int i = 0; i < 32; i += sz) {
+		uint32_t pos = ((rs2 >> i) & mask) << sz_log2;
+		if (pos < 32)
+			r |= ((rs1 >> pos) & mask) << i;
+	}
+	return r;
+}
+
+static inline int32_t _rvk_emu_xperm4_32(int32_t rs1, int32_t rs2)
+	{ return _rvk_emu_xperm32(rs1, rs2, 2); }
+
+static inline int32_t _rvk_emu_xperm8_32(int32_t rs1, int32_t rs2)
+	{ return _rvk_emu_xperm32(rs1, rs2, 3); }
+
+static inline uint64_t _rvk_emu_xperm64(uint64_t rs1, uint64_t rs2, int sz_log2)
+{
+	uint64_t r = 0;
+	uint64_t sz = 1LL << sz_log2;
+	uint64_t mask = (1LL << sz) - 1;
+	for (int i = 0; i < 64; i += sz) {
+		uint64_t pos = ((rs2 >> i) & mask) << sz_log2;
+		if (pos < 64)
+			r |= ((rs1 >> pos) & mask) << i;
+	}
+	return r;
+}
+
+static inline int64_t _rvk_emu_xperm4_64(int64_t rs1, int64_t rs2)
+	{ return _rvk_emu_xperm64(rs1, rs2, 2); }
+
+static inline int64_t _rvk_emu_xperm8_64(int64_t rs1, int64_t rs2)
+	{ return _rvk_emu_xperm64(rs1, rs2, 3); }
+
+/*
+ *	_rvk_emu_*(...)
+ *	  Some INTERNAL tables (rvk_emu.c) and functions.
+ */
+
+extern const uint8_t _rvk_emu_aes_fwd_sbox[256];	//	AES Forward S-Box
+extern const uint8_t _rvk_emu_aes_inv_sbox[256];	//	AES Inverse S-Box
+extern const uint8_t _rvk_emu_sm4_sbox[256];		//	SM4 S-Box
+
+//	rvk_emu internal: multiply by 0x02 in AES's GF(256) - LFSR style.
+
+static inline uint8_t _rvk_emu_aes_xtime(uint8_t x)
+{
+	return (x << 1) ^ ((x & 0x80) ? 0x11B : 0x00);
+}
+
+//	rvk_emu internal: AES forward MixColumns 8->32 bits
+
+static inline uint32_t _rvk_emu_aes_fwd_mc_8(uint32_t x)
+{
+	uint32_t x2;
+
+	x2 = _rvk_emu_aes_xtime(x);				//	double x
+	x = ((x ^ x2) << 24) |					//	0x03	MixCol MDS Matrix
+		(x << 16) |							//	0x01
+		(x << 8) |							//	0x01
+		x2;									//	0x02
+
+	return x;
+}
+
+//	rvk_emu internal: AES forward MixColumns 32->32 bits
+
+static inline uint32_t _rvk_emu_aes_fwd_mc_32(uint32_t x)
+{
+	return	_rvk_emu_aes_fwd_mc_8(x & 0xFF) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_fwd_mc_8((x >>	 8) & 0xFF),	8) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_fwd_mc_8((x >> 16) & 0xFF), 16) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_fwd_mc_8((x >> 24) & 0xFF), 24);
+}
+
+//	rvk_emu internal: AES inverse MixColumns 8->32 bits
+
+static inline uint32_t _rvk_emu_aes_inv_mc_8(uint32_t x)
+{
+	uint32_t x2, x4, x8;
+
+	x2 = _rvk_emu_aes_xtime(x);				//	double x
+	x4 = _rvk_emu_aes_xtime(x2);			//	double to 4*x
+	x8 = _rvk_emu_aes_xtime(x4);			//	double to 8*x
+
+	x = ((x ^ x2 ^ x8) << 24) |				//	0x0B	Inv MixCol MDS Matrix
+		((x ^ x4 ^ x8) << 16) |				//	0x0D
+		((x ^ x8) << 8) |					//	0x09
+		(x2 ^ x4 ^ x8);						//	0x0E
+
+	return x;
+}
+
+//	rvk_emu internal: AES inverse MixColumns 32->32 bits
+
+static inline uint32_t _rvk_emu_aes_inv_mc_32(uint32_t x)
+{
+	return	_rvk_emu_aes_inv_mc_8(x & 0xFF) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_inv_mc_8((x >>	 8) & 0xFF),	8) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_inv_mc_8((x >> 16) & 0xFF), 16) ^
+		_rvk_emu_rol_32(_rvk_emu_aes_inv_mc_8((x >> 24) & 0xFF), 24);
+}
+
+//	=== (emulated)	Zknd:	NIST Suite: AES Decryption
+
+static inline int32_t _rvk_emu_aes32dsi(int32_t rs1, int32_t rs2, uint8_t bs)
+{
+	int32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_aes_inv_sbox[x];			//	AES inverse s-box
+
+	return rs1 ^ _rvk_emu_rol_32(x, bs);
+}
+
+static inline int32_t _rvk_emu_aes32dsmi(int32_t rs1, int32_t rs2, uint8_t bs)
+{
+	int32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_aes_inv_sbox[x];			//	AES inverse s-box
+	x = _rvk_emu_aes_inv_mc_8(x);			//	inverse MixColumns
+
+	return rs1 ^ _rvk_emu_rol_32(x, bs);
+}
+
+static inline int64_t _rvk_emu_aes64ds(int64_t rs1, int64_t rs2)
+{
+	//	Half of inverse ShiftRows and SubBytes (last round)
+	return ((int64_t) _rvk_emu_aes_inv_sbox[rs1 & 0xFF]) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs2 >> 40) & 0xFF]) <<  8) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs2 >> 16) & 0xFF]) << 16) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs1 >> 56) & 0xFF]) << 24) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs1 >> 32) & 0xFF]) << 32) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs1 >>  8) & 0xFF]) << 40) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs2 >> 48) & 0xFF]) << 48) |
+		(((int64_t) _rvk_emu_aes_inv_sbox[(rs2 >> 24) & 0xFF]) << 56);
+}
+
+static inline int64_t _rvk_emu_aes64im(int64_t rs1)
+{
+	return ((int64_t) _rvk_emu_aes_inv_mc_32(rs1)) |
+		(((int64_t) _rvk_emu_aes_inv_mc_32(rs1 >> 32)) << 32);
+}
+
+static inline int64_t _rvk_emu_aes64dsm(int64_t rs1, int64_t rs2)
+{
+	int64_t x;
+
+	x = _rvk_emu_aes64ds(rs1, rs2);			//	Inverse ShiftRows, SubBytes
+	x = _rvk_emu_aes64im(x);					//	Inverse MixColumns
+	return x;
+}
+
+static inline int64_t _rvk_emu_aes64ks1i(int64_t rs1, int rnum)
+{
+	//	AES Round Constants
+	const uint8_t aes_rcon[] = {
+		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
+	};
+
+	uint32_t t, rc;
+
+	t = rs1 >> 32;							//	high word
+	rc = 0;
+
+	if (rnum < 10) {						//	10: don't do it
+		t = _rvk_emu_ror_32(t, 8);
+		rc = aes_rcon[rnum];				//	round constant
+	}
+	//	SubWord
+	t = ((uint32_t) _rvk_emu_aes_fwd_sbox[t & 0xFF]) |
+		(((uint32_t) _rvk_emu_aes_fwd_sbox[(t >> 8) & 0xFF]) << 8) |
+		(((uint32_t) _rvk_emu_aes_fwd_sbox[(t >> 16) & 0xFF]) << 16) |
+		(((uint32_t) _rvk_emu_aes_fwd_sbox[(t >> 24) & 0xFF]) << 24);
+
+	t ^= rc;
+
+	return ((int64_t) t) | (((int64_t) t) << 32);
+}
+
+static inline int64_t _rvk_emu_aes64ks2(int64_t rs1, int64_t rs2)
+{
+	uint32_t t;
+
+	t = (rs1 >> 32) ^ (rs2 & 0xFFFFFFFF);	//	wrap 32 bits
+
+	return ((int64_t) t) ^					//	low 32 bits
+		(((int64_t) t) << 32) ^ (rs2 & 0xFFFFFFFF00000000ULL);
+}
+
+//	=== (emulated)	Zkne:	NIST Suite: AES Encryption
+
+static inline int32_t _rvk_emu_aes32esi(int32_t rs1, int32_t rs2, uint8_t bs)
+{
+	int32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_aes_fwd_sbox[x];			//	AES forward s-box
+
+	return rs1 ^ _rvk_emu_rol_32(x, bs);
+}
+
+static inline int32_t _rvk_emu_aes32esmi(int32_t rs1, int32_t rs2, uint8_t bs)
+{
+	uint32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_aes_fwd_sbox[x];			//	AES forward s-box
+	x = _rvk_emu_aes_fwd_mc_8(x);			//	forward MixColumns
+
+	return rs1 ^ _rvk_emu_rol_32(x, bs);
+}
+
+static inline int64_t _rvk_emu_aes64es(int64_t rs1, int64_t rs2)
+{
+	//	Half of forward ShiftRows and SubBytes (last round)
+	return ((int64_t) _rvk_emu_aes_fwd_sbox[rs1 & 0xFF]) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs1 >> 40) & 0xFF]) <<  8) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs2 >> 16) & 0xFF]) << 16) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs2 >> 56) & 0xFF]) << 24) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs1 >> 32) & 0xFF]) << 32) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs2 >>  8) & 0xFF]) << 40) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs2 >> 48) & 0xFF]) << 48) |
+		(((int64_t) _rvk_emu_aes_fwd_sbox[(rs1 >> 24) & 0xFF]) << 56);
+}
+
+static inline int64_t _rvk_emu_aes64esm(int64_t rs1, int64_t rs2)
+{
+	int64_t x;
+
+	x = _rvk_emu_aes64es(rs1, rs2);			//	ShiftRows and SubBytes
+	x = ((int64_t) _rvk_emu_aes_fwd_mc_32(x)) |		//	MixColumns
+		(((int64_t) _rvk_emu_aes_fwd_mc_32(x >> 32)) << 32);
+	return x;
+}
+
+//	=== (emulated)	Zknh:	NIST Suite: Hash Function Instructions
+
+static inline long _rvk_emu_sha256sig0(long rs1)
+{
+	int32_t x;
+
+	x = _rvk_emu_ror_32(rs1, 7) ^ _rvk_emu_ror_32(rs1, 18) ^
+		_rvk_emu_srl_32(rs1, 3);
+	return (long) x;
+}
+
+static inline long _rvk_emu_sha256sig1(long rs1)
+{
+	int32_t x;
+
+	x = _rvk_emu_ror_32(rs1, 17) ^ _rvk_emu_ror_32(rs1, 19) ^
+		_rvk_emu_srl_32(rs1, 10);
+	return (long) x;
+}
+
+static inline long _rvk_emu_sha256sum0(long rs1)
+{
+	int32_t x;
+
+	x = _rvk_emu_ror_32(rs1, 2) ^ _rvk_emu_ror_32(rs1, 13) ^
+		_rvk_emu_ror_32(rs1, 22);
+	return (long) x;
+}
+
+static inline long _rvk_emu_sha256sum1(long rs1)
+{
+	int32_t x;
+
+	x = _rvk_emu_ror_32(rs1, 6) ^ _rvk_emu_ror_32(rs1, 11) ^
+		_rvk_emu_ror_32(rs1, 25);
+	return (long) x;
+}
+
+static inline int32_t  _rvk_emu_sha512sig0h(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_srl_32(rs1, 1) ^ _rvk_emu_srl_32(rs1, 7) ^
+			_rvk_emu_srl_32(rs1, 8) ^ _rvk_emu_sll_32(rs2, 31) ^
+			_rvk_emu_sll_32(rs2, 24);
+}
+
+static inline int32_t  _rvk_emu_sha512sig0l(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_srl_32(rs1, 1) ^ _rvk_emu_srl_32(rs1, 7) ^
+			_rvk_emu_srl_32(rs1, 8) ^ _rvk_emu_sll_32(rs2, 31) ^
+			_rvk_emu_sll_32(rs2, 25) ^ _rvk_emu_sll_32(rs2, 24);
+}
+
+static inline int32_t  _rvk_emu_sha512sig1h(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_sll_32(rs1, 3) ^ _rvk_emu_srl_32(rs1, 6) ^
+			_rvk_emu_srl_32(rs1, 19) ^ _rvk_emu_srl_32(rs2, 29) ^
+			_rvk_emu_sll_32(rs2, 13);
+}
+
+static inline int32_t  _rvk_emu_sha512sig1l(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_sll_32(rs1, 3) ^ _rvk_emu_srl_32(rs1, 6) ^
+			_rvk_emu_srl_32(rs1,19) ^ _rvk_emu_srl_32(rs2, 29) ^
+			_rvk_emu_sll_32(rs2, 26) ^ _rvk_emu_sll_32(rs2, 13);
+}
+
+static inline int32_t  _rvk_emu_sha512sum0r(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_sll_32(rs1, 25) ^ _rvk_emu_sll_32(rs1, 30) ^
+			_rvk_emu_srl_32(rs1, 28) ^ _rvk_emu_srl_32(rs2, 7) ^
+			_rvk_emu_srl_32(rs2, 2) ^ _rvk_emu_sll_32(rs2, 4);
+}
+
+static inline int32_t  _rvk_emu_sha512sum1r(int32_t rs1, int32_t rs2)
+{
+	return	_rvk_emu_sll_32(rs1, 23) ^ _rvk_emu_srl_32(rs1,14) ^
+			_rvk_emu_srl_32(rs1, 18) ^ _rvk_emu_srl_32(rs2, 9) ^
+			_rvk_emu_sll_32(rs2, 18) ^ _rvk_emu_sll_32(rs2, 14);
+}
+
+static inline int64_t  _rvk_emu_sha512sig0(int64_t rs1)
+{
+	return	_rvk_emu_ror_64(rs1, 1) ^ _rvk_emu_ror_64(rs1, 8) ^
+			_rvk_emu_srl_64(rs1,7);
+}
+
+static inline int64_t  _rvk_emu_sha512sig1(int64_t rs1)
+{
+	return	_rvk_emu_ror_64(rs1, 19) ^ _rvk_emu_ror_64(rs1, 61) ^
+			_rvk_emu_srl_64(rs1, 6);
+}
+
+static inline int64_t  _rvk_emu_sha512sum0(int64_t rs1)
+{
+	return	_rvk_emu_ror_64(rs1, 28) ^ _rvk_emu_ror_64(rs1, 34) ^
+			_rvk_emu_ror_64(rs1, 39);
+}
+
+static inline int64_t  _rvk_emu_sha512sum1(int64_t rs1)
+{
+	return	_rvk_emu_ror_64(rs1, 14) ^ _rvk_emu_ror_64(rs1, 18) ^
+			_rvk_emu_ror_64(rs1, 41);
+}
+
+//	=== (emulated)	Zksed:	ShangMi Suite: SM4 Block Cipher Instructions
+
+static inline long _rvk_emu_sm4ed(long rs1, long rs2, uint8_t bs)
+{
+	int32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_sm4_sbox[x];				//	SM4 s-box
+
+	//	SM4 linear transform L
+	x = x ^ (x << 8) ^ (x << 2) ^ (x << 18) ^
+			((x & 0x3F) << 26) ^ ((x & 0xC0) << 10);
+	x = rs1 ^ _rvk_emu_rol_32(x, bs);
+	return (long) x;
+}
+
+static inline long _rvk_emu_sm4ks(long rs1, long rs2, uint8_t bs)
+{
+	int32_t x;
+
+	bs = (bs & 3) << 3;						//	byte select
+	x = (rs2 >> bs) & 0xFF;
+	x = _rvk_emu_sm4_sbox[x];				//	SM4 s-box
+
+	//	SM4 transform L' (key)
+	x = x ^ ((x & 0x07) << 29) ^ ((x & 0xFE) << 7) ^
+		((x & 1) << 23) ^ ((x & 0xF8) << 13);
+	x = rs1 ^ _rvk_emu_rol_32(x, bs);
+	return (long) x;
+}
+
+//	=== (emulated)	Zksh:	ShangMi Suite: SM3 Hash Function Instructions
+
+static inline int32_t  _rvk_emu_sm3p0(long rs1)
+{
+	int32_t x;
+
+	x = rs1 ^ _rvk_emu_rol_32(rs1, 9) ^ _rvk_emu_rol_32(rs1, 17);
+	return (long) x;
+}
+
+static inline int32_t  _rvk_emu_sm3p1(long rs1)
+{
+	int32_t x;
+
+	x = rs1 ^ _rvk_emu_rol_32(rs1, 15) ^ _rvk_emu_rol_32(rs1, 23);
+	return (long) x;
+}
+
+
+#endif		//	RVKINTRIN_EMULATE
+#endif		//	_RVK_EMU_INTRIN_H
\ No newline at end of file