@@ -1607,3 +1607,19 @@
DONE;
}
[(set_attr "type" "vandn")])
+
+;; -------------------------------------------------------------------------
+;; ---- Element-wise rotate left/right by vector: vrol.vv, vror.vv (ZVBB/ZVKB)
+;; -------------------------------------------------------------------------
+(define_expand "v<bitmanip_optab><mode>3"
+ [(set (match_operand:VI 0 "register_operand")
+ (bitmanip_rotate:VI
+ (match_operand:VI 1 "register_operand")
+ (match_operand:VI 2 "register_operand")))]
+ "TARGET_ZVBB || TARGET_ZVKB"
+ { /* Expand rotl/rotr to the predicated vrol/vror pattern as a VLMAX binary op.  */
+ riscv_vector::emit_vlmax_insn (code_for_pred_v (<CODE>, <MODE>mode),
+ riscv_vector::BINARY_OP, operands);
+ DONE;
+ }
+)
new file mode 100644
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vrolr-template.h"
+
+/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */
new file mode 100644
@@ -0,0 +1,87 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+/* Keep only the testsuite's stdint-gcc.h and one <assert.h>: the original
+   also pulled in the host <stdint.h>, a duplicate <assert.h>, and an
+   unused <stdio.h>.  */
+#include <stdint-gcc.h>
+#include <assert.h>
+
+#define ARRAY_SIZE 512
+
+#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) /* rotate each element left; "% bit_size" keeps the complementary shift < bit_size (shift by full width is UB, C11 6.5.7) */ \
+ for (int i = 0; i < size; i++) { \
+ (arr)[i] = (((arr)[i] << (shifts)[i % bit_size]) | ((arr)[i] >> ((bit_size - (shifts)[i % bit_size]) % bit_size))); \
+ }
+
+#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) /* rotate each element right; same "% bit_size" guard */ \
+ for (int i = 0; i < size; i++) { \
+ (arr)[i] = (((arr)[i] >> (shifts)[i % bit_size]) | ((arr)[i] << ((bit_size - (shifts)[i % bit_size]) % bit_size))); \
+ }
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results8(
+ uint8_t *result_left, uint8_t *result_right,
+ int bit_size, uint8_t *shift_values)
+{ /* Check each element against a scalar rotate of (uint8_t)i.  The rotate is fully parenthesized ("==" binds tighter than "|"), truncated to the element type, and uses "% bit_size" so the complementary shift stays < bit_size.  */
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ assert(result_left[i] == (uint8_t)(((uint8_t)i << shift_values[i % bit_size]) | ((uint8_t)i >> ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ assert(result_right[i] == (uint8_t)(((uint8_t)i >> shift_values[i % bit_size]) | ((uint8_t)i << ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results16(
+ uint16_t *result_left, uint16_t *result_right,
+ int bit_size, uint16_t *shift_values)
+{ /* As compare_results8, for uint16_t: parenthesize the whole rotate and truncate the expected value to 16 bits before comparing.  */
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ assert(result_left[i] == (uint16_t)(((uint16_t)i << shift_values[i % bit_size]) | ((uint16_t)i >> ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ assert(result_right[i] == (uint16_t)(((uint16_t)i >> shift_values[i % bit_size]) | ((uint16_t)i << ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results32(
+ uint32_t *result_left, uint32_t *result_right,
+ int bit_size, uint32_t *shift_values)
+{ /* As above for uint32_t: unsigned casts avoid signed-overflow UB in "i << s", and "% bit_size" avoids a 32-bit shift by 32 when the shift amount is 0.  */
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ assert(result_left[i] == (uint32_t)(((uint32_t)i << shift_values[i % bit_size]) | ((uint32_t)i >> ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ assert(result_right[i] == (uint32_t)(((uint32_t)i >> shift_values[i % bit_size]) | ((uint32_t)i << ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results64(
+ uint64_t *result_left, uint64_t *result_right,
+ int bit_size, uint64_t *shift_values)
+{ /* As above for uint64_t: the rotate is parenthesized ("==" binds tighter than "|") and "% bit_size" avoids shifting a 64-bit value by 64 (UB) when the shift amount is 0.  */
+ for (int i = 0; i < ARRAY_SIZE; i++) {
+ assert(result_left[i] == (((uint64_t)i << shift_values[i % bit_size]) | ((uint64_t)i >> ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ assert(result_right[i] == (((uint64_t)i >> shift_values[i % bit_size]) | ((uint64_t)i << ((bit_size - shift_values[i % bit_size]) % bit_size))));
+ }
+}
+
+#define TEST_SHIFT_OPERATIONS(TYPE, bit_size) /* fill buffers for TYPE, rotate them left/right, then verify against the scalar reference */ \
+ TYPE shift_val##bit_size[ARRAY_SIZE];\
+ TYPE result_left##bit_size[ARRAY_SIZE];\
+ TYPE result_right##bit_size[ARRAY_SIZE];\
+ do { \
+ for (int i = 0; i < ARRAY_SIZE; i++) { \
+ result_left##bit_size[i] = i;/* element i holds (TYPE)i */\
+ result_right##bit_size[i] = i;\
+ shift_val##bit_size[i] = i % bit_size; /* shift amounts cycle through 0..bit_size-1 */\
+ } \
+ CIRCULAR_LEFT_SHIFT_ARRAY(result_left##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)/* rotate-left loop (vectorization candidate) */\
+ CIRCULAR_RIGHT_SHIFT_ARRAY(result_right##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)/* rotate-right loop (vectorization candidate) */\
+ compare_results##bit_size(result_left##bit_size, result_right##bit_size, bit_size, shift_val##bit_size); \
+ } while(0)
+
+
+int main() { /* run the 8/16/32/64-bit variants */
+ TEST_SHIFT_OPERATIONS(uint8_t, 8);
+ TEST_SHIFT_OPERATIONS(uint16_t, 16);
+ TEST_SHIFT_OPERATIONS(uint32_t, 32);
+ TEST_SHIFT_OPERATIONS(uint64_t, 64);
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,29 @@
+#include <stdint-gcc.h>
+
+#define VROL_VV(SEW, S, T) /* loop rotating each op1 element left by the per-element count in op2 */ \
+__attribute__ ((noipa))\
+void autovect_vrol_vv_##S##SEW (T *out, T *op1, T *op2, int n){\
+ for(int i=0; i<n; i++){ \
+ op2[i] = op2[i] & (SEW-1);/* bound the shift count to [0, SEW-1] */\
+ out[i]= (op1[i] << op2[i]) | (op1[i] >> (SEW - op2[i]));/* NOTE(review): op2[i]==0 shifts by SEW (UB in C), but this looks like the exact form recognized as a rotate -- confirm before changing */\
+ }\
+}
+
+#define VROR_VV(SEW, S, T) /* loop rotating each op1 element right by the per-element count in op2 */ \
+__attribute__ ((noipa))\
+void autovect_vror_vv_##S##SEW (T *out, T *op1, T *op2, int n){\
+ for(int i=0; i<n; i++){ \
+ op2[i] = op2[i] & (SEW-1);/* bound the shift count to [0, SEW-1] */\
+ out[i]= (op1[i] >> op2[i]) | (op1[i] << (SEW - op2[i]));/* NOTE(review): same SEW-shift caveat as VROL_VV */\
+ }\
+}
+
+VROL_VV(8, u, uint8_t) /* one instantiation per element width => 4 vrol.vv / 4 vror.vv expected */
+VROL_VV(16, u, uint16_t)
+VROL_VV(32, u, uint32_t)
+VROL_VV(64, u, uint64_t)
+
+VROR_VV(8, u, uint8_t)
+VROR_VV(16, u, uint16_t)
+VROR_VV(32, u, uint32_t)
+VROR_VV(64, u, uint64_t)