gcn: Fix __builtin_gcn_first_call_this_thread_p

Message ID 54ee69be-7101-c4e0-fbca-3c7c3f1101b8@codesourcery.com
State Committed
Headers
Series gcn: Fix __builtin_gcn_first_call_this_thread_p |

Commit Message

Tobias Burnus Nov. 28, 2022, 7:40 a.m. UTC
  It turned out that cprop cleverly propagated the unspec_volatile
to the preceding (pseudo)register, which allowed the
'set (s0) (pseudoregister)' to be removed at -O2.  Unfortunately, it does
matter whether the assignment is done to 's2' (previously: pseudoregister)
or to s1.  Just having a hard register is not enough ...

Solution: Use USE (alias gen_rtx_USE) instead.

Additionally, I removed the s0 modification (which should leave the result unchanged)
by adding 'gcn_operand_part (DImode, reg, 1)' and then working with SImode. Result:

   if (__builtin_gcn_first_call_this_thread_p())
     x = 42;

now becomes (with -O2) the following; the builtin code is everything up to (and including)
'.L2', the rest is the 'if' and 'x=42':

         s_lshr_b32      s2, s1, 16
         s_cmpk_lg_u32   s2, 12345
         s_mov_b32       s12, scc
         s_mov_b32       vcc_lo, scc
         s_mov_b32       vcc_hi, 0
         s_cbranch_vccz  .L2
         s_and_b32       s2, s1, 65535   (= 0xFFFF)
         s_or_b32        s1, s2, 809041920 (= 0x30390000 = (12345 << 16))
.L2:
         s_getpc_b64     s[2:3]
         s_add_u32       s2, s2, x@rel32@lo+4
         s_addc_u32      s3, s3, x@rel32@hi+4
         s_mov_b32       vcc_lo, s12
         s_mov_b32       vcc_hi, 0
         s_cbranch_vccz  .L3
         s_mov_b32       s12, 42
         v_writelane_b32 v0, s12, 0
         s_mov_b64       exec, 1
         global_store_dword      v1, v0, s[2:3]
.L3:


OK for mainline?

Tobias
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
  

Comments

Andrew Stubbs Nov. 28, 2022, 9:41 a.m. UTC | #1
On 28/11/2022 07:40, Tobias Burnus wrote:
> It turned out that cprop cleverly propagated the unspec_volatile
> to the preceding (pseudo)register, permitting to remove the
> 'set (s0) (pseudoregister)' at -O2.  Unfortunately, it does
> matter whether the assignment is done to 's2' (previously: pseudoregister)
> or to s1. – Just having a hard register is not enough ...
> 
> Solution: Use USE (alias gen_rtx_USE) instead.
> 
> Additionally, I removed the s0 modification (that should lead to the 
> unchanged result)
> by adding 'gcn_operand_part (DImode, reg, 1)' and then working with 
> SImode. Result:
> 
>    if (__builtin_gcn_first_call_this_thread_p())
>      x = 42;
> 
> becomes now (with -O2) the following; the builtin code is up to to (and 
> including)
> '.L2', the rest is the 'if' and 'x=42':
> 
>          s_lshr_b32      s2, s1, 16
>          s_cmpk_lg_u32   s2, 12345
>          s_mov_b32       s12, scc
>          s_mov_b32       vcc_lo, scc
>          s_mov_b32       vcc_hi, 0
>          s_cbranch_vccz  .L2
>          s_and_b32       s2, s1, 65535   (= 0xFFFF)
>          s_or_b32        s1, s2, 809041920 (= 0x30390000 = (12345 << 16))
> .L2:
>          s_getpc_b64     s[2:3]
>          s_add_u32       s2, s2, x@rel32@lo+4
>          s_addc_u32      s3, s3, x@rel32@hi+4
>          s_mov_b32       vcc_lo, s12
>          s_mov_b32       vcc_hi, 0
>          s_cbranch_vccz  .L3
>          s_mov_b32       s12, 42
>          v_writelane_b32 v0, s12, 0
>          s_mov_b64       exec, 1
>          global_store_dword      v1, v0, s[2:3]
> .L3:
> 
> 
> OK for mainline?

OK.

Andrew
  

Patch

gcn: Fix __builtin_gcn_first_call_this_thread_p

Contrary to naive expectation, unspec_volatile (via prologue_use) did not
prevent the cprop pass (at -O2) from removing the access to the s[0:1]
(PRIVATE_SEGMENT_BUFFER_ARG) register, as the volatile just got put on
the preceding pseudoregister.  Solution: Use gen_rtx_USE instead.
Additionally, this patch removes (gen_)prologue_use_di as it is then no
longer used.

Finally, as we already do bit manipulation, instead of operating on the full
64-bit value - and then just keeping the value of 's0' - work directly
on only s1 of s[0:1] and do the bit manipulations there, generating
more readable assembly code and better matching the '#else' branch.

gcc/ChangeLog:

	* config/gcn/gcn.cc (gcn_expand_builtin_1): Work on s1 instead
	of s[0:1] and use USE to prevent removal of setting that register.
	* config/gcn/gcn.md (prologue_use_di): Remove.

 gcc/config/gcn/gcn.cc | 16 ++++++++--------
 gcc/config/gcn/gcn.md | 13 -------------
 2 files changed, 8 insertions(+), 21 deletions(-)

diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 6fb261318c4..c74fa007a21 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4556,8 +4556,9 @@  gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
 	    rtx not_first = gen_label_rtx ();
 	    rtx reg = gen_rtx_REG (DImode,
 			cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]);
-	    rtx cmp = force_reg (DImode,
-				 gen_rtx_LSHIFTRT (DImode, reg, GEN_INT (48)));
+	    reg = gcn_operand_part (DImode, reg, 1);
+	    rtx cmp = force_reg (SImode,
+				 gen_rtx_LSHIFTRT (SImode, reg, GEN_INT (16)));
 	    emit_insn (gen_cstoresi4 (result, gen_rtx_NE (BImode, cmp,
 							  GEN_INT(12345)),
 				      cmp, GEN_INT(12345)));
@@ -4565,12 +4566,11 @@  gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
 							      const0_rtx),
 				       result));
 	    emit_move_insn (reg,
-	      force_reg (DImode,
-		gen_rtx_IOR (DImode,
-			     gen_rtx_AND (DImode, reg,
-					  GEN_INT (0x0000ffffffffffffL)),
-			     GEN_INT (12345L << 48))));
-	    emit_insn (gen_prologue_use (reg));
+	      force_reg (SImode,
+		gen_rtx_IOR (SImode,
+			     gen_rtx_AND (SImode, reg, GEN_INT (0x0000ffff)),
+			     GEN_INT (12345L << 16))));
+	    emit_insn (gen_rtx_USE (VOIDmode, reg));
 	    emit_label (not_first);
 	  }
 	return result;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index a8b9c28d115..92e9892c4f7 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -697,19 +697,6 @@ 
   ""
   [(set_attr "length" "0")])
 
-(define_insn_and_split "prologue_use_di"
-  [(unspec_volatile [(match_operand:DI 0 "register_operand")] UNSPECV_PROLOGUE_USE)]
-  ""
-  "#"
-  "reload_completed"
-  [(unspec_volatile [(match_dup 0)] UNSPECV_PROLOGUE_USE)
-   (unspec_volatile [(match_dup 1)] UNSPECV_PROLOGUE_USE)]
-  {
-    operands[1] = gcn_operand_part (DImode, operands[0], 1);
-    operands[0] = gcn_operand_part (DImode, operands[0], 0);
-  }
-  [(set_attr "length" "0")])
-
 (define_expand "prologue"
   [(const_int 0)]
   ""