[15/17] amdgcn: Support XNACK mode

Message ID f8527d622db92f64123282e7bdca1e404c1c316f.1657188329.git.ams@codesourcery.com
State New
Headers
Series openmp, nvptx, amdgcn: 5.0 Memory Allocators |

Commit Message

Andrew Stubbs July 7, 2022, 10:34 a.m. UTC
  The XNACK feature allows memory load instructions to restart safely following
a page-miss interrupt.  This is useful for shared-memory devices, like APUs,
and to implement OpenMP Unified Shared Memory.

To support the feature we must be able to set the appropriate meta-data and
set the load instructions to early-clobber.  When the port supports scheduling
of s_waitcnt instructions there will be further requirements.

gcc/ChangeLog:

	* config/gcn/gcn-hsa.h (XNACKOPT): New macro.
	(ASM_SPEC): Use XNACKOPT.
	* config/gcn/gcn-opts.h (enum sram_ecc_type): Rename to ...
	(enum hsaco_attr_type): ... this, and generalize the names.
	(TARGET_XNACK): New macro.
	* config/gcn/gcn-valu.md (gather<mode>_insn_1offset<exec>):
	Add xnack compatible alternatives.
	(gather<mode>_insn_2offsets<exec>): Likewise.
	* config/gcn/gcn.c (gcn_option_override): Permit -mxnack for devices
	other than Fiji.
	(gcn_expand_epilogue): Remove early-clobber problems.
	(output_file_start): Emit xnack attributes.
	(gcn_hsa_declare_function_name): Obey -mxnack setting.
	* config/gcn/gcn.md (xnack): New attribute.
	(enabled): Rework to include "xnack" attribute.
	(*movbi): Add xnack compatible alternatives.
	(*mov<mode>_insn): Likewise.
	(*mov<mode>_insn): Likewise.
	(*mov<mode>_insn): Likewise.
	(*movti_insn): Likewise.
	* config/gcn/gcn.opt (-mxnack): Add the "on/off/any" syntax.
	(sram_ecc_type): Rename to ...
	(hsaco_attr_type: ... this.)
	* config/gcn/mkoffload.c (SET_XNACK_ANY): New macro.
	(TEST_XNACK): Delete.
	(TEST_XNACK_ANY): New macro.
	(TEST_XNACK_ON): New macro.
	(main): Support the new -mxnack=on/off/any syntax.
---
 gcc/config/gcn/gcn-hsa.h    |   3 +-
 gcc/config/gcn/gcn-opts.h   |  10 ++--
 gcc/config/gcn/gcn-valu.md  |  29 ++++-----
 gcc/config/gcn/gcn.cc       |  34 ++++++-----
 gcc/config/gcn/gcn.md       | 113 +++++++++++++++++++++++-------------
 gcc/config/gcn/gcn.opt      |  18 +++---
 gcc/config/gcn/mkoffload.cc |  19 ++++--
 7 files changed, 140 insertions(+), 86 deletions(-)
  

Patch

diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index b3079cebb43..fd08947574f 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -81,12 +81,13 @@  extern unsigned int gcn_local_sym_hash (const char *name);
 /* In HSACOv4 no attribute setting means the binary supports "any" hardware
    configuration.  The name of the attribute also changed.  */
 #define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
+#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
 
 /* Use LLVM assembler and linker options.  */
 #define ASM_SPEC  "-triple=amdgcn--amdhsa "  \
 		  "%:last_arg(%{march=*:-mcpu=%*}) " \
 		  "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \
-		  "%{" NO_XNACK "mxnack:-mattr=+xnack;:-mattr=-xnack} " \
+		  "%{" NO_XNACK XNACKOPT "}" \
 		  "%{" NO_SRAM_ECC SRAMOPT "} " \
 		  "-filetype=obj"
 #define LINK_SPEC "--pie --export-dynamic"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index b62dfb45f59..07ddc79cda3 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -48,11 +48,13 @@  extern enum gcn_isa {
 #define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
 #define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS)
 
-enum sram_ecc_type
+#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
+
+enum hsaco_attr_type
 {
-  SRAM_ECC_OFF,
-  SRAM_ECC_ON,
-  SRAM_ECC_ANY
+  HSACO_ATTR_OFF,
+  HSACO_ATTR_ON,
+  HSACO_ATTR_ANY
 };
 
 #endif
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index abe46201344..ec114db9dd1 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -741,13 +741,13 @@  (define_expand "gather<mode>_expr<exec>"
     {})
 
 (define_insn "gather<mode>_insn_1offset<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"		   "=v")
+  [(set (match_operand:V_ALL 0 "register_operand"		   "=v,&v")
 	(unspec:V_ALL
-	  [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
+	  [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v, v")
 			(vec_duplicate:<VnDI>
-			  (match_operand 2 "immediate_operand"	   " n")))
-	   (match_operand 3 "immediate_operand"			   " n")
-	   (match_operand 4 "immediate_operand"			   " n")
+			  (match_operand 2 "immediate_operand"	   " n, n")))
+	   (match_operand 3 "immediate_operand"			   " n, n")
+	   (match_operand 4 "immediate_operand"			   " n, n")
 	   (mem:BLK (scratch))]
 	  UNSPEC_GATHER))]
   "(AS_FLAT_P (INTVAL (operands[3]))
@@ -777,7 +777,8 @@  (define_insn "gather<mode>_insn_1offset<exec>"
     return buf;
   }
   [(set_attr "type" "flat")
-   (set_attr "length" "12")])
+   (set_attr "length" "12")
+   (set_attr "xnack" "off,on")])
 
 (define_insn "gather<mode>_insn_1offset_ds<exec>"
   [(set (match_operand:V_ALL 0 "register_operand"		   "=v")
@@ -802,17 +803,18 @@  (define_insn "gather<mode>_insn_1offset_ds<exec>"
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_2offsets<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"			"=v")
+  [(set (match_operand:V_ALL 0 "register_operand"		     "=v,&v")
 	(unspec:V_ALL
 	  [(plus:<VnDI>
 	     (plus:<VnDI>
 	       (vec_duplicate:<VnDI>
-		 (match_operand:DI 1 "register_operand"			"Sv"))
+		 (match_operand:DI 1 "register_operand"		     "Sv,Sv"))
 	       (sign_extend:<VnDI>
-		 (match_operand:<VnSI> 2 "register_operand"		" v")))
-	     (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
-	   (match_operand 4 "immediate_operand"				" n")
-	   (match_operand 5 "immediate_operand"				" n")
+		 (match_operand:<VnSI> 2 "register_operand"	     " v, v")))
+	     (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand"
+								     " n, n")))
+	   (match_operand 4 "immediate_operand"			     " n, n")
+	   (match_operand 5 "immediate_operand"			     " n, n")
 	   (mem:BLK (scratch))]
 	  UNSPEC_GATHER))]
   "(AS_GLOBAL_P (INTVAL (operands[4]))
@@ -831,7 +833,8 @@  (define_insn "gather<mode>_insn_2offsets<exec>"
     return buf;
   }
   [(set_attr "type" "flat")
-   (set_attr "length" "12")])
+   (set_attr "length" "12")
+   (set_attr "xnack" "off,on")])
 
 (define_expand "scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 6fc20d3f659..4df05453604 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -170,9 +170,14 @@  gcn_option_override (void)
 	acc_lds_size = 32768;
     }
 
-  /* The xnack option is a placeholder, for now.  */
-  if (flag_xnack)
-    sorry ("XNACK support");
+  /* gfx908 "Fiji" does not support XNACK.  */
+  if (gcn_arch == PROCESSOR_FIJI)
+    {
+      if (flag_xnack == HSACO_ATTR_ON)
+	error ("-mxnack=on is incompatible with -march=fiji");
+      /* Allow HSACO_ATTR_ANY silently because that's the default.  */
+      flag_xnack = HSACO_ATTR_OFF;
+    }
 }
 
 /* }}}  */
@@ -3188,17 +3193,19 @@  gcn_expand_epilogue (void)
       /* Assume that an exit value compatible with gcn-run is expected.
          That is, the third input parameter is an int*.
 
-         We can't allocate any new registers, but the kernarg_reg is
-         dead after this, so we'll use that.  */
+         We can't allocate any new registers, but the dispatch_ptr and
+	 kernarg_reg are dead after this, so we'll use those.  */
+      rtx dispatch_ptr_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
+					  [DISPATCH_PTR_ARG]);
       rtx kernarg_reg = gen_rtx_REG (DImode, cfun->machine->args.reg
 				     [KERNARG_SEGMENT_PTR_ARG]);
       rtx retptr_mem = gen_rtx_MEM (DImode,
 				    gen_rtx_PLUS (DImode, kernarg_reg,
 						  GEN_INT (16)));
       set_mem_addr_space (retptr_mem, ADDR_SPACE_SCALAR_FLAT);
-      emit_move_insn (kernarg_reg, retptr_mem);
+      emit_move_insn (dispatch_ptr_reg, retptr_mem);
 
-      rtx retval_mem = gen_rtx_MEM (SImode, kernarg_reg);
+      rtx retval_mem = gen_rtx_MEM (SImode, dispatch_ptr_reg);
       set_mem_addr_space (retval_mem, ADDR_SPACE_SCALAR_FLAT);
       emit_move_insn (retval_mem,
 		      gen_rtx_REG (SImode, SGPR_REGNO (RETURN_VALUE_REG)));
@@ -5250,11 +5257,12 @@  static void
 output_file_start (void)
 {
   /* In HSACOv4 no attribute setting means the binary supports "any" hardware
-     configuration.  In GCC binaries, this is true for SRAM ECC, but not
-     XNACK.  */
-  const char *xnack = (flag_xnack ? ":xnack+" : ":xnack-");
-  const char *sram_ecc = (flag_sram_ecc == SRAM_ECC_ON ? ":sramecc+"
-			  : flag_sram_ecc == SRAM_ECC_OFF ? ":sramecc-"
+     configuration.  */
+  const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
+		       : flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
+		       : "");
+  const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
+			  : flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
 			  : "");
 
   const char *cpu;
@@ -5298,7 +5306,7 @@  void
 gcn_hsa_declare_function_name (FILE *file, const char *name, tree)
 {
   int sgpr, vgpr;
-  bool xnack_enabled = false;
+  bool xnack_enabled = TARGET_XNACK;
 
   fputs ("\n\n", file);
 
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 033c1708e88..0f9381c9194 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -277,12 +277,19 @@  (define_attr "length" ""
 
 (define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))
 
+(define_attr "xnack" "na,off,on" (const_string "na"))
+
 (define_attr "enabled" ""
-  (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
-	 (and (eq_attr "gcn_version" "gcn5")
-	      (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
-	   (const_int 1)]
-	(const_int 0)))
+  (cond [(and (eq_attr "gcn_version" "gcn5")
+	      (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
+	   (const_int 0)
+	 (and (eq_attr "xnack" "off")
+	      (ne (symbol_ref "TARGET_XNACK") (const_int 0)))
+	   (const_int 0)
+	 (and (eq_attr "xnack" "on")
+	      (eq (symbol_ref "TARGET_XNACK") (const_int 0)))
+	   (const_int 0)]
+	(const_int 1)))
 
 ; We need to be able to identify v_readlane and v_writelane with
 ; SGPR lane selection in order to handle "Manually Inserted Wait States".
@@ -472,9 +479,9 @@  (define_split
 
 (define_insn "*movbi"
   [(set (match_operand:BI 0 "nonimmediate_operand"
-				    "=Sg,   v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
+			  "=Sg,   v,Sg,cs,cV,cV,Sm,&Sm,RS, v,&v,RF, v,&v,RM")
 	(match_operand:BI 1 "gcn_load_operand"
-				    "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
+			  "SSA,vSvA, v,SS, v,SS,RS, RS,Sm,RF,RF, v,RM,RM, v"))]
   ""
   {
     /* SCC as an operand is currently not accepted by the LLVM assembler, so
@@ -501,66 +508,77 @@  (define_insn "*movbi"
       return "s_mov_b32\tvcc_lo, %1\;"
 	     "s_mov_b32\tvcc_hi, 0";
     case 6:
-      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
     case 7:
-      return "s_store_dword\t%1, %A0";
+      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
     case 8:
-      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
+      return "s_store_dword\t%1, %A0";
     case 9:
-      return "flat_store_dword\t%A0, %1%O0%g0";
     case 10:
-      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
     case 11:
+      return "flat_store_dword\t%A0, %1%O0%g0";
+    case 12:
+    case 13:
+      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
+    case 14:
       return "global_store_dword\t%A0, %1%O0%g0";
     default:
       gcc_unreachable ();
     }
   }
-  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
-		     flat,flat")
-   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])
+  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,smem,flat,flat,
+		     flat,flat,flat,flat")
+   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12,12,12,12")
+   (set_attr "xnack" "*,*,*,*,*,*,off,on,*,off,on,*,off,on,*")])
 
 ; 32bit move pattern
 
 (define_insn "*mov<mode>_insn"
   [(set (match_operand:SISF 0 "nonimmediate_operand"
-		  "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
+     "=SD,SD,SD,SD,&SD,RB,Sm,&Sm,RS,v,Sg, v, v,&v,RF,v,RLRG,   v,SD, v,&v,RM")
 	(match_operand:SISF 1 "gcn_load_operand"
-		  "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
+    "SSA, J, B,RB, RB,Sm,RS, RS,Sm,v, v,Sv,RF,RF, v,B,   v,RLRG, Y,RM,RM, v"))]
   ""
   "@
   s_mov_b32\t%0, %1
   s_movk_i32\t%0, %1
   s_mov_b32\t%0, %1
   s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
+  s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
   s_buffer_store%s1\t%1, s[0:3], %0
   s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+  s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
   s_store_dword\t%1, %A0
   v_mov_b32\t%0, %1
   v_readlane_b32\t%0, %1, 0
   v_writelane_b32\t%0, %1, 0
   flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
+  flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dword\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
   ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   s_mov_b32\t%0, %1
   global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store_dword\t%A0, %1%O0%g0"
-  [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
-		     flat,vop1,ds,ds,sop1,flat,flat")
-   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])
+  [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,smem,smem,vop1,vop3a,
+	      vop3a,flat,flat,flat,vop1,ds,ds,sop1,flat,flat,flat")
+   (set_attr "exec" "*,*,*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "length"
+	     "4,4,8,12,12,12,12,12,12,4,8,8,12,12,12,8,12,12,8,12,12,12")
+   (set_attr "xnack"
+	     "*,*,*,off,on,*,off,on,*,*,*,*,off,on,*,*,*,*,*,off,on,*")])
 
 ; 8/16bit move pattern
 ; TODO: implement combined load and zero_extend, but *only* for -msram-ecc=on
 
 (define_insn "*mov<mode>_insn"
   [(set (match_operand:QIHI 0 "nonimmediate_operand"
-				 "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG,   v, v,RM")
+			   "=SD,SD,SD,v,Sg, v, v,&v,RF,v,RLRG,   v, v,&v,RM")
 	(match_operand:QIHI 1 "gcn_load_operand"
-				 "SSA, J, B,v, v,Sv,RF, v,B,   v,RLRG,RM, v"))]
+			   "SSA, J, B,v, v,Sv,RF,RF, v,B,   v,RLRG,RM,RM, v"))]
   "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
   "@
   s_mov_b32\t%0, %1
@@ -570,24 +588,27 @@  (define_insn "*mov<mode>_insn"
   v_readlane_b32\t%0, %1, 0
   v_writelane_b32\t%0, %1, 0
   flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
+  flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store%s0\t%A0, %1%O0%g0
   v_mov_b32\t%0, %1
   ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store%s0\t%A0, %1%O0%g0"
-  [(set_attr "type"
-	     "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
-   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
-   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])
+  [(set_attr "type" "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,flat,vop1,ds,ds,
+	             flat,flat,flat")
+   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*,*,*")
+   (set_attr "length" "4,4,8,4,4,4,12,12,12,8,12,12,12,12,12")
+   (set_attr "xnack" "*,*,*,*,*,*,off,on,*,*,*,*,off,on,*")])
 
 ; 64bit move pattern
 
 (define_insn_and_split "*mov<mode>_insn"
   [(set (match_operand:DIDF 0 "nonimmediate_operand"
-			  "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG,   v, v,RM")
+		"=SD,SD,SD,RS,Sm,&Sm,v, v,Sg, v, v,&v,RF,RLRG,   v, v,&v,RM")
 	(match_operand:DIDF 1 "general_operand"
-			  "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v,   v,RLRG,RM, v"))]
+		"SSA, C,DB,Sm,RS, RS,v,DB, v,Sv,RF,RF, v,   v,RLRG,RM,RM, v"))]
   "GET_CODE(operands[1]) != SYMBOL_REF"
   "@
   s_mov_b64\t%0, %1
@@ -595,15 +616,18 @@  (define_insn_and_split "*mov<mode>_insn"
   #
   s_store_dwordx2\t%1, %A0
   s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+  s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
   #
   #
   #
   #
   flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
+  flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
   flat_store_dwordx2\t%A0, %1%O0%g0
   ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
   global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   global_store_dwordx2\t%A0, %1%O0%g0"
   "reload_completed
    && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
@@ -634,29 +658,33 @@  (define_insn_and_split "*mov<mode>_insn"
 	operands[3] = inhi;
       }
   }
-  [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
-		     flat,ds,ds,flat,flat")
-   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])
+  [(set_attr "type" "sop1,sop1,mult,smem,smem,smem,vmult,vmult,vmult,vmult,
+	      flat,flat,flat,ds,ds,flat,flat,flat")
+   (set_attr "length" "4,8,*,12,12,12,*,*,*,*,12,12,12,12,12,12,12,12")
+   (set_attr "xnack" "*,*,*,*,off,on,*,*,*,*,off,on,*,*,*,off,on,*")])
 
 ; 128-bit move.
 
 (define_insn_and_split "*movti_insn"
   [(set (match_operand:TI 0 "nonimmediate_operand"
-				      "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
-	(match_operand:TI 1 "general_operand"  
-				      "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
+			     "=SD,RS,Sm,&Sm,RF, v,&v,v, v,SD,RM, v,&v,RL, v")
+	(match_operand:TI 1 "general_operand"
+			     "SSB,Sm,RS, RS, v,RF,RF,v,Sv, v, v,RM,RM, v,RL"))]
   ""
   "@
   #
   s_store_dwordx4\t%1, %A0
   s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
+  s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
   flat_store_dwordx4\t%A0, %1%O0%g0
   flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
+  flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
   #
   #
   #
   global_store_dwordx4\t%A0, %1%O0%g0
   global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
+  global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
   ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0)
   ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
   "reload_completed
@@ -678,10 +706,11 @@  (define_insn_and_split "*movti_insn"
     operands[0] = gcn_operand_part (TImode, operands[0], 0);
     operands[1] = gcn_operand_part (TImode, operands[1], 0);
   }
-  [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
-		     ds,ds")
-   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
-   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])
+  [(set_attr "type" "mult,smem,smem,smem,flat,flat,flat,vmult,vmult,vmult,flat,
+	             flat,flat,ds,ds")
+   (set_attr "delayeduse" "*,*,yes,yes,*,*,*,*,*,*,*,yes,*,*,*")
+   (set_attr "length" "*,12,12,12,12,12,12,*,*,*,12,12,12,12,12")
+   (set_attr "xnack" "*,*,off,on,*,off,on,*,*,*,*,off,on,*,*")])
 
 ;; }}}
 ;; {{{ Prologue/Epilogue
@@ -844,6 +873,8 @@  (define_insn "movdi_symbol"
   (clobber (reg:BI SCC_REG))]
  "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
   {
+    /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+       DGPUs are most likely fine.  */
     if (SYMBOL_REF_P (operands[1])
 	&& SYMBOL_REF_WEAK (operands[1]))
 	return "s_getpc_b64\t%0\;"
@@ -868,6 +899,8 @@  (define_insn "movdi_symbol_save_scc"
   {
     /* !!! These sequences clobber CC_SAVE_REG.  */
 
+    /* This s_load may not be XNACK-safe on devices where the GOT may fault.
+       DGPUs are most likely fine.  */
     if (SYMBOL_REF_P (operands[1])
 	&& SYMBOL_REF_WEAK (operands[1]))
 	return "s_mov_b32\ts22, scc\;"
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 9606aaf0b1a..759f7a064c9 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -81,23 +81,23 @@  Wopenacc-dims
 Target Var(warn_openacc_dims) Warning
 Warn about invalid OpenACC dimensions.
 
-mxnack
-Target Var(flag_xnack) Init(0)
-Compile for devices requiring XNACK enabled. Default off.
-
 Enum
-Name(sram_ecc_type) Type(enum sram_ecc_type)
+Name(hsaco_attr_type) Type(enum hsaco_attr_type)
 SRAM-ECC modes:
 
 EnumValue
-Enum(sram_ecc_type) String(off) Value(SRAM_ECC_OFF)
+Enum(hsaco_attr_type) String(off) Value(HSACO_ATTR_OFF)
 
 EnumValue
-Enum(sram_ecc_type) String(on) Value(SRAM_ECC_ON)
+Enum(hsaco_attr_type) String(on) Value(HSACO_ATTR_ON)
 
 EnumValue
-Enum(sram_ecc_type) String(any) Value(SRAM_ECC_ANY)
+Enum(hsaco_attr_type) String(any) Value(HSACO_ATTR_ANY)
+
+mxnack=
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(HSACO_ATTR_ANY)
+Compile for devices requiring XNACK enabled. Default off.
 
 msram-ecc=
-Target RejectNegative Joined ToLower Enum(sram_ecc_type) Var(flag_sram_ecc) Init(SRAM_ECC_ANY)
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
 Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index b8b3fecfcb4..cb8903c27cb 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -72,10 +72,14 @@ 
 
 #define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
 				 | EF_AMDGPU_FEATURE_XNACK_ON_V4)
+#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
+				  | EF_AMDGPU_FEATURE_XNACK_ANY_V4)
 #define SET_XNACK_OFF(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
 				  | EF_AMDGPU_FEATURE_XNACK_OFF_V4)
-#define TEST_XNACK(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
-			 == EF_AMDGPU_FEATURE_XNACK_ON_V4)
+#define TEST_XNACK_ANY(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
+			     == EF_AMDGPU_FEATURE_XNACK_ANY_V4)
+#define TEST_XNACK_ON(VAR) ((VAR & EF_AMDGPU_FEATURE_XNACK_V4) \
+			    == EF_AMDGPU_FEATURE_XNACK_ON_V4)
 
 #define SET_SRAM_ECC_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_SRAMECC_V4) \
 				    | EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
@@ -884,9 +888,11 @@  main (int argc, char **argv)
 	fPIC = true;
       else if (strcmp (argv[i], "-fpic") == 0)
 	fpic = true;
-      else if (strcmp (argv[i], "-mxnack") == 0)
+      else if (strcmp (argv[i], "-mxnack=on") == 0)
 	SET_XNACK_ON (elf_flags);
-      else if (strcmp (argv[i], "-mno-xnack") == 0)
+      else if (strcmp (argv[i], "-mxnack=any") == 0)
+	SET_XNACK_ANY (elf_flags);
+      else if (strcmp (argv[i], "-mxnack=off") == 0)
 	SET_XNACK_OFF (elf_flags);
       else if (strcmp (argv[i], "-msram-ecc=on") == 0)
 	SET_SRAM_ECC_ON (elf_flags);
@@ -1045,8 +1051,9 @@  main (int argc, char **argv)
       obstack_ptr_grow (&ld_argv_obstack, gcn_s2_name);
       obstack_ptr_grow (&ld_argv_obstack, "-lgomp");
       obstack_ptr_grow (&ld_argv_obstack,
-			(TEST_XNACK (elf_flags)
-			 ? "-mxnack" : "-mno-xnack"));
+			(TEST_XNACK_ON (elf_flags) ? "-mxnack=on"
+			 : TEST_XNACK_ANY (elf_flags) ? "-mxnack=any"
+			 : "-mxnack=off"));
       obstack_ptr_grow (&ld_argv_obstack,
 			(TEST_SRAM_ECC_ON (elf_flags) ? "-msram-ecc=on"
 			 : TEST_SRAM_ECC_ANY (elf_flags) ? "-msram-ecc=any"