[committed] Improve Z flag handling on H8

Message ID 36f8c642-9cc5-9fb5-5e76-e01a001f57f7@gmail.com
State Committed
Commit 2555071c954aa5796eb3432c15739dcaae457bc3
Headers
Series [committed] Improve Z flag handling on H8 |

Commit Message

Jeff Law Oct. 1, 2022, 4:52 a.m. UTC
  This patch improves handling of the Z bit in the status register in a 
variety of ways to improve either the code size or code speed on various 
H8 subtargets.

For example, we can test the zero/nonzero status of the upper byte of a 
16 bit register using mov.b, we can move the Z or an inverted Z into a 
QImode register profitably on some subtargets. We can move Z or an 
inverted Z into the sign bit on the H8/SX profitably, etc.

I've actually had this patch in my tester for over a year, but got crazy 
busy and hadn't bothered to upstream it until now. Naturally it has been 
working all that time without regressions.

Pushed to the trunk.


Jeff
commit 2555071c954aa5796eb3432c15739dcaae457bc3
Author: Jeff Law <jeffreyalaw@gmail.com>
Date:   Sat Oct 1 00:42:15 2022 -0400

    Improve Z flag handling on H8
    
    This patch improves handling of the Z bit in the status register in a
    variety of ways to improve either the code size or code speed on various
    H8 subtargets.
    
    For example, we can test the zero/nonzero status of the upper byte of a
    16 bit register using mov.b, we can move the Z or an inverted Z into a
    QImode register profitably on some subtargets.  We can move Z or an
    inverted Z into the sign bit on the H8/SX profitably, etc.
    
    gcc/
    
            * config/h8300/h8300.md (HSI2): New iterator.
            (eqne_invert): Similarly.
            * config/h8300/testcompare.md (testhi_upper_z): New pattern.
            (cmpqi_z, cmphi_z, cmpsi_z): Likewise.
            (store_z_qi, store_z_i_qi, store_z_hi, store_z_hi_sb): New
            define_insn_and_splits and/or define_insns.
            (store_z_hi_neg, store_z_hi_and, store_z_<mode>): Likewise.
            (store_z_<mode>_neg, store_z_<mode>_and, store_z): Likewise.
  

Patch

diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index 2da7e7d8a7d..f592af1d5f7 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -221,6 +221,7 @@ 
 (define_mode_iterator QHI [QI HI])
 
 (define_mode_iterator HSI [HI SI])
+(define_mode_iterator HSI2 [HI SI])
 
 (define_mode_iterator QHSI [QI HI SI])
 (define_mode_iterator QHSI2 [QI HI SI])
@@ -236,6 +237,7 @@ 
 (define_code_iterator ors [ior xor])
 
 (define_code_iterator eqne [eq ne])
+(define_code_attr eqne_invert [(eq "ne") (ne "eq")])
 
 ;; For storing the C flag, map from the unsigned comparison to the right
 ;; code for testing the C bit.
diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md
index 0ee3e360bea..81dce1d0bc1 100644
--- a/gcc/config/h8300/testcompare.md
+++ b/gcc/config/h8300/testcompare.md
@@ -61,6 +61,15 @@ 
   "mov.b	%t0,%t0"
   [(set_attr "length" "2")])
 
+(define_insn "*tsthi_upper_z"
+  [(set (reg:CCZ CC_REG)
+	(compare (and:HI (match_operand:HI 0 "register_operand" "r")
+			 (const_int -256))
+		 (const_int 0)))]
+  "reload_completed"
+  "mov.b	%t0,%t0"
+  [(set_attr "length" "2")])
+
 (define_insn "*tstsi_upper"
   [(set (reg:CCZN CC_REG)
 	(compare (and:SI (match_operand:SI 0 "register_operand" "r")
@@ -86,6 +95,30 @@ 
   }
   [(set_attr "length_table" "add")])
 
+(define_insn "*cmpqi_z"
+  [(set (reg:CCZ CC_REG)
+	(eq (match_operand:QI 0 "h8300_dst_operand" "rQ")
+	    (match_operand:QI 1 "h8300_src_operand" "rQi")))]
+  "reload_completed"
+  { return "cmp.b	%X1,%X0"; }
+  [(set_attr "length_table" "add")])
+
+(define_insn "*cmphi_z"
+  [(set (reg:CCZ CC_REG)
+	(eq (match_operand:HI 0 "h8300_dst_operand" "rQ")
+	    (match_operand:HI 1 "h8300_src_operand" "rQi")))]
+  "reload_completed"
+  { return "cmp.w	%T1,%T0"; }
+  [(set_attr "length_table" "add")])
+
+(define_insn "*cmpsi_z"
+  [(set (reg:CCZ CC_REG)
+	(eq (match_operand:SI 0 "h8300_dst_operand" "rQ")
+	    (match_operand:SI 1 "h8300_src_operand" "rQi")))]
+  "reload_completed"
+  { return "cmp.l	%S1,%S0"; }
+  [(set_attr "length_table" "add")])
+
 (define_insn "*cmpqi"
   [(set (reg:CC CC_REG)
 	(compare (match_operand:QI 0 "h8300_dst_operand" "rQ")
@@ -209,6 +242,8 @@ 
 	  return "xor.l\t%S0,%S0\;bist\t#0,%w0";
 	gcc_unreachable ();
       }
+    else
+      gcc_unreachable ();
   }
   [(set (attr "length") (symbol_ref "<MODE>mode == SImode ? 6 : 4"))])
 
@@ -340,3 +375,235 @@ 
 	(ashift:QHSI (<geultu_to_c>:QHSI (reg:CCC CC_REG) (const_int 0))
 		     (match_dup 3)))])
 
+;; Storing Z into a QImode destination is fairly easy on the H8/S and
+;; newer as the stc; shift; mask is just 3 insns/6 bytes.  On the H8/300H
+;; it is 4 insns/8 bytes which is a speed improvement, but a size
+;; regression relative to the branchy sequence
+;;
+;; Storing inverted Z in QImode is not profitable on the H8/300H, but
+;; is a speed improvement on the H8S.
+(define_insn_and_split "*store_z_qi"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(eq:QI (match_operand:HI 1 "register_operand" "r")
+	       (match_operand:HI 2 "register_operand" "r")))]
+  "TARGET_H8300S || !optimize_size"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ne:QI (reg:CCZ CC_REG) (const_int 0)))])
+
+(define_insn_and_split "*store_z_i_qi"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(ne:QI (match_operand:HI 1 "register_operand" "r")
+	       (match_operand:HI 2 "register_operand" "r")))]
+  "TARGET_H8300S"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(eq:QI (reg:CCZ CC_REG) (const_int 0)))])
+
+(define_insn "*store_z_qi"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(ne:QI (reg:CCZ CC_REG) (const_int 0)))]
+  "(TARGET_H8300S || !optimize_size) && reload_completed"
+  {
+    if (TARGET_H8300S)
+      return "stc\tccr,%X0\;shar\t#2,%X0\;and\t#0x1,%X0";
+    else
+      return "stc\tccr,%X0\;shar\t%X0\;shar\t%X0\;and\t#0x1,%X0";
+  }
+  [(set (attr "length") (symbol_ref "TARGET_H8300S ? 6 : 8"))])
+
+(define_insn "*store_z_i_qi"
+  [(set (match_operand:QI 0 "register_operand" "=r")
+	(eq:QI (reg:CCZ CC_REG) (const_int 0)))]
+  "(TARGET_H8300S || !optimize_size) && reload_completed"
+  "stc\tccr,%X0\;bld\t#2,%X0\;xor.w\t%T0,%T0\;bist\t#0,%X0";
+  [(set_attr "length" "8")])
+
+;; Storing Z or an inverted Z into a HImode destination is
+;; profitable on the H8/S and older variants, but not on the
+;; H8/SX where the branchy sequence can use the two-byte
+;; mov-immediate that is specific to the H8/SX
+(define_insn_and_split "*store_z_hi"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(eqne:HSI (match_operand:HSI2 1 "register_operand" "r")
+		  (match_operand:HSI2 2 "register_operand" "r")))]
+  "!TARGET_H8300SX"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(<eqne_invert>:HSI (reg:CCZ CC_REG) (const_int 0)))])
+
+;; Similar, but putting the result into the sign bit
+(define_insn_and_split "*store_z_hi_sb"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(ashift:HSI (eqne:HSI (match_operand:HSI2 1 "register_operand" "r")
+			      (match_operand:HSI2 2 "register_operand" "r"))
+		     (const_int 15)))]
+  "!TARGET_H8300SX"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ashift:HSI (<eqne_invert>:HSI (reg:CCZ CC_REG) (const_int 0))
+		    (const_int 15)))])
+
+;; Similar, but negating the result
+(define_insn_and_split "*store_z_hi_neg"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(neg:HSI (eqne:HSI (match_operand:HSI2 1 "register_operand" "r")
+			   (match_operand:HSI2 2 "register_operand" "r"))))]
+  "!TARGET_H8300SX"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(neg:HSI (<eqne_invert>:HSI (reg:CCZ CC_REG) (const_int 0))))])
+
+(define_insn_and_split "*store_z_hi_and"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(and:HSI (eqne:HSI (match_operand:HSI2 1 "register_operand" "r")
+			   (match_operand:HSI2 2 "register_operand" "r"))
+		 (match_operand:HSI 3 "register_operand" "r")))]
+  "!TARGET_H8300SX"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(eq:CCZ (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(and:HSI (<eqne_invert>:HSI (reg:CCZ CC_REG) (const_int 0))
+		 (match_dup 3)))])
+
+(define_insn "*store_z_<mode>"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(eqne:HSI (reg:CCZ CC_REG) (const_int 0)))]
+  "!TARGET_H8300SX"
+  {
+    if (<MODE>mode == HImode)
+      {
+	if (<CODE> == NE)
+	  {
+	    if (TARGET_H8300S)
+	      return "stc\tccr,%X0\;shlr.b\t#2,%X0\;and.w\t#1,%T0";
+	    return "stc\tccr,%X0\;bld\t#2,%X0\;xor.w\t%T0,%T0\;bst\t#0,%X0";
+	  }
+	else
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;xor.w\t%T0,%T0\;bist\t#0,%X0";
+      }
+    else if (<MODE>mode == SImode)
+      {
+	if (<CODE> == NE)
+	  {
+	    if (TARGET_H8300S)
+	      return "stc\tccr,%X0\;shlr.b\t#2,%X0\;and.l\t#1,%S0";
+	    return "stc\tccr,%X0\;bld\t#2,%X0\;xor.l\t%S0,%S0\;bst\t#0,%X0";
+	  }
+	else
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;xor.l\t%S0,%S0\;bist\t#0,%X0";
+      }
+    gcc_unreachable ();
+  }
+;; XXXSImode is 2 bytes longer
+  [(set_attr "length" "8")])
+
+(define_insn "*store_z_<mode>_sb"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(ashift:HSI (eqne:HSI (reg:CCZ CC_REG) (const_int 0))
+		    (const_int 15)))]
+  "!TARGET_H8300SX"
+  {
+    if (<MODE>mode == HImode)
+      {
+	if (<CODE> == NE)
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;xor.w\t%T0,%T0\;bst\t#7,%t0";
+	else
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;xor.w\t%T0,%T0\;bist\t#7,%t0";
+      }
+    else if (<MODE>mode == SImode)
+      {
+	if (<CODE> == NE)
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;xor.l\t%T0,%T0\;rotxr.l\t%S0";
+	else
+	  return "stc\tccr,%X0\;bild\t#2,%X0\;xor.l\t%T0,%T0\;rotxr.l\t%S0";
+      }
+    gcc_unreachable ();
+  }
+  ;; XXX SImode is larger
+  [(set_attr "length" "8")])
+
+(define_insn "*store_z_<mode>_neg"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(neg:HSI (eqne:HSI (reg:CCZ CC_REG) (const_int 0))))]
+  "!TARGET_H8300SX"
+  {
+    if (<MODE>mode == HImode)
+      {
+	if (<CODE> == NE)
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;subx.b\t%X0,%X0\;exts.w\t%T0";
+	else
+	  return "stc\tccr,%X0\;bild\t#2,%X0\;subx.b\t%X0,%X0\;exts.w\t%T0";
+      }
+    else if (<MODE>mode == SImode)
+      {
+	if (<CODE> == NE)
+	  return "stc\tccr,%X0\;bld\t#2,%X0\;subx.b\t%X0,%X0\;exts.w\t%T0\;exts.l\t%S0";
+	else
+	  return "stc\tccr,%X0\;bild\t#2,%X0\;subx.b\t%X0,%X0\;exts.w\t%T0\;exts.l\t%S0";
+      }
+    gcc_unreachable ();
+  }
+  ;; XXX simode is an instruction longer
+  [(set_attr "length" "8")])
+
+(define_insn "*store_z_<mode>_and"
+  [(set (match_operand:HSI 0 "register_operand" "=r")
+	(and:HSI (eqne:HSI (reg:CCZ CC_REG) (const_int 0))
+		 (match_operand:HSI 1 "register_operand" "r")))]
+  "!TARGET_H8300SX"
+  {
+    if (<MODE>mode == HImode)
+      {
+	if (<CODE> == NE)
+	  return "bld\t#0,%X1\;stc\tccr,%X0\;band\t#2,%X0\;xor.w\t%T0,%T0\;bst\t#0,%X0";
+	else
+	  return "bild\t#0,%X1\;stc\tccr,%X0\;band\t#2,%X0\;xor.w\t%T0,%T0\;bist\t#0,X0";
+      }
+    else if (<MODE>mode == SImode)
+      {
+	if (<CODE> == NE)
+	  return "bld\t#0,%X1\;stc\tccr,%X0\;band\t#2,%X0\;xor.l\t%S0,%S0\;bst\t#0,%X0";
+	else
+	  return "bild\t#0,%X1\;stc\tccr,%X0\;band\t#2,%X0\;xor.l\t%S0,%S0\;bist\t#0,X0";
+      }
+    gcc_unreachable ();
+  }
+  ;; XXX simode is an instruction longer
+  [(set_attr "length" "8")])
+
+;; We can test the upper byte of a HImode register and the upper word
+;; of a SImode register
+
+;; We can test the upper byte of a HImode register and the upper word
+;; of a SImode register
+(define_insn_and_split "*store_z"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(eqne:HI (and:HI (match_operand:HI 1 "register_operand" "r")
+			 (const_int -256))
+		 (const_int 0)))]
+  "!TARGET_H8300SX"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CCZ CC_REG)
+	(compare (and:HI (match_dup 1) (const_int -256))
+		 (const_int 0)))
+   (set (match_dup 0)
+	(<eqne_invert>:HI (reg:CCZ CC_REG) (const_int 0)))])