RFC: New compact syntax for insn and insn_split in Machine Descriptions

Message ID patch-17151-tamar@arm.com
State New
Headers
Series RFC: New compact syntax for insn and insn_split in Machine Descriptions |

Commit Message

Tamar Christina April 18, 2023, 4:30 p.m. UTC
  Hi All,

This patch adds support for a compact syntax for specifying constraints in
instruction patterns. Credit for the idea goes to Richard Earnshaw.

I am sending this RFC to get feedback on its inclusion in GCC 14.
With this new syntax we want a clean break from the current limitations to make
something that is hopefully easier to use and maintain.

The motivation for this compact syntax is that it is often quite hard to
correlate the entries in the constraints list, attributes and instruction lists.

One has to count, and this is often tedious.  Additionally, when changing a
single line in the insn, multiple lines in a diff change, making it harder to
see what's going on.

This new syntax takes into account many of the common things that are done in MD
files.   It's also worth saying that this version is intended to deal with the
common case of string-based alternatives.   For C chunks we have some ideas
but those are not intended to be addressed here.

It's easiest to explain with an example:

normal syntax:

(define_insn_and_split "*movsi_aarch64"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
  "(register_operand (operands[0], SImode)
    || aarch64_reg_or_zero (operands[1], SImode))"
  "@
   mov\\t%w0, %w1
   mov\\t%w0, %w1
   mov\\t%w0, %w1
   mov\\t%w0, %1
   #
   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
   ldr\\t%w0, %1
   ldr\\t%s0, %1
   str\\t%w1, %0
   str\\t%s1, %0
   adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
   adr\\t%x0, %c1
   adrp\\t%x0, %A1
   fmov\\t%s0, %w1
   fmov\\t%w0, %s1
   fmov\\t%s0, %s1
   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
   [(const_int 0)]
   "{
       aarch64_expand_mov_immediate (operands[0], operands[1]);
       DONE;
    }"
  ;; The "mov_imm" type for CNT is just a placeholder.
  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
]
)

New syntax:

(define_insn_and_split "*movsi_aarch64"
  [(set (match_operand:SI 0 "nonimmediate_operand")
	(match_operand:SI 1 "aarch64_mov_operand"))]
  "(register_operand (operands[0], SImode)
    || aarch64_reg_or_zero (operands[1], SImode))"
  "@@ (cons: 0 1; attrs: type arch length)
   [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
   [k , r  ; mov_reg  , *   , 4] ^
   [r , k  ; mov_reg  , *   , 4] ^
   [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
   [r , n  ; mov_imm  , *   , *] #
   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
   [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
   [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
   [m , rZ ; store_4  , *   , 4] str\t%w1, %0
   [m , w  ; store_4  , fp  , 4] str\t%s1, %0
   [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
   [r , Usa; adr      , *   , 4] adr\t%x0, %c1
   [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
   [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
   [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
   [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
   [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(const_int 0)]
  {
    aarch64_expand_mov_immediate (operands[0], operands[1]);
    DONE;
  }
  ;; The "mov_imm" type for CNT is just a placeholder.
)

The patch contains some more rewritten examples for both Arm and AArch64.  I
have included them as examples in this RFC, but the final version posted for
GCC 14 will have these split out.

The main syntax rules are as follows (See docs for full rules):
  - Template must start with "@@" to use the new syntax.
  - "@@" is followed by a layout in parentheses which is "cons:" followed by
    a list of match_operand/match_scratch IDs, then a semicolon, then the
    same for attributes ("attrs:"). Both sections are optional (so you can
    use only cons, or only attrs, or both), and cons must come before attrs
    if present.
  - Each alternative begins with any amount of whitespace.
  - Following the whitespace is a comma-separated list of constraints and/or
    attributes within brackets [], with sections separated by a semicolon.
  - Following the closing ']' is any amount of whitespace, and then the actual
    asm output.
  - Spaces are allowed in the list (they will simply be removed).
  - All alternatives should be specified: a blank list should be
    "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
    you leave certain attributes empty, I have found).
  - The actual constraint string in the match_operand or match_scratch, and
    the attribute string in the set_attr, must be blank or an empty string
    (you can't combine the old and new syntaxes).
  - The common idiom * return can be shortened by using <<.
  - Any unexpanded iterators left during processing will result in an error at
    compile time.   If for some reason <> is needed in the output then these
    must be escaped using \.
  - Inside a @@ block '' is treated as "" when there are multiple characters
    inside the single quotes.  This version does not handle multi-byte literals
    such as characters specified by their numerical encoding (e.g. \003), nor
    does it handle Unicode, especially multibyte encodings.  This feature may be
    more trouble than it's worth, so I have not finished it off; however, this
    means one can use 'foo' instead of \"foo\" to denote a multicharacter string.
  - Inside an @@ block any unexpanded iterators will result in a compile time
    fault instead of incorrect assembly being generated at runtime.  If the
    literal <> is needed in the output this needs to be escaped with \<\>.
  - This check is not performed inside C blocks (lines starting with *).
  - Instead of copying the previous instruction again in the next pattern, one
    can use ^ to refer to the previous asm string.

This patch works by blindly transforming the new syntax into the old syntax,
so it doesn't do extensive checking. However, it does verify that:
	- The correct number of constraints/attributes are specified.
	- You haven't mixed old and new syntax.
	- The specified operand IDs/attribute names actually exist.

If something goes wrong, it may write invalid constraints/attributes/template
back into the rtx. But this shouldn't matter because error_at will cause the
program to fail on exit anyway.

Because this transformation occurs as early as possible (before patterns are
queued), the rest of the compiler can completely ignore the new syntax and
assume that the old syntax will always be used.

This doesn't seem to have any measurable effect on the runtime of gen*
programs.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Any feedback?

Thanks,
Tamar

gcc/ChangeLog:

	* config/aarch64/aarch64.md (arches): Add nosimd.
	(*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to
	compact syntax.
	* config/arm/arm.md (*arm_addsi3): Rewrite to compact syntax.
	* doc/md.texi: Document new syntax.
	* gensupport.cc (class conlist, add_constraints, add_attributes,
	create_missing_attributes, skip_spaces, expect_char,
	preprocess_compact_syntax, parse_section_layout, parse_section,
	convert_syntax): New.
	(process_rtx): Check for conversion.
	* genoutput.cc (process_template): Check for unresolved iterators.
	(class data): Add compact_syntax_p.
	(gen_insn): Use it.
	* gensupport.h (compact_syntax): New.
	(hash-set.h): Include.

Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com>

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 022eef80bc1e93299f329610dcd2321917d5770a..331eb2ff57a0e1ff300f3321f154829a57772679 100644




--
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 022eef80bc1e93299f329610dcd2321917d5770a..331eb2ff57a0e1ff300f3321f154829a57772679 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -375,7 +375,7 @@ (define_constants
 ;; As a convenience, "fp_q" means "fp" + the ability to move between
 ;; Q registers and is equivalent to "simd".
 
-(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
+(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16])
 
 (define_enum_attr "arch" "arches" (const_string "any"))
 
@@ -406,6 +406,9 @@ (define_attr "arch_enabled" "no,yes"
 	(and (eq_attr "arch" "fp_q, simd")
 	     (match_test "TARGET_SIMD"))
 
+	(and (eq_attr "arch" "nosimd")
+	     (match_test "!TARGET_SIMD"))
+
 	(and (eq_attr "arch" "fp16")
 	     (match_test "TARGET_FP_F16INST"))
 
@@ -1215,44 +1218,26 @@ (define_expand "mov<mode>"
 )
 
 (define_insn "*mov<mode>_aarch64"
-  [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,    w,r  ,r,w, m,m,r,w,w")
-	(match_operand:SHORT 1 "aarch64_mov_operand"  " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
+  [(set (match_operand:SHORT 0 "nonimmediate_operand")
+	(match_operand:SHORT 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], <MODE>mode)
     || aarch64_reg_or_zero (operands[1], <MODE>mode))"
-{
-   switch (which_alternative)
-     {
-     case 0:
-       return "mov\t%w0, %w1";
-     case 1:
-       return "mov\t%w0, %1";
-     case 2:
-       return aarch64_output_scalar_simd_mov_immediate (operands[1],
-							<MODE>mode);
-     case 3:
-       return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-     case 4:
-       return "ldr<size>\t%w0, %1";
-     case 5:
-       return "ldr\t%<size>0, %1";
-     case 6:
-       return "str<size>\t%w1, %0";
-     case 7:
-       return "str\t%<size>1, %0";
-     case 8:
-       return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
-     case 9:
-       return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
-     case 10:
-       return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
-     default:
-       gcc_unreachable ();
-     }
-}
+  "@@ (cons: 0 1; attrs: type arch)
+  [=r, r    ; mov_reg        , *     ] mov\t%w0, %w1
+  [r , M    ; mov_imm        , *     ] mov\t%w0, %1
+  [w , D<hq>; neon_move      , simd  ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
+  [r , Usv  ; mov_imm        , sve   ] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+  [r , m    ; load_4         , *     ] ldr<size>\t%w0, %1
+  [w , m    ; load_4         , *     ] ldr\t%<size>0, %1
+  [m , rZ   ; store_4        , *     ] str<size>\\t%w1, %0
+  [m , w    ; store_4        , *     ] str\t%<size>1, %0
+  [r , w    ; neon_to_gp<q>  , simd  ] umov\t%w0, %1.<v>[0]
+  [r , w    ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1
+  [w , rZ   ; neon_from_gp<q>, simd  ] dup\t%0.<Vallxd>, %w1
+  [w , rZ   ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
+  [w , w    ; neon_dup       , simd  ] dup\t%<Vetype>0, %1.<v>[0]
+  [w , w    ; neon_dup       , nosimd] fmov\t%s0, %s1"
   ;; The "mov_imm" type for CNT is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
-		     store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
-   (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
 )
 
 (define_expand "mov<mode>"
@@ -1289,79 +1274,69 @@ (define_expand "mov<mode>"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
-  "@
-   mov\\t%w0, %w1
-   mov\\t%w0, %w1
-   mov\\t%w0, %w1
-   mov\\t%w0, %1
-   #
-   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-   ldr\\t%w0, %1
-   ldr\\t%s0, %1
-   str\\t%w1, %0
-   str\\t%s1, %0
-   adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
-   adr\\t%x0, %c1
-   adrp\\t%x0, %A1
-   fmov\\t%s0, %w1
-   fmov\\t%w0, %s1
-   fmov\\t%s0, %s1
-   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+   [k , r  ; mov_reg  , *   , 4] ^
+   [r , k  ; mov_reg  , *   , 4] ^
+   [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+   [r , n  ; mov_imm  , *   ,16] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+   [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
+   [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
+   [m , rZ ; store_4  , *   , 4] str\t%w1, %0
+   [m , w  ; store_4  , fp  , 4] str\t%s1, %0
+   [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
+   [r , Usa; adr      , *   , 4] adr\t%x0, %c1
+   [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
+   [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
+   [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
+   [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
+   [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
-   [(const_int 0)]
-   "{
-       aarch64_expand_mov_immediate (operands[0], operands[1]);
-       DONE;
-    }"
+  [(const_int 0)]
+  {
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  }
   ;; The "mov_imm" type for CNT is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
-		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
-   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
-   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
-]
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(match_operand:DI 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
-  "@
-   mov\\t%x0, %x1
-   mov\\t%0, %x1
-   mov\\t%x0, %1
-   * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
-   #
-   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-   ldr\\t%x0, %1
-   ldr\\t%d0, %1
-   str\\t%x1, %0
-   str\\t%d1, %0
-   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
-   adr\\t%x0, %c1
-   adrp\\t%x0, %A1
-   fmov\\t%d0, %x1
-   fmov\\t%x0, %d1
-   fmov\\t%d0, %d1
-   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
-   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
-    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
-   [(const_int 0)]
-   "{
-       aarch64_expand_mov_immediate (operands[0], operands[1]);
-       DONE;
-    }"
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%x0, %x1
+   [k , r  ; mov_reg  , *   , 4] mov\t%0, %x1
+   [r , k  ; mov_reg  , *   , 4] mov\t%x0, %1
+   [r , O  ; mov_imm  , *   , 4] << aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? 'mov\t%x0, %1' : 'mov\t%w0, %1';
+   [r , n  ; mov_imm  , *   ,16] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+   [r , m  ; load_8   , *   , 4] ldr\t%x0, %1
+   [w , m  ; load_8   , fp  , 4] ldr\t%d0, %1
+   [m , rZ ; store_8  , *   , 4] str\t%x1, %0
+   [m , w  ; store_8  , fp  , 4] str\t%d1, %0
+   [r , Usw; load_8   , *   , 8] << TARGET_ILP32 ? 'adrp\t%0, %A1;ldr\t%w0, [%0, %L1]' : 'adrp\t%0, %A1;ldr\t%0, [%0, %L1]';
+   [r , Usa; adr      , *   , 4] adr\t%x0, %c1
+   [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
+   [w , rZ ; f_mcr    , fp  , 4] fmov\t%d0, %x1
+   [r , w  ; f_mrc    , fp  , 4] fmov\t%x0, %d1
+   [w , w  ; fmov     , fp  , 4] fmov\t%d0, %d1
+   [w , Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
+  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
+   && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+      aarch64_expand_mov_immediate (operands[0], operands[1]);
+      DONE;
+  }
   ;; The "mov_imm" type for CNTD is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
-		     load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
-		     fmov,neon_move")
-   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
-   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
 )
 
 (define_insn "insv_imm<mode>"
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index cbfc4543531452b0708a38bdf4abf5105b54f8b7..16c50b4a7c414a72b234cef7745a37745e6a41fc 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -924,27 +924,27 @@ (define_peephole2
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=rk,l,l ,l ,r ,k ,r,k ,r ,k ,r ,k,k,r ,k ,r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%0 ,l,0 ,l ,rk,k ,r,r ,rk,k ,rk,k,r,rk,k ,rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rk ,l,Py,Pd,rI,rI,k,rI,Pj,Pj,L ,L,L,PJ,PJ,?n")))]
-  "TARGET_32BIT"
-  "@
-   add%?\\t%0, %0, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %2, %1
-   add%?\\t%0, %1, %2
-   addw%?\\t%0, %1, %2
-   addw%?\\t%0, %1, %2
-   sub%?\\t%0, %1, #%n2
-   sub%?\\t%0, %1, #%n2
-   sub%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
-   #"
+  [(set (match_operand:SI 0 "s_register_operand")
+        (plus:SI (match_operand:SI 1 "s_register_operand")
+                 (match_operand:SI 2 "reg_or_int_operand")))]
+  "TARGET_32BIT"
+  "@@ (cons: 0 1 2; attrs: length predicable_short_it arch)
+   [=rk, %0, rk; 2,  yes, t2] add%?\\t%0, %0, %2
+   [l,   l,  l ; 4,  yes, t2] add%?\\t%0, %1, %2
+   [l,   0,  Py; 4,  yes, t2] add%?\\t%0, %1, %2
+   [l,   l,  Pd; 4,  yes, t2] add%?\\t%0, %1, %2
+   [r,   rk, rI; 4,  no,  * ] add%?\\t%0, %1, %2
+   [k,   k,  rI; 4,  no,  * ] add%?\\t%0, %1, %2
+   [r,   r,  k ; 4,  no,  * ] add%?\\t%0, %2, %1
+   [k,   r,  rI; 4,  no,  a ] add%?\\t%0, %1, %2
+   [r,   rk, Pj; 4,  no,  t2] addw%?\\t%0, %1, %2
+   [k,   k,  Pj; 4,  no,  t2] addw%?\\t%0, %1, %2
+   [r,   rk, L ; 4,  no,  * ] sub%?\\t%0, %1, #%n2
+   [k,   k,  L ; 4,  no,  * ] sub%?\\t%0, %1, #%n2
+   [k,   r,  L ; 4,  no,  a ] sub%?\\t%0, %1, #%n2
+   [r,   rk, PJ; 4,  no,  t2] subw%?\\t%0, %1, #%n2
+   [k,   k,  PJ; 4,  no,  t2] subw%?\\t%0, %1, #%n2
+   [r,   rk, ?n; 16, no,  * ] #"
   "TARGET_32BIT
    && CONST_INT_P (operands[2])
    && !const_ok_for_op (INTVAL (operands[2]), PLUS)
@@ -956,10 +956,10 @@ (define_insn_and_split "*arm_addsi3"
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length")
    (set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no,no")
-   (set_attr "arch" "t2,t2,t2,t2,*,*,*,a,t2,t2,*,*,a,t2,t2,*")
+   (set_attr "predicable_short_it")
+   (set_attr "arch")
    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
 		      (const_string "alu_imm")
 		      (const_string "alu_sreg")))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..199f2315432dc56cadfdfc03a8ab381fe02a43b3 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
                         from such an insn.
 * Output Statement::    For more generality, write C code to output
                         the assembler code.
+* Compact Syntax::      Compact syntax for writing machine descriptions.
 * Predicates::          Controlling what kinds of operands can be used
                         for an insn.
 * Constraints::         Fine-tuning operand selection.
@@ -713,6 +714,211 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
 @end group
 @end smallexample
 
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When the number of alternatives in a @code{define_insn} or
+@code{define_insn_and_split} is large, it may be beneficial to use the
+compact syntax when specifying alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
+  ""
+  "@
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %1
+   #
+   * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+   [(const_int 0)]
+  @{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  @}
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+   (set_attr "arch"   "*,*,*,*,*,sve")
+   (set_attr "length" "4,4,4,4,*,  4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+   [k , r  ; mov_reg  , *   , 4] ^
+   [r , k  ; mov_reg  , *   , 4] ^
+   [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+   [r , n  ; mov_imm  , *   , *] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+  [(const_int 0)]
+  @{
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Template must start with "@@" to use the new syntax.
+
+@item
+"@@" is followed by a layout in parentheses which is @samp{"cons:"} followed by
+a list of @code{match_operand}/@code{match_scratch} operand numbers, then a
+semicolon, followed by the same for attributes (@samp{"attrs:"}). Both sections
+are optional (so you can use only @samp{cons}, or only @samp{attrs}, or both),
+and @samp{cons} must come before @samp{attrs} if present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of @samp{constraints} and/or
+@samp{attributes} within brackets @code{[]}, with sections separated by a
+semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This allows less copy pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @code{* return <function>;} can be
+replaced with the shorthand @code{<< <function>;}.
+
+@item
+Following the closing ']' is any amount of whitespace, and then the actual asm
+output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All alternatives should be specified: a blank list should be "[,,]", "[,,;,]"
+etc., not "[]" or "".
+
+@item
+Within an @@ block, @code{''} is treated the same as @code{""} in cases where a
+single character would be invalid in C.  This means a multicharacter string can
+be created using @code{''} which allows for less escaping.
+
+@item
+Any unexpanded iterators within the block will result in a compile time error
+rather than silently emitting the @code{<..>} in the output asm.  If the
+literal @code{<..>} is required it should be escaped as @code{\<..\>}.
+
+@item
+Within an @@ block, any iterators that do not get expanded will result in an
+error.  If for some reason it is required to have @code{<>} in the output then
+these must be escaped using @backslashchar{}.
+
+@item
+The actual constraint string in the @code{match_operand} or
+@code{match_scratch}, and the attribute string in the @code{set_attr}, must be
+blank or an empty string (you can't combine the old and new syntaxes).
+
+@item
+@code{set_attr} are optional.  If a @code{set_attr} is defined in the
+@samp{attrs} section then that declaration can be both definition and
+declaration.  If both @samp{attrs} and @code{set_attr} are defined for the same
+entry then the attribute string must be empty or blank.
+
+@item
+Additional @code{set_attr} can be specified other than the ones in the
+@samp{attrs} list.  These must use the @samp{normal} syntax and must be defined
+after all @samp{attrs} specified.
+
+In other words, the following are valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "arch")]
+  [(set_attr "length")]
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+and
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+but these are not valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "arch")]
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+and
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "foo" "mov_imm")]
+  [(set_attr "arch")]
+  [(set_attr "length")]
+)
+@end group
+@end smallexample
+
+because the order of the entries doesn't match and new entries must be last.
+@end itemize
+
 @node Predicates
 @section Predicates
 @cindex predicates
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..4e67cd6ca5356c62165382de01da6bbc6f3c5fa2 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "errors.h"
 #include "read-md.h"
 #include "gensupport.h"
+#include <string>
 
 /* No instruction can have more operands than this.  Sorry for this
    arbitrary limit, but what machine will have an instruction with
@@ -157,6 +158,7 @@ public:
   int n_alternatives;		/* Number of alternatives in each constraint */
   int operand_number;		/* Operand index in the big array.  */
   int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
+  bool compact_syntax_p;
   struct operand_data operand[MAX_MAX_OPERANDS];
 };
 
@@ -700,12 +702,37 @@ process_template (class data *d, const char *template_code)
 	  if (sp != ep)
 	    message_at (d->loc, "trailing whitespace in output template");
 
-	  while (cp < sp)
+	  /* Check for any unexpanded iterators.  */
+	  std::string buff (cp, sp - cp);
+	  if (bp[0] != '*' && d->compact_syntax_p)
 	    {
-	      putchar (*cp);
-	      cp++;
+	      size_t start = buff.find ('<');
+	      size_t end = buff.find ('>', start + 1);
+	      if (end != std::string::npos || start != std::string::npos)
+		{
+		  if (end == std::string::npos || start == std::string::npos)
+		    fatal_at (d->loc, "unmatched angle brackets, likely an "
+			      "error in iterator syntax in %s", buff.c_str ());
+
+		  if (start != 0
+		      && buff[start-1] == '\\'
+		      && buff[end-1] == '\\')
+		    {
+		      /* Found a valid escape sequence, erase the characters for
+			 output.  */
+		      buff.erase (end-1, 1);
+		      buff.erase (start-1, 1);
+		    }
+		  else
+		    fatal_at (d->loc, "unresolved iterator '%s' in '%s'",
+			      buff.substr(start+1, end - start-1).c_str (),
+			      buff.c_str ());
+		}
 	    }
 
+	  printf ("%s", buff.c_str ());
+	  cp = sp;
+
 	  if (!found_star)
 	    puts ("\",");
 	  else if (*bp != '*')
@@ -881,6 +908,8 @@ gen_insn (md_rtx_info *info)
   else
     d->name = 0;
 
+  d->compact_syntax_p = compact_syntax.contains (insn);
+
   /* Build up the list in the same order as the insns are seen
      in the machine description.  */
   d->next = 0;
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
--- a/gcc/gensupport.h
+++ b/gcc/gensupport.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_GENSUPPORT_H
 #define GCC_GENSUPPORT_H
 
+#include "hash-set.h"
 #include "read-md.h"
 
 struct obstack;
@@ -218,6 +219,8 @@ struct pattern_stats
   int num_operand_vars;
 };
 
+extern hash_set<rtx> compact_syntax;
+
 extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
 extern void compute_test_codes (rtx, file_location, char *);
 extern file_location get_file_location (rtx);
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..c6a731968d2d6c7c9b01ad00e9dabb2b6d5f173e 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -27,12 +27,16 @@
 #include "read-md.h"
 #include "gensupport.h"
 #include "vec.h"
+#include <string>
+#include <vector>
 
 #define MAX_OPERANDS 40
 
 static rtx operand_data[MAX_OPERANDS];
 static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
 static char used_operands_numbers[MAX_OPERANDS];
+/* List of entries which are part of the new syntax.  */
+hash_set<rtx> compact_syntax;
 
 
 /* In case some macros used by files we include need it, define this here.  */
@@ -545,6 +549,532 @@ gen_rewrite_sequence (rtvec vec)
   return new_vec;
 }
 
+/* The following is for handling the compact syntax for constraints and
+   attributes.
+
+   The normal syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand" "r,I,k")
+       (match_operand: 2 "s_register_operand" "r,k,I")
+       ...
+       "@
+	<asm>
+	<asm>
+	<asm>"
+       ...
+       (set_attr "length" "4,8,8")
+
+   The compact syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand")
+       (match_operand: 2 "s_register_operand")
+       ...
+       "@@ (cons: 0 2; attrs: length)
+	[r,r; 4] <asm>
+	[I,k; 8] <asm>
+	[k,I; 8] <asm>"
+       ...
+       (set_attr "length")
+
+   This is the only place where this syntax needs to be handled.  Relevant
+   patterns are transformed from compact to the normal syntax before they are
+   queued, so none of the gen* programs need to know about this syntax at all.
+
+   Conversion process (convert_syntax):
+
+   0) Check that pattern actually uses new syntax (check for "@@").
+
+   1) Get the "layout", i.e. the "(cons: 0 2; attrs: length)" from the above
+      example.  cons must come first; both are optional. Set up two vecs,
+      convec and attrvec, for holding the results of the transformation.
+
+   2) For each alternative: parse the list of constraints and/or attributes,
+      and enqueue them in the relevant lists in convec and attrvec.  By the end
+      of this process, convec[N].con and attrvec[N].con should contain regular
+      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
+      as we go.
+
+   3) Search the rtx and write the constraint and attribute lists into the
+      correct places. Write the asm back into the template.  */
+
+/* Accumulator used by convert_syntax, add_constraints and add_attributes to
+   collect the text destined for a single operand or attribute.  The text is
+   stored with its separating commas but without any whitespace.  */
+
+class conlist {
+private:
+  /* Text gathered so far.  */
+  std::string con;
+
+public:
+  /* Identifier of the rtx this list belongs to.  */
+  std::string name;
+
+  /* Build an empty list named by the LEN characters starting at NS.  For a
+     match_operand or match_scratch the name should be the operand number in
+     decimal, i.e. itoa (XINT (rtx, 0)); for a set_attr it should be the
+     attribute name, i.e. XSTR (rtx, 0).  */
+  conlist (const char *ns, unsigned int len)
+    : name (ns, len)
+  {
+  }
+
+  /* Append the character C to the accumulated text.  */
+  void add (char c)
+  {
+    con.push_back (c);
+  }
+
+  /* Hand back the accumulated text as a newly allocated C string and leave
+     the list empty again.  */
+  char * out ()
+  {
+    /* The text always ends in a trailing comma, which is not part of the
+       result.  */
+    const size_t keep = con.size () - 1;
+    char * copy = xstrndup (con.c_str (), keep);
+    con.clear ();
+    return copy;
+  }
+};
+
+typedef std::vector<conlist> vec_conlist;
+
+/* Walk PART depth-first and install the constraint strings held in CONS
+   into the match_operand/match_scratch rtxes that it contains.
+
+   The entries of CONS are consumed strictly in order, so the operands they
+   name must appear in the pattern in that same order when it is read from
+   top to bottom.  INDEX is the first entry of CONS that has not been placed
+   yet.  The traversal mirrors the one in remove_constraints.
+
+   If a matching operand already carries a constraint string an error is
+   reported at LOC and CONS.size () is returned, which stops the walk.
+   Otherwise the return value is 1 + the index of the last entry placed.  */
+
+static unsigned int
+add_constraints (rtx part, file_location loc, unsigned int index,
+		 vec_conlist &cons)
+{
+  /* Nothing to look at, or nothing left to place.  */
+  if (part == NULL_RTX || index == cons.size ())
+    return index;
+
+  const bool is_operand = GET_CODE (part) == MATCH_OPERAND;
+  if (is_operand || GET_CODE (part) == MATCH_SCRATCH)
+    {
+      /* Compare the operand number, rendered as text, with the name of the
+	 entry we are waiting to place.  */
+      char id[3];
+      snprintf (id, 3, "%d", XINT (part, 0));
+
+      if (cons[index].name == id)
+	{
+	  /* The constraint string is operand 2 of a match_operand and
+	     operand 1 of a match_scratch.  */
+	  const int field = is_operand ? 2 : 1;
+
+	  if (XSTR (part, field)[0] != '\0')
+	    {
+	      error_at (loc, "can't mix normal and compact constraint syntax");
+	      return cons.size ();
+	    }
+
+	  XSTR (part, field) = cons[index].out ();
+	  ++index;
+	}
+    }
+
+  /* Recurse into every sub-rtx and every vector of sub-rtxes.  */
+  const char *fmt = GET_RTX_FORMAT (GET_CODE (part));
+  const int n_fields = GET_RTX_LENGTH (GET_CODE (part));
+  for (int i = 0; i < n_fields; i++)
+    {
+      const char kind = fmt[i];
+
+      if (kind == 'e' || kind == 'u')
+	index = add_constraints (XEXP (part, i), loc, index, cons);
+      else if (kind == 'E' && XVEC (part, i) != NULL)
+	for (int j = 0; j < XVECLEN (part, i); j++)
+	  index = add_constraints (XVECEXP (part, i, j), loc, index, cons);
+    }
+
+  return index;
+}
+
+/* Install the attribute value strings held in ATTRS into the set_attr
+   entries of X.
+
+   The entries of ATTRS are consumed strictly in order, so the set_attrs
+   they name must appear in the attribute vector in that same order.  The
+   attribute vector is operand 4 of a define_insn and operand 3 of any other
+   rtx.
+
+   If a matching set_attr already carries a value an error is reported at
+   LOC and the walk stops.  Returns the number of entries of ATTRS that
+   were placed, i.e. 1 + the index of the last entry matched.  */
+
+static unsigned int
+add_attributes (rtx x, file_location loc, vec_conlist &attrs)
+{
+  const unsigned int vec_slot = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+  unsigned int placed = 0;
+
+  if (XVEC (x, vec_slot) == NULL)
+    return placed;
+
+  const int n_entries = XVECLEN (x, vec_slot);
+  for (int i = 0; i < n_entries; ++i)
+    {
+      rtx entry = XVECEXP (x, vec_slot, i);
+
+      /* Only set_attr entries carrying the name we are waiting for are of
+	 interest.  */
+      if (GET_CODE (entry) != SET_ATTR)
+	continue;
+      if (attrs[placed].name.compare (XSTR (entry, 0)) != 0)
+	continue;
+
+      if (XSTR (entry, 1) && XSTR (entry, 1)[0] != '\0')
+	{
+	  error_at (loc, "can't mix normal and compact attribute syntax");
+	  break;
+	}
+
+      XSTR (entry, 1) = attrs[placed].out ();
+
+      /* Stop as soon as every entry has found its home.  */
+      if (++placed == attrs.size ())
+	break;
+    }
+
+  return placed;
+}
+
+/* Modify the attribute vector of X to make space for the attributes that
+   are named in the attrs: list ATTRS but have no set_attr in the pattern.
+
+   For every such attribute a new (set_attr "name") with a null value is
+   placed at the front of the vector, in the order the names appear in
+   ATTRS; the existing entries follow unchanged.  The attribute vector is
+   operand 4 of a define_insn and operand 3 of any other rtx.  X is left
+   untouched when ATTRS is empty or nothing is missing.  */
+
+static void
+create_missing_attributes (rtx x, file_location /* loc */, vec_conlist &attrs)
+{
+  if (attrs.empty ())
+    return;
+
+  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+  rtvec orig = XVEC (x, attr_index);
+  const int n_curr = orig ? XVECLEN (x, attr_index) : 0;
+  vec_conlist missing;
+
+  /* This is an O(n*m) loop but it's fine, both n and m will always be very
+     small.  */
+  for (const conlist &cl : attrs)
+    {
+      bool found = false;
+      for (int i = 0; i < n_curr && !found; ++i)
+	{
+	  rtx part = XVECEXP (x, attr_index, i);
+
+	  /* Only a set_attr with the same name counts; other kinds of entry
+	     must not hide a missing attribute.  */
+	  found = (GET_CODE (part) == SET_ATTR
+		   && cl.name.compare (XSTR (part, 0)) == 0);
+	}
+
+      if (!found)
+	missing.push_back (cl);
+    }
+
+  if (missing.empty ())
+    return;
+
+  rtvec copy = rtvec_alloc (n_curr + missing.size ());
+
+  /* Create a shallow copy of existing entries, placed after the slots
+     reserved for the new ones.  ORIG may be null, in which case there is
+     nothing to copy.  */
+  if (n_curr > 0)
+    memcpy (&copy->elem[missing.size ()], &orig->elem[0],
+	    sizeof (rtx) * n_curr);
+  XVEC (x, attr_index) = copy;
+
+  /* Create the new elements.  */
+  for (unsigned i = 0; i < missing.size (); i++)
+    {
+      rtx attr = rtx_alloc (SET_ATTR);
+      XSTR (attr, 0) = xstrdup (missing[i].name.c_str ());
+      XSTR (attr, 1) = NULL;
+      XVECEXP (x, attr_index, i) = attr;
+    }
+}
+
+/* Advance *STR past any run of spaces and tabs.  */
+
+static inline void
+skip_spaces (const char **str)
+{
+  const char *cursor = *str;
+
+  while (*cursor == ' ' || *cursor == '\t')
+    ++cursor;
+
+  *str = cursor;
+}
+
+/* If the next character of *STR is C, step over it and return true.
+   Otherwise leave *STR alone and return false.  */
+
+static inline bool
+expect_char (const char **str, char c)
+{
+  const bool matched = **str == c;
+
+  if (matched)
+    ++*str;
+
+  return matched;
+}
+
+/* Parses one section of the layout that follows a "@@" if using new syntax.
+   Builds a vector for a single section.  E.g. if we have
+   "attrs: length arch)..." then LIST will have two elements, the first for
+   "length" and the second for "arch".
+
+   *TEMPL must point at the start of the section.  If it does not begin with
+   LABEL nothing is consumed and LIST is left alone.  Otherwise *TEMPL is
+   advanced to the ';' or ')' that ends the section, which is not consumed.
+   Parsing also stops at the end of the string so that a malformed layout
+   cannot run past the template; the caller then diagnoses the missing
+   terminator.  Whitespace before the terminator does not create an
+   entry.  */
+
+static void
+parse_section_layout (const char **templ, const char *label,
+		      vec_conlist &list)
+{
+  size_t label_len = strlen (label);
+  if (strncmp (label, *templ, label_len) != 0)
+    return;
+
+  *templ += label_len;
+
+  /* Gather the names.  */
+  for (;;)
+    {
+      skip_spaces (templ);
+      if (**templ == ';' || **templ == ')' || **templ == '\0')
+	break;
+
+      const char *name_start = *templ;
+      unsigned int len = 0;
+      while (name_start[len] != ' ' && name_start[len] != '\t'
+	     && name_start[len] != ';' && name_start[len] != ')'
+	     && name_start[len] != '\0')
+	len++;
+
+      *templ += len;
+      list.push_back (conlist (name_start, len));
+    }
+}
+
+/* Parse one section of an alternative's bracketed list.  A section is a
+   comma separated list of N_ELEMS entries, e.g. the "r, k" or the "4" in
+
+     [r, k; 4] <asm>
+
+   Spaces and tabs are dropped.  The K-th entry is appended to LIST[K]
+   followed by a comma, so that after all alternatives have been parsed
+   LIST[K] holds a comma separated string with one entry per alternative.
+
+   On return *TEMPL points at the ';' or ']' that ends the section; it is
+   not consumed.  ALT_NO, LOC and NAME (e.g. "constraint") are only used
+   for diagnostics.  It is a fatal error for the section to hold more or
+   fewer than N_ELEMS entries, or for the line or the template to end
+   before the section does.  */
+
+static void
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
+	       vec_conlist &list, file_location loc, const char *name)
+{
+  unsigned int i = 0;
+
+  /* Go through the list, one character at a time, adding said character
+     to the correct string.  */
+  for (; **templ != ']' && **templ != ';'; (*templ)++)
+    {
+      const char c = **templ;
+
+      /* Never walk past the end of the alternative looking for the
+	 terminator.  */
+      if (c == '\0' || c == '\n')
+	fatal_at (loc, "unterminated %s list in alternative %u",
+		  name, alt_no);
+
+      if (c == ' ' || c == '\t')
+	continue;
+
+      list[i].add (c);
+      if (c == ',' && ++i == n_elems)
+	fatal_at (loc, "too many %ss in alternative %u: expected %u",
+		  name, alt_no, n_elems);
+    }
+
+  /* I counts the commas seen, so I + 1 entries were written.  */
+  if (i + 1 < n_elems)
+    fatal_at (loc, "too few %ss in alternative %u: expected %u, got %u",
+	      name, alt_no, n_elems, i + 1);
+
+  /* Terminate the final entry so every entry ends in a comma.  */
+  list[i].add (',');
+}
+
+/* The compact syntax has more convenience syntaxes.  As such we pre-process
+   the lines to get them back to something the normal syntax understands.
+
+   LINE is the asm text of alternative ALT_NO and is rewritten in place:
+
+   - A line consisting solely of "^" is replaced by LAST_LINE, the already
+     processed text of the previous alternative.  It is a fatal error if
+     LAST_LINE is empty.
+   - A leading "<<" is replaced by "* return ", and any spaces or tabs that
+     follow the "<<" are dropped.
+   - A pair of single quotes enclosing more than one character is turned
+     into a pair of double quotes, so 'foo' becomes "foo" while 'c' is left
+     alone.  Escaped character literals such as '\n' are not recognised and
+     are treated as strings.
+
+   LOC is used for diagnostics.  It is a fatal error for LINE to contain an
+   odd number of single quotes.  */
+
+static void
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
+			   std::string &last_line)
+{
+  /* Check if we're copying the last statement.  */
+  if (line == "^")
+    {
+      if (last_line.empty ())
+	fatal_at (loc, "found instruction to copy previous line (^) in "
+		       "alternative %d but no previous line to copy", alt_no);
+      line = last_line;
+      return;
+    }
+
+  std::string result;
+  size_t pos = 0;
+
+  /* Check if we have << which means return c statement.  */
+  if (line.compare (0, 2, "<<") == 0)
+    {
+      result.append ("* return ");
+      pos = 2;
+      while (pos < line.length () && (line[pos] == ' ' || line[pos] == '\t'))
+	pos++;
+    }
+
+  /* Now perform string expansion.  Replace ' with " if more than one
+     character in the string.  */
+  bool double_quoted = false;
+  bool quote_open = false;
+  for (size_t i = pos; i < line.length (); i++)
+    {
+      const char chr = line[i];
+      if (chr != '\'')
+	{
+	  result += chr;
+	  continue;
+	}
+
+      /* Decide afresh at every opening quote, so that a character literal
+	 following a string is not closed with a double quote.  */
+      if (!quote_open)
+	double_quoted = (i + 2 < line.length ()
+			 && line[i + 1] != '\''
+			 && line[i + 2] != '\'');
+
+      result += double_quoted ? '"' : chr;
+      quote_open = !quote_open;
+    }
+
+  /* Quotes were mismatched.  Abort.  */
+  if (quote_open)
+    fatal_at (loc, "quote mismatch in instruction template '%s'",
+	      line.c_str ());
+
+  line = result;
+}
+
+/* Converts an rtx from compact syntax to normal syntax if possible.
+
+   The output template is read from operand 3 of a define_insn and from
+   operand 2 of any other rtx.  If it does not start with "@@" then X is
+   left untouched.  Otherwise the layout line and each alternative are
+   parsed, the constraint and attribute strings are written into the
+   matching match_operand/match_scratch/set_attr rtxes of X, the template
+   is replaced by an equivalent "@" multi-alternative template, and X is
+   recorded in compact_syntax.  All syntax errors are reported at LOC.  */
+
+static void
+convert_syntax (rtx x, file_location loc)
+{
+  int alt_no;
+  unsigned int index, templ_index;
+  const char *templ;
+  vec_conlist convec, attrvec;
+
+  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+
+  templ = XTMPL (x, templ_index);
+
+  /* Templates with constraints start with "@@".  */
+  if (strncmp ("@@", templ, 2))
+    return;
+
+  /* Get the layout for the template.  */
+  templ += 2;
+  skip_spaces (&templ);
+
+  if (!expect_char (&templ, '('))
+    fatal_at (loc, "expecting `(' to begin section list");
+
+  parse_section_layout (&templ, "cons:", convec);
+
+  if (*templ != ')')
+    {
+      if (*templ == ';')
+	skip_spaces (&(++templ));
+      parse_section_layout (&templ, "attrs:", attrvec);
+      create_missing_attributes (x, loc, attrvec);
+    }
+
+  if (!expect_char (&templ, ')'))
+    {
+      fatal_at (loc, "expecting `)' to end section list - section list "
+		"must have cons first, attrs second");
+    }
+
+  /* We will write the un-constrainified template into new_templ.  */
+  std::string new_templ;
+  new_templ.append ("@\n");
+
+  /* Skip to the first proper line, without running off the end of a
+     template that consists of the layout line only.  */
+  while (*templ != '\n' && *templ != '\0')
+    templ++;
+  if (*templ == '\0')
+    fatal_at (loc, "expected alternatives on the lines following the "
+		   "section list");
+  templ++;
+  alt_no = 0;
+
+  std::string last_line;
+
+  /* Process the alternatives.  Each iteration consumes one line including
+     its terminator, so we are done once the terminator just consumed was
+     the end of the string.  */
+  while (*(templ - 1) != '\0')
+    {
+      /* Copy leading whitespace.  */
+      while (*templ == ' ' || *templ == '\t')
+	new_templ += *templ++;
+
+      if (expect_char (&templ, '['))
+	{
+	  /* Parse the constraint list, then the attribute list.  */
+	  if (convec.size () > 0)
+	    parse_section (&templ, convec.size (), alt_no, convec, loc,
+			   "constraint");
+
+	  if (attrvec.size () > 0)
+	    {
+	      if (convec.size () > 0 && !expect_char (&templ, ';'))
+		fatal_at (loc, "expected `;' to separate constraints "
+			       "and attributes in alternative %d", alt_no);
+
+	      parse_section (&templ, attrvec.size (), alt_no,
+			     attrvec, loc, "attribute");
+	    }
+
+	  if (!expect_char (&templ, ']'))
+	    fatal_at (loc, "expected end of constraint/attribute list but "
+			   "missing an ending `]' in alternative %d", alt_no);
+	}
+      else
+	fatal_at (loc, "expected constraint/attribute list at beginning of "
+		       "alternative %d but missing a starting `['", alt_no);
+
+      /* Skip whitespace between list and asm.  Only whitespace is skipped,
+	 so an asm that directly follows the `]', or an empty asm, is kept
+	 intact.  */
+      skip_spaces (&templ);
+
+      /* Copy asm to new template.  */
+      std::string line;
+      while (*templ != '\n' && *templ != '\0')
+	line += *templ++;
+
+      /* Apply any pre-processing needed to the line.  */
+      preprocess_compact_syntax (loc, alt_no, line, last_line);
+      new_templ.append (line);
+      last_line = line;
+
+      /* Copy the line terminator; for the last line this is the NUL that
+	 ends the loop.  */
+      new_templ += *templ++;
+      ++alt_no;
+    }
+
+  /* Write the constraints and attributes into their proper places.  */
+  if (convec.size () > 0)
+    {
+      index = add_constraints (x, loc, 0, convec);
+      if (index < convec.size ())
+	fatal_at (loc, "could not find match_operand/scratch with id %s",
+		  convec[index].name.c_str ());
+    }
+
+  if (attrvec.size () > 0)
+    {
+      index = add_attributes (x, loc, attrvec);
+      if (index < attrvec.size ())
+	fatal_at (loc, "could not find set_attr for attribute %s",
+		  attrvec[index].name.c_str ());
+    }
+
+  /* Copy over the new un-constrainified template.  */
+  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
+
+  /* Register for later checks during iterator expansions.  */
+  compact_syntax.add (x);
+
+#if DEBUG
+  print_rtl_single (stderr, x);
+#endif
+}
+
 /* Process a top level rtx in some way, queuing as appropriate.  */
 
 static void
@@ -553,10 +1083,12 @@ process_rtx (rtx desc, file_location loc)
   switch (GET_CODE (desc))
     {
     case DEFINE_INSN:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_insn_tail, loc);
       break;
 
     case DEFINE_COND_EXEC:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_cond_exec_tail, loc);
       break;
 
@@ -631,6 +1163,7 @@ process_rtx (rtx desc, file_location loc)
 	attr = XVEC (desc, split_code + 1);
 	PUT_CODE (desc, DEFINE_INSN);
 	XVEC (desc, 4) = attr;
+	convert_syntax (desc, loc);
 
 	/* Queue them.  */
 	insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  

Comments

Richard Sandiford April 21, 2023, 5:18 p.m. UTC | #1
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This patch adds support for a compact syntax for specifying constraints in
> instruction patterns. Credit for the idea goes to Richard Earnshaw.
>
> I am sending up this RFC to get feedback for its inclusion in GCC 14.
> With this new syntax we want a clean break from the current limitations to make
> something that is hopefully easier to use and maintain.
>
> The idea behind this compact syntax is that often times it's quite hard to
> correlate the entries in the constraints list, attributes and instruction lists.
>
> One has to count and this often is tedious.  Additionally when changing a single
> line in the insn multiple lines in a diff change, making it harder to see what's
> going on.
>
> This new syntax takes into account many of the common things that are done in MD
> files.   It's also worth saying that this version is intended to deal with the
> common case of a string based alternatives.   For C chunks we have some ideas
> but those are not intended to be addressed here.
>
> It's easiest to explain with an example:
>
> normal syntax:
>
> (define_insn_and_split "*movsi_aarch64"
>   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
> 	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>   "(register_operand (operands[0], SImode)
>     || aarch64_reg_or_zero (operands[1], SImode))"
>   "@
>    mov\\t%w0, %w1
>    mov\\t%w0, %w1
>    mov\\t%w0, %w1
>    mov\\t%w0, %1
>    #
>    * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
>    ldr\\t%w0, %1
>    ldr\\t%s0, %1
>    str\\t%w1, %0
>    str\\t%s1, %0
>    adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
>    adr\\t%x0, %c1
>    adrp\\t%x0, %A1
>    fmov\\t%s0, %w1
>    fmov\\t%w0, %s1
>    fmov\\t%s0, %s1
>    * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>    [(const_int 0)]
>    "{
>        aarch64_expand_mov_immediate (operands[0], operands[1]);
>        DONE;
>     }"
>   ;; The "mov_imm" type for CNT is just a placeholder.
>   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
> 		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
>    (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
>    (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
> ]
> )
>
> New syntax:
>
> (define_insn_and_split "*movsi_aarch64"
>   [(set (match_operand:SI 0 "nonimmediate_operand")
> 	(match_operand:SI 1 "aarch64_mov_operand"))]
>   "(register_operand (operands[0], SImode)
>     || aarch64_reg_or_zero (operands[1], SImode))"
>   "@@ (cons: 0 1; attrs: type arch length)
>    [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
>    [k , r  ; mov_reg  , *   , 4] ^
>    [r , k  ; mov_reg  , *   , 4] ^
>    [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
>    [r , n  ; mov_imm  , *   , *] #
>    [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
>    [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
>    [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
>    [m , rZ ; store_4  , *   , 4] str\t%w1, %0
>    [m , w  ; store_4  , fp  , 4] str\t%s1, %0
>    [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
>    [r , Usa; adr      , *   , 4] adr\t%x0, %c1
>    [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
>    [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
>    [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>    [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>    [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>   [(const_int 0)]
>   {
>     aarch64_expand_mov_immediate (operands[0], operands[1]);
>     DONE;
>   }
>   ;; The "mov_imm" type for CNT is just a placeholder.
> )
>
> The patch contains some more rewritten examples for both Arm and AArch64.  I
> have included them for examples in this RFC but the final version posted in
> GCC 14 will have these split out.
>
> The main syntax rules are as follows (See docs for full rules):
>   - Template must start with "@@" to use the new syntax.
>   - "@@" is followed by a layout in parentheses which is "cons:" followed by
>     a list of match_operand/match_scratch IDs, then a semicolon, then the
>     same for attributes ("attrs:"). Both sections are optional (so you can
>     use only cons, or only attrs, or both), and cons must come before attrs
>     if present.
>   - Each alternative begins with any amount of whitespace.
>   - Following the whitespace is a comma-separated list of constraints and/or
>     attributes within brackets [], with sections separated by a semicolon.
>   - Following the closing ']' is any amount of whitespace, and then the actual
>     asm output.
>   - Spaces are allowed in the list (they will simply be removed).
>   - All alternatives should be specified: a blank list should be
>     "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
>     you leave certain attributes empty, I have found).
>   - The actual constraint string in the match_operand or match_scratch, and
>     the attribute string in the set_attr, must be blank or an empty string
>     (you can't combine the old and new syntaxes).
>   - The common idiom * return can be shortened by using <<.
>   - Any unexpanded iterators left during processing will result in an error at
>     compile time.   If for some reason <> is needed in the output then these
>     must be escaped using \.
>   - Inside a @@ block '' is treated as "" when there are multiple characters
>     inside the single quotes.  This version does not handle multi byte literals
>     like specifying characters as their numerical encoding, like \003 nor does
>     it handle unicode, especially multibyte encodings.  This feature may be more
>     trouble than it's worth so I have not finished it off, however this means one
>     can use 'foo' instead of \"foo\" to denote a multicharacter string.
>   - Inside an @@ block any unexpanded iterators will result in a compile time
>     fault instead of incorrect assembly being generated at runtime.  If the
>     literal <> is needed in the output this needs to be escaped with \<\>.
>   - This check is not performed inside C blocks (lines starting with *).
>   - Instead of copying the previous instruction again in the next pattern, one
>     can use ^ to refer to the previous asm string.

Thanks for doing this.  The new syntax seems like a clear improvement
for complex patterns like movs.

Some comments/suggestions:

- From a style perspective, out-of-order constraints should IMO be strongly
  discouraged.  The asm string uses %0, %1, %2 etc. to refer to operands,
  and having that directly after a list that puts the constraints in
  a different order (such as [%2, %0, %1]) would IMO be very confusing.

  I agree there might be cases where dropping constraints makes sense.
  But I think in general we should encourage all constraints to be
  specified, and be specified in order.  And that's likely to be the
  natural choice in an overwhelming majority of cases anyway.

  So how about having a simpler syntax for the first line when all
  constraints are specified in order?  Maybe just "cons" (without the
  colon or numbers).

- I'm not too keen on the '' thing.  It sounded from internal
  discussion like backslashes and quoting were a problem generally.

  Would it work to quote the new form in {@ ... } instead?  There should
  be no compatibility problem with that, since @ isn't a standard C++
  lexing token.

- Could we support a comment syntax?  E.g. ignore lines beginning with
  ;; or // (or both)?  In the example above, it would be good to keep
  the comment about the CNT type attribute nearer to the attribute itself.

- Very minor, but using [...] rather than (...) for the first line
  might make it more visually obvious that it's acting as a table
  header for the [...] rows.

Haven't done a detailed review of the gensupport bits, but:

> [...]
> @@ -700,12 +702,37 @@ process_template (class data *d, const char *template_code)
>  	  if (sp != ep)
>  	    message_at (d->loc, "trailing whitespace in output template");
>  
> -	  while (cp < sp)
> +	  /* Check for any unexpanded iterators.  */
> +	  std::string buff (cp, sp - cp);
> +	  if (bp[0] != '*' && d->compact_syntax_p)
>  	    {
> -	      putchar (*cp);
> -	      cp++;
> +	      size_t start = buff.find ('<');
> +	      size_t end = buff.find ('>', start + 1);
> +	      if (end != std::string::npos || start != std::string::npos)
> +		{
> +		  if (end == std::string::npos || start == std::string::npos)
> +		    fatal_at (d->loc, "unmatched angle brackets, likely an "
> +			      "error in iterator syntax in %s", buff.c_str ());
> +
> +		  if (start != 0
> +		      && buff[start-1] == '\\'
> +		      && buff[end-1] == '\\')
> +		    {
> +		      /* Found a valid escape sequence, erase the characters for
> +			 output.  */
> +		      buff.erase (end-1, 1);
> +		      buff.erase (start-1, 1);
> +		    }
> +		  else
> +		    fatal_at (d->loc, "unresolved iterator '%s' in '%s'",
> +			      buff.substr(start+1, end - start-1).c_str (),
> +			      buff.c_str ());
> +		}
>  	    }

Asm strings that want unbalanced but quoted < or > should be able
to use them, so the check for backslashes should probably come first.
I suppose this also runs into the classic problem of whether
the preceding backslash was itself quoted, etc.

So maybe it would make sense to walk through character-by-character,
something like:

    const char *p = cp;
    const char *last_bracket = nullptr;
    while (p < sp)
      {
        if (*p == '\\' && p + 1 < sp)
          {
            p += 2;
            continue;
          }
        if (*p == '>' && last_bracket && *last_bracket == '<')
          ... unexpanded iterator ...
        else if (*p == '<' || *p == '>')
          last_bracket = p;
        p += 1;
      }
    if (last_bracket)
      ... error ...

That also copes with unlikely things like \<...\>...<foo>, where an
unexpanded iterator (or incorrectly quoted <...>) comes after a
correctly-quoted <...>.

Thanks,
Richard
  
Richard Sandiford April 24, 2023, 8:33 a.m. UTC | #2
Richard Sandiford <richard.sandiford@arm.com> writes:
> Tamar Christina <tamar.christina@arm.com> writes:
>> Hi All,
>>
>> This patch adds support for a compact syntax for specifying constraints in
>> instruction patterns. Credit for the idea goes to Richard Earnshaw.
>>
>> I am sending up this RFC to get feedback for its inclusion in GCC 14.
>> With this new syntax we want a clean break from the current limitations to make
>> something that is hopefully easier to use and maintain.
>>
>> The idea behind this compact syntax is that often times it's quite hard to
>> correlate the entries in the constraints list, attributes and instruction lists.
>>
>> One has to count and this often is tedious.  Additionally when changing a single
>> line in the insn multiple lines in a diff change, making it harder to see what's
>> going on.
>>
>> This new syntax takes into account many of the common things that are done in MD
>> files.   It's also worth saying that this version is intended to deal with the
>> common case of a string based alternatives.   For C chunks we have some ideas
>> but those are not intended to be addressed here.
>>
>> It's easiest to explain with an example:
>>
>> normal syntax:
>>
>> (define_insn_and_split "*movsi_aarch64"
>>   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
>> 	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>>   "(register_operand (operands[0], SImode)
>>     || aarch64_reg_or_zero (operands[1], SImode))"
>>   "@
>>    mov\\t%w0, %w1
>>    mov\\t%w0, %w1
>>    mov\\t%w0, %w1
>>    mov\\t%w0, %1
>>    #
>>    * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
>>    ldr\\t%w0, %1
>>    ldr\\t%s0, %1
>>    str\\t%w1, %0
>>    str\\t%s1, %0
>>    adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
>>    adr\\t%x0, %c1
>>    adrp\\t%x0, %A1
>>    fmov\\t%s0, %w1
>>    fmov\\t%w0, %s1
>>    fmov\\t%s0, %s1
>>    * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>>   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>>     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>>    [(const_int 0)]
>>    "{
>>        aarch64_expand_mov_immediate (operands[0], operands[1]);
>>        DONE;
>>     }"
>>   ;; The "mov_imm" type for CNT is just a placeholder.
>>   [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
>> 		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
>>    (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
>>    (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
>> ]
>> )
>>
>> New syntax:
>>
>> (define_insn_and_split "*movsi_aarch64"
>>   [(set (match_operand:SI 0 "nonimmediate_operand")
>> 	(match_operand:SI 1 "aarch64_mov_operand"))]
>>   "(register_operand (operands[0], SImode)
>>     || aarch64_reg_or_zero (operands[1], SImode))"
>>   "@@ (cons: 0 1; attrs: type arch length)
>>    [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
>>    [k , r  ; mov_reg  , *   , 4] ^
>>    [r , k  ; mov_reg  , *   , 4] ^
>>    [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
>>    [r , n  ; mov_imm  , *   , *] #
>>    [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
>>    [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
>>    [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
>>    [m , rZ ; store_4  , *   , 4] str\t%w1, %0
>>    [m , w  ; store_4  , fp  , 4] str\t%s1, %0
>>    [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
>>    [r , Usa; adr      , *   , 4] adr\t%x0, %c1
>>    [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
>>    [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
>>    [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>>    [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>>    [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>>   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>>     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>>   [(const_int 0)]
>>   {
>>     aarch64_expand_mov_immediate (operands[0], operands[1]);
>>     DONE;
>>   }
>>   ;; The "mov_imm" type for CNT is just a placeholder.
>> )
>>
>> The patch contains some more rewritten examples for both Arm and AArch64.  I
>> have included them for examples in this RFC but the final version posted in
>> GCC 14 will have these split out.
>>
>> The main syntax rules are as follows (See docs for full rules):
>>   - Template must start with "@@" to use the new syntax.
>>   - "@@" is followed by a layout in parentheses which is "cons:" followed by
>>     a list of match_operand/match_scratch IDs, then a semicolon, then the
>>     same for attributes ("attrs:"). Both sections are optional (so you can
>>     use only cons, or only attrs, or both), and cons must come before attrs
>>     if present.
>>   - Each alternative begins with any amount of whitespace.
>>   - Following the whitespace is a comma-separated list of constraints and/or
>>     attributes within brackets [], with sections separated by a semicolon.
>>   - Following the closing ']' is any amount of whitespace, and then the actual
>>     asm output.
>>   - Spaces are allowed in the list (they will simply be removed).
>>   - All alternatives should be specified: a blank list should be
>>     "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
>>     you leave certain attributes empty, I have found).
>>   - The actual constraint string in the match_operand or match_scratch, and
>>     the attribute string in the set_attr, must be blank or an empty string
>>     (you can't combine the old and new syntaxes).
>>   - The common idiom * return can be shortened by using <<.
>>   - Any unexpanded iterators left during processing will result in an error at
>>     compile time.   If for some reason <> is needed in the output then these
>>     must be escaped using \.
>>   - Inside a @@ block '' is treated as "" when there are multiple characters
>>     inside the single quotes.  This version does not handle multi byte literals
>>     like specifying characters as their numerical encoding, like \003 nor does
>>     it handle unicode, especially multibyte encodings.  This feature may be more
>>     trouble than it's worth so I have not finished it off, however this means one
>>     can use 'foo' instead of \"foo\" to denote a multicharacter string.
>>   - Inside an @@ block any unexpanded iterators will result in a compile time
>>     fault instead of incorrect assembly being generated at runtime.  If the
>>     literal <> is needed in the output this needs to be escaped with \<\>.
>>   - This check is not performed inside C blocks (lines starting with *).
>>   - Instead of copying the previous instruction again in the next pattern, one
>>     can use ^ to refer to the previous asm string.
>
> Thanks for doing this.  The new syntax seems like a clear improvement
> for complex patterns like movs.
>
> Some comments/suggestions:
>
> - From a style perspective, out-of-order constraints should IMO be strongly
>   discouraged.  The asm string uses %0, %1, %2 etc. to refer to operands,
>   and having that directly after a list that puts the constraints in
>   a different order (such as [%2, %0, %1]) would IMO be very confusing.
>
>   I agree there might be cases where dropping constraints makes sense.
>   But I think in general we should encourage all constraints to be
>   specified, and be specified in order.  And that's likely to be the
>   natural choice in an overwhelming majority of cases anyway.
>
>   So how about having a simpler syntax for the first line when all
>   constraints are specified in order?  Maybe just "cons" (without the
>   colon or numbers).

Alternatively: leading "=" and "+" characters describe the operand
as a whole, rather than individual alternatives.  So maybe "=" and
"+" should be in the "header" (the first line) rather than the first
"row"/alternative.  That would then be a justification for keeping
the operand numbers even for the simple case.

E.g.:

   cons: =0 1

Also, it would be good if the header and rows were consistent about
whether they use comma separators or whitespace separators.  At the
moment, the headers are whitespace-separated while the rows are
comma-separated.

On that: it might be possible to use whitespace-separated columns even
for constraints, since an empty constraint should be equivalent to X.
That would look like:

    [r  r  ; mov_reg   *    4] mov\t%w0, %w1
    [k  r  ; mov_reg   *    4] ^
    [r  k  ; mov_reg   *    4] ^
    [r  M  ; mov_imm   *    4] mov\t%w0, %1
    [r  n  ; mov_imm   *    *] #
    [r  Usv; mov_imm   sve  4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
    [r  m  ; load_4    *    4] ldr\t%w0, %1
    [w  m  ; load_4    fp   4] ldr\t%s0, %1
    [m  rZ ; store_4   *    4] str\t%w1, %0
    [m  w  ; store_4   fp   4] str\t%s1, %0
    [r  Usw; load_4    *    8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
    [r  Usa; adr       *    4] adr\t%x0, %c1
    [r  Ush; adr       *    4] adrp\t%x0, %A1
    [w  rZ ; f_mcr     fp   4] fmov\t%s0, %w1
    [r  w  ; f_mrc     fp   4] fmov\t%w0, %s1
    [w  w  ; fmov      fp   4] fmov\t%s0, %s1
    [w  Ds ; neon_move simd 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"

I'm not sure whether that's better or worse though.  Keeping commas is
fine by me FWIW, as long as we do it in the header too.

Thanks,
Richard
  
Richard Sandiford April 24, 2023, 9:37 a.m. UTC | #3
Tamar Christina <Tamar.Christina@arm.com> writes:
>> -----Original Message-----
>> From: Richard Sandiford <richard.sandiford@arm.com>
>> Sent: Friday, April 21, 2023 6:19 PM
>> To: Tamar Christina <Tamar.Christina@arm.com>
>> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw
>> <Richard.Earnshaw@arm.com>
>> Subject: Re: [PATCH] RFC: New compact syntax for insn and insn_split in
>> Machine Descriptions
>> 
>> Tamar Christina <tamar.christina@arm.com> writes:
>> > Hi All,
>> >
>> > This patch adds support for a compact syntax for specifying
>> > constraints in instruction patterns. Credit for the idea goes to Richard
>> Earnshaw.
>> >
>> > I am sending up this RFC to get feedback for its inclusion in GCC 14.
>> > With this new syntax we want a clean break from the current
>> > limitations to make something that is hopefully easier to use and maintain.
>> >
>> > The idea behind this compact syntax is that often times it's quite
>> > hard to correlate the entries in the constraints list, attributes and instruction
>> lists.
>> >
>> > One has to count and this often is tedious.  Additionally when
>> > changing a single line in the insn multiple lines in a diff change,
>> > making it harder to see what's going on.
>> >
>> > This new syntax takes into account many of the common things that are
>> done in MD
>> > files.   It's also worth saying that this version is intended to deal with the
>> > common case of a string based alternatives.   For C chunks we have some
>> ideas
>> > but those are not intended to be addressed here.
>> >
>> > It's easiest to explain with an example:
>> >
>> > normal syntax:
>> >
>> > (define_insn_and_split "*movsi_aarch64"
>> >   [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,
>> r,  r,  r, w,r,w, w")
>> > 	(match_operand:SI 1 "aarch64_mov_operand"  "
>> r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>> >   "(register_operand (operands[0], SImode)
>> >     || aarch64_reg_or_zero (operands[1], SImode))"
>> >   "@
>> >    mov\\t%w0, %w1
>> >    mov\\t%w0, %w1
>> >    mov\\t%w0, %w1
>> >    mov\\t%w0, %1
>> >    #
>> >    * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\",
>> operands[1]);
>> >    ldr\\t%w0, %1
>> >    ldr\\t%s0, %1
>> >    str\\t%w1, %0
>> >    str\\t%s1, %0
>> >    adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
>> >    adr\\t%x0, %c1
>> >    adrp\\t%x0, %A1
>> >    fmov\\t%s0, %w1
>> >    fmov\\t%w0, %s1
>> >    fmov\\t%s0, %s1
>> >    * return aarch64_output_scalar_simd_mov_immediate (operands[1],
>> SImode);"
>> >   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL
>> (operands[1]), SImode)
>> >     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>> >    [(const_int 0)]
>> >    "{
>> >        aarch64_expand_mov_immediate (operands[0], operands[1]);
>> >        DONE;
>> >     }"
>> >   ;; The "mov_imm" type for CNT is just a placeholder.
>> >   [(set_attr "type"
>> "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
>> >
>> load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
>> >    (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
>> >    (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
>> > ]
>> > )
>> >
>> > New syntax:
>> >
>> > (define_insn_and_split "*movsi_aarch64"
>> >   [(set (match_operand:SI 0 "nonimmediate_operand")
>> > 	(match_operand:SI 1 "aarch64_mov_operand"))]
>> >   "(register_operand (operands[0], SImode)
>> >     || aarch64_reg_or_zero (operands[1], SImode))"
>> >   "@@ (cons: 0 1; attrs: type arch length)
>> >    [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
>> >    [k , r  ; mov_reg  , *   , 4] ^
>> >    [r , k  ; mov_reg  , *   , 4] ^
>> >    [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
>> >    [r , n  ; mov_imm  , *   , *] #
>> >    [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt',
>> '%x0', operands[1]);
>> >    [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
>> >    [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
>> >    [m , rZ ; store_4  , *   , 4] str\t%w1, %0
>> >    [m , w  ; store_4  , fp  , 4] str\t%s1, %0
>> >    [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
>> >    [r , Usa; adr      , *   , 4] adr\t%x0, %c1
>> >    [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
>> >    [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
>> >    [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>> >    [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>> >    [w , Ds ; neon_move, simd, 4] <<
>> aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>> >   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL
>> (operands[1]), SImode)
>> >     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>> >   [(const_int 0)]
>> >   {
>> >     aarch64_expand_mov_immediate (operands[0], operands[1]);
>> >     DONE;
>> >   }
>> >   ;; The "mov_imm" type for CNT is just a placeholder.
>> > )
>> >
>> > The patch contains some more rewritten examples for both Arm and
>> > AArch64.  I have included them for examples in this RFC but the final
>> > version posted in GCC 14 will have these split out.
>> >
>> > The main syntax rules are as follows (See docs for full rules):
>> >   - Template must start with "@@" to use the new syntax.
>> >   - "@@" is followed by a layout in parentheses which is "cons:" followed by
>> >     a list of match_operand/match_scratch IDs, then a semicolon, then the
>> >     same for attributes ("attrs:"). Both sections are optional (so you can
>> >     use only cons, or only attrs, or both), and cons must come before attrs
>> >     if present.
>> >   - Each alternative begins with any amount of whitespace.
>> >   - Following the whitespace is a comma-separated list of constraints and/or
>> >     attributes within brackets [], with sections separated by a semicolon.
>> >   - Following the closing ']' is any amount of whitespace, and then the actual
>> >     asm output.
>> >   - Spaces are allowed in the list (they will simply be removed).
>> >   - All alternatives should be specified: a blank list should be
>> >     "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
>> >     you leave certain attributes empty, I have found).
>> >   - The actual constraint string in the match_operand or match_scratch, and
>> >     the attribute string in the set_attr, must be blank or an empty string
>> >     (you can't combine the old and new syntaxes).
>> >   - The common idiom * return can be shortened by using <<.
>> >   - Any unexpanded iterators left during processing will result in an error at
>> >     compile time.   If for some reason <> is needed in the output then these
>> >     must be escaped using \.
>> >   - Inside a @@ block '' is treated as "" when there are multiple characters
>> >     inside the single quotes.  This version does not handle multi byte literals
>> >     like specifying characters as their numerical encoding, like \003 nor does
>> >     it handle unicode, especially multibyte encodings.  This feature may be
>> more
>> >     trouble than it's worth so I have not finished it off, however this means one
>> >     can use 'foo' instead of \"foo\" to denote a multicharacter string.
>> >   - Inside an @@ block any unexpanded iterators will result in a compile time
>> >     fault instead of incorrect assembly being generated at runtime.  If the
>> >     literal <> is needed in the output this needs to be escaped with \<\>.
>> >   - This check is not performed inside C blocks (lines starting with *).
>> >   - Instead of copying the previous instruction again in the next pattern, one
>> >     can use ^ to refer to the previous asm string.
>> 
>> Thanks for doing this.  The new syntax seems like a clear improvement for
>> complex patterns like movs.
>> 
>> Some comments/suggestions:
>> 
>> - From a style perspective, out-of-order constraints should IMO be strongly
>>   discouraged.  The asm string uses %0, %1, %2 etc. to refer to operands,
>>   and having that directly after a list that puts the constraints in
>>   a different order (such as [%2, %0, %1]) would IMO be very confusing.
>> 
>>   I agree there might be cases where dropping constraints makes sense.
>>   But I think in general we should encourage all constraints to be
>>   specified, and be specified in order.  And that's likely to be the
>>   natural choice in an overwhelming majority of cases anyway.
>> 
>>   So how about having a simpler syntax for the first line when all
>>   constraints are specified in order?  Maybe just "cons" (without the
>>   colon or numbers).
>> 
>> - I'm not too keen on the '' thing.  It sounded from internal
>>   discussion like backslashes and quoting were a problem generally.
>> 
>>   Would it work to quote the new form in {@ ... } instead?  There should
>>   be no compatibility problem with that, since @ isn't a standard C++
>>   lexing token.
>
> Fair enough, did you mean {@<string>} or @'string' ? 

I meant quote that whole asm block in {@...} rather than "@@...".  I.e.:

   {@ [cons: =0, 1; attrs: type, arch, length]
    [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
    [k , r  ; mov_reg  , *   , 4] ^
    [r , k  ; mov_reg  , *   , 4] ^
    [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
    [r , n  ; mov_imm  , *   , *] #
    [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
    [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
    [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
    [m , rZ ; store_4  , *   , 4] str\t%w1, %0
    [m , w  ; store_4  , fp  , 4] str\t%s1, %0
    [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
    [r , Usa; adr      , *   , 4] adr\t%x0, %c1
    [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
    [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
    [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
    [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
    [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
   }

That will also help if we do want to support C++ code blocks in future.

Thanks,
Richard
  
Richard Earnshaw (lists) May 16, 2023, 1:56 p.m. UTC | #4
On 24/04/2023 09:33, Richard Sandiford via Gcc-patches wrote:
> Richard Sandiford <richard.sandiford@arm.com> writes:
>> Tamar Christina <tamar.christina@arm.com> writes:
>>> Hi All,
>>>
>>> This patch adds support for a compact syntax for specifying constraints in
>>> instruction patterns. Credit for the idea goes to Richard Earnshaw.
>>>
>>> I am sending up this RFC to get feedback for its inclusion in GCC 14.
>>> With this new syntax we want a clean break from the current limitations to make
>>> something that is hopefully easier to use and maintain.
>>>
>>> The idea behind this compact syntax is that often times it's quite hard to
>>> correlate the entries in the constraints list, attributes and instruction lists.
>>>
>>> One has to count and this often is tedious.  Additionally when changing a single
>>> line in the insn multiple lines in a diff change, making it harder to see what's
>>> going on.
>>>
>>> This new syntax takes into account many of the common things that are done in MD
>>> files.   It's also worth saying that this version is intended to deal with the
>>> common case of a string based alternatives.   For C chunks we have some ideas
>>> but those are not intended to be addressed here.
>>>
>>> It's easiest to explain with an example:
>>>
>>> normal syntax:
>>>
>>> (define_insn_and_split "*movsi_aarch64"
>>>    [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
>>> 	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
>>>    "(register_operand (operands[0], SImode)
>>>      || aarch64_reg_or_zero (operands[1], SImode))"
>>>    "@
>>>     mov\\t%w0, %w1
>>>     mov\\t%w0, %w1
>>>     mov\\t%w0, %w1
>>>     mov\\t%w0, %1
>>>     #
>>>     * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
>>>     ldr\\t%w0, %1
>>>     ldr\\t%s0, %1
>>>     str\\t%w1, %0
>>>     str\\t%s1, %0
>>>     adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
>>>     adr\\t%x0, %c1
>>>     adrp\\t%x0, %A1
>>>     fmov\\t%s0, %w1
>>>     fmov\\t%w0, %s1
>>>     fmov\\t%s0, %s1
>>>     * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>>>    "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>>>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>>>     [(const_int 0)]
>>>     "{
>>>         aarch64_expand_mov_immediate (operands[0], operands[1]);
>>>         DONE;
>>>      }"
>>>    ;; The "mov_imm" type for CNT is just a placeholder.
>>>    [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
>>> 		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
>>>     (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
>>>     (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
>>> ]
>>> )
>>>
>>> New syntax:
>>>
>>> (define_insn_and_split "*movsi_aarch64"
>>>    [(set (match_operand:SI 0 "nonimmediate_operand")
>>> 	(match_operand:SI 1 "aarch64_mov_operand"))]
>>>    "(register_operand (operands[0], SImode)
>>>      || aarch64_reg_or_zero (operands[1], SImode))"
>>>    "@@ (cons: 0 1; attrs: type arch length)
>>>     [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
>>>     [k , r  ; mov_reg  , *   , 4] ^
>>>     [r , k  ; mov_reg  , *   , 4] ^
>>>     [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
>>>     [r , n  ; mov_imm  , *   , *] #
>>>     [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
>>>     [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
>>>     [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
>>>     [m , rZ ; store_4  , *   , 4] str\t%w1, %0
>>>     [m , w  ; store_4  , fp  , 4] str\t%s1, %0
>>>     [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
>>>     [r , Usa; adr      , *   , 4] adr\t%x0, %c1
>>>     [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
>>>     [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
>>>     [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
>>>     [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
>>>     [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
>>>    "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
>>>      && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
>>>    [(const_int 0)]
>>>    {
>>>      aarch64_expand_mov_immediate (operands[0], operands[1]);
>>>      DONE;
>>>    }
>>>    ;; The "mov_imm" type for CNT is just a placeholder.
>>> )
>>>
>>> The patch contains some more rewritten examples for both Arm and AArch64.  I
>>> have included them for examples in this RFC but the final version posted in
>>> GCC 14 will have these split out.
>>>
>>> The main syntax rules are as follows (See docs for full rules):
>>>    - Template must start with "@@" to use the new syntax.
>>>    - "@@" is followed by a layout in parentheses which is "cons:" followed by
>>>      a list of match_operand/match_scratch IDs, then a semicolon, then the
>>>      same for attributes ("attrs:"). Both sections are optional (so you can
>>>      use only cons, or only attrs, or both), and cons must come before attrs
>>>      if present.
>>>    - Each alternative begins with any amount of whitespace.
>>>    - Following the whitespace is a comma-separated list of constraints and/or
>>>      attributes within brackets [], with sections separated by a semicolon.
>>>    - Following the closing ']' is any amount of whitespace, and then the actual
>>>      asm output.
>>>    - Spaces are allowed in the list (they will simply be removed).
>>>    - All alternatives should be specified: a blank list should be
>>>      "[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
>>>      you leave certain attributes empty, I have found).
>>>    - The actual constraint string in the match_operand or match_scratch, and
>>>      the attribute string in the set_attr, must be blank or an empty string
>>>      (you can't combine the old and new syntaxes).
>>>    - The common idiom * return can be shortened by using <<.
>>>    - Any unexpanded iterators left during processing will result in an error at
>>>      compile time.   If for some reason <> is needed in the output then these
>>>      must be escaped using \.
>>>    - Inside a @@ block '' is treated as "" when there are multiple characters
>>>      inside the single quotes.  This version does not handle multi byte literals
>>>      like specifying characters as their numerical encoding, like \003 nor does
>>>      it handle unicode, especially multibyte encodings.  This feature may be more
>>>      trouble than it's worth so I have not finished it off, however this means one
>>>      can use 'foo' instead of \"foo\" to denote a multicharacter string.
>>>    - Inside an @@ block any unexpanded iterators will result in a compile time
>>>      fault instead of incorrect assembly being generated at runtime.  If the
>>>      literal <> is needed in the output this needs to be escaped with \<\>.
>>>    - This check is not performed inside C blocks (lines starting with *).
>>>    - Instead of copying the previous instruction again in the next pattern, one
>>>      can use ^ to refer to the previous asm string.
>>
>> Thanks for doing this.  The new syntax seems like a clear improvement
>> for complex patterns like movs.
>>
>> Some comments/suggestions:
>>
>> - From a style perspective, out-of-order constraints should IMO be strongly
>>    discouraged.  The asm string uses %0, %1, %2 etc. to refer to operands,
>>    and having that directly after a list that puts the constraints in
>>    a different order (such as [%2, %0, %1]) would IMO be very confusing.
>>
>>    I agree there might be cases where dropping constraints makes sense.
>>    But I think in general we should encourage all constraints to be
>>    specified, and be specified in order.  And that's likely to be the
>>    natural choice in an overwhelming majority of cases anyway.
>>
>>    So how about having a simpler syntax for the first line when all
>>    constraints are specified in order?  Maybe just "cons" (without the
>>    colon or numbers).
> 
> Alternatively: leading "=" and "+" characters describe the operand
> as a whole, rather than individual alternatives.  So maybe "=" and
> "+" should be in the "header" (the first line) rather than the first
> "row"/alternative.  That would then be a justification for keeping
> the operand numbers even for the simple case.
> 
> E.g.:
> 
>     cons: =0 1
> 

Yes, that seems a sensible idea.

> Also, it would be good if the header and rows were consistent about
> whether they use comma separators or whitespace separators.  At the
> moment, the headers are whitespace-separated while the rows are
> comma-separated.
> 
> On that: it might be possible to use whitespace-separated columns even
> for constraints, since an empty constraint should be equivalent to X.
> That would look like:
> 
>      [r  r  ; mov_reg   *    4] mov\t%w0, %w1
>      [k  r  ; mov_reg   *    4] ^
>      [r  k  ; mov_reg   *    4] ^
>      [r  M  ; mov_imm   *    4] mov\t%w0, %1
>      [r  n  ; mov_imm   *    *] #
>      [r  Usv; mov_imm   sve  4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
>      [r  m  ; load_4    *    4] ldr\t%w0, %1
>      [w  m  ; load_4    fp   4] ldr\t%s0, %1
>      [m  rZ ; store_4   *    4] str\t%w1, %0
>      [m  w  ; store_4   fp   4] str\t%s1, %0
>      [r  Usw; load_4    *    8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
>      [r  Usa; adr       *    4] adr\t%x0, %c1
>      [r  Ush; adr       *    4] adrp\t%x0, %A1
>      [w  rZ ; f_mcr     fp   4] fmov\t%s0, %w1
>      [r  w  ; f_mrc     fp   4] fmov\t%w0, %s1
>      [w  w  ; fmov      fp   4] fmov\t%s0, %s1
>      [w  Ds ; neon_move simd 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"

Keeping the commas means that when an alternative takes two constraints 
(you can conform to either) it's possible to put white space between 
them, so "rZ" can be written as "r Z" to make it clear that it's not a 
multi-letter constraint.  If white space becomes the delimiter then you 
must write "rZ" with no separator.


> 
> I'm not sure whether that's better or worse though.  Keeping commas is
> fine by me FWIW, as long as we do it in the header too.
> 
> Thanks,
> Richard

R.
  
Richard Sandiford June 5, 2023, 8:35 p.m. UTC | #5
Looks good!  Just some minor comments:

Tamar Christina <tamar.christina@arm.com> writes:
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 6a435eb44610960513e9739ac9ac1e8a27182c10..1437ab55b260ab5c876e92d59ba39d24bffc6276 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
>                          from such an insn.
>  * Output Statement::    For more generality, write C code to output
>                          the assembler code.
> +* Compact Syntax::      Compact syntax for writing Machine descriptors.

s/Machine/machine/

>  * Predicates::          Controlling what kinds of operands can be used
>                          for an insn.
>  * Constraints::         Fine-tuning operand selection.
> @@ -713,6 +714,213 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
>  @end group
>  @end smallexample
>  
> +@node Compact Syntax
> +@section Compact Syntax
> +@cindex compact syntax
> +
> +In cases where the number of alternatives in a @code{define_insn} or
> +@code{define_insn_and_split} is large, it may be beneficial to use the
> +compact syntax when specifying alternatives.
> +
> +This syntax puts the constraints and attributes on the same horizontal line as
> +the instruction assembly template.
> +
> +As an example
> +
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
> +	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
> +  ""
> +  "@@
> +   mov\\t%w0, %w1
> +   mov\\t%w0, %w1
> +   mov\\t%w0, %w1
> +   mov\\t%w0, %1
> +   #
> +   * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
> +  "&& true"
> +   [(const_int 0)]
> +  @{
> +     aarch64_expand_mov_immediate (operands[0], operands[1]);
> +     DONE;
> +  @}
> +  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
> +   (set_attr "arch"   "*,*,*,*,*,sve")
> +   (set_attr "length" "4,4,4,4,*,  4")
> +]
> +)
> +@end group
> +@end smallexample
> +
> +can be better expressed as:
> +
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +	(match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: =0, 1; attrs: type, arch, length]
> +     [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
> +     [k , r  ; mov_reg  , *   , 4] ^
> +     [r , k  ; mov_reg  , *   , 4] ^
> +     [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
> +     [r , n  ; mov_imm  , *   , *] #
> +     [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
> +  @}
> +  "&& true"
> +  [(const_int 0)]
> +  @{
> +    aarch64_expand_mov_immediate (operands[0], operands[1]);
> +    DONE;
> +  @}
> +)
> +@end group
> +@end smallexample
> +
> +The syntax rules are as follows:
> +@itemize @bullet
> +@item
> +Template must start with "@{@@" to use the new syntax.

s/Template/Templates/ or s/Template/The template/

@{@@ should be quoted using @samp{...} rather than "...".  Same for later
instances.

> +
> +@item
> +"@{@@" is followed by a layout in square brackets which is @samp{"cons:"} followed by
> +a comma-separated list of @code{match_operand}/@code{match_scratch} operand numbers, then a
> +semicolon, followed by the same for attributes (@samp{"attrs:"}).  Operand

No "..." needed for cons: and attrs: (@samp is enough)

> +modifiers can be placed in this section group as well.  Both sections
> +are optional (so you can use only @samp{cons}, or only @samp{attrs}, or both),
> +and @samp{cons} must come before @samp{attrs} if present.
> +
> +@item
> +Each alternative begins with any amount of whitespace.
> +
> +@item
> +Following the whitespace is a comma-separated list of @samp{constraints} and/or
> +@samp{attributes} within brackets @code{[]}, with sections separated by a

I think "constraints" and "attributes" should be unquoted here, rather
than @samp.

> +semicolon.
> +
> +@item
> +Should you want to copy the previous asm line, the symbol @code{^} can be used.
> +This allows less copy pasting between alternatives and reduces the number of
> +lines to update on changes.
> +
> +@item
> +When using C functions for output, the idiom @code{* return <function>;} can be

I think this should be @samp rather than @code, since it's quoting
a sample rather than a single entity (but I don't know texinfo well,
so could be wrong).

s/<function>/@var{function}/

> +replaced with the shorthand @code{<< <function>;}.
> +
> +@item
> +Following the closing ']' is any amount of whitespace, and then the actual asm

@samp or @code here too

> +output.
> +
> +@item
> +Spaces are allowed in the list (they will simply be removed).
> +
> +@item
> +All alternatives should be specified: a blank list should be "[,,]", "[,,;,]"
> +etc., not "[]" or "".

@samp for these too.  I don't think @samp{} prints anything, so ""
probably needs to be described in words.

Maybe s/All alternatives/All constraint alternatives/?  Attribute
alternatives should generally use "*" rather than be blank.

> +
> +@item
> +Within an @{@@ block both multiline and singleline C comments are allowed, but

@samp quoting here too

> +when used outside of a C block they must be the only non-whitespace blocks on
> +the line.
> +
> +@item
> +Any unexpanded iterators within the block will result in a compile time error
> +rather than accepting the generating the @code{<..>} in the output asm.  If the

Typo: "than accepting the generating the".

> +literal @code{<..>} is required it should be escaped as @code{\<..\>}.
> +
> +@item
> +Within an @{@@ block, any iterators that do not get expanded will result in an
> +error.  If for some reason it is required to have @code{<>} in the output then
> +these must be escaped using @backslashchar{}.

It sounds like these last two bullet points are saying the same thing.
If they do say the same thing, the second one seems clearer to me FWIW.

> +@item
> +The actual constraint string in the @code{match_operand} or
> +@code{match_scratch}, and the attribute string in the @code{set_attr}, must be
> +blank or an empty string (you can't combine the old and new syntaxes).
> +
> +@item
> +@code{set_attr} are optional.  If a @code{set_attr} is defined in the
> +@samp{attrs} section then that declaration can be both definition and
> +declaration.  If both @samp{attrs} and @code{set_attr} are defined for the same
> +entry then the attribute string must be empty or blank.

Took me a couple of reads to get this.  How about:

@code{set_attr}s are optional if the template includes an @code{attrs}
section.  If there is a @code{set_attr} for an attribute that is mentioned
in the @code{attrs} section, the @code{set_attr} @var{value-string} must
be empty or blank.

(Although see below for a comment about this kind of placeholder.)

> +@item
> +Additional @code{set_attr} can be specified other than the ones in the
> +@samp{attrs} list.  These must use the @samp{normal} syntax and must be defined

Should be no @samp{} around "normal".

> +after all @samp{attrs} specified.

How about "must come last"?

> +
> +In other words, the following are valid:
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +	(match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  ...

@dots{} for ellipsis.  Same for the other examples.

> +  [(set_attr "type")]
> +  [(set_attr "arch")]
> +  [(set_attr "length")]
> +  [(set_attr "foo" "mov_imm")]

Too many [ and ], should be:

  [(set_attr "type")
   (set_attr "arch")
   (set_attr "length")
   (set_attr "foo" "mov_imm")]

Same for the other examples.

> diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
> index 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..8ac62dc37edf4c095d694e5c7caa4499cf201334 100644
> --- a/gcc/genoutput.cc
> +++ b/gcc/genoutput.cc
> @@ -91,6 +91,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "errors.h"
>  #include "read-md.h"
>  #include "gensupport.h"
> +#include <string>

This include isn't needed any more.

>  
>  /* No instruction can have more operands than this.  Sorry for this
>     arbitrary limit, but what machine will have an instruction with
> @@ -157,6 +158,7 @@ public:
>    int n_alternatives;		/* Number of alternatives in each constraint */
>    int operand_number;		/* Operand index in the big array.  */
>    int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
> +  bool compact_syntax_p;
>    struct operand_data operand[MAX_MAX_OPERANDS];
>  };
>  
> @@ -700,12 +702,57 @@ process_template (class data *d, const char *template_code)
>  	  if (sp != ep)
>  	    message_at (d->loc, "trailing whitespace in output template");
>  
> -	  while (cp < sp)
> +	  /* Check for any unexpanded iterators.  */
> +	  if (bp[0] != '*' && d->compact_syntax_p)

I assume the bp[0] != '*' condition skips the check for C code blocks.
Genuine question, but are you sure we want that?  C code often includes
asm strings (in quotes), such as for the SVE CNT[BHWD] example.

Extending the check would mean that any use of <...> for C++ templates
will need to be quoted, but explicit instantiation is pretty rare
in .md files.  It would also look weird for conditions.

Either way is fine, just asking.

>  	    {
> -	      putchar (*cp);
> -	      cp++;
> +	      const char *p = cp;
> +	      const char *last_bracket = nullptr;
> +	      while (p < sp)
> +		{
> +		  if (*p == '\\' && p + 1 < sp)
> +		    {
> +		      putchar (*p);
> +		      putchar (*(p+1));
> +		      p += 2;
> +		      continue;
> +		    }
> +
> +		  if (*p == '>' && last_bracket && *last_bracket == '<')
> +		    {
> +		      size_t len = p - last_bracket;
> +		      char *iter = XNEWVEC (char, len);
> +		      memcpy (iter, last_bracket+1, (size_t)(len - 1));
> +		      char *nl = strchr (const_cast<char*> (cp), '\n');
> +		      if (nl)
> +			*nl ='\0';
> +		      iter[len - 1] = '\0';
> +		      fatal_at (d->loc, "unresolved iterator '%s' in '%s'",
> +				iter, cp);

I think we could just use:

		      fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
				len - 1, last_bracket + 1, cp);

and avoid the copy.

> +		    }
> +		  else if (*p == '<' || *p == '>')
> +		    last_bracket = p;
> +
> +		  putchar (*p);
> +		  p += 1;
> +		}
> +
> +	      if (last_bracket)
> +		{
> +		  char *nl = strchr (const_cast<char*> (cp), '\n');
> +		  if (nl)
> +		    *nl ='\0';
> +		  fatal_at (d->loc, "unmatched angle brackets, likely an "
> +			    "error in iterator syntax in %s", cp);
> +		}
> +	    }
> +	  else
> +	    {
> +	      while (cp < sp)
> +		putchar (*(cp++));
>  	    }
>  
> +	  cp = sp;
> +
>  	  if (!found_star)
>  	    puts ("\",");
>  	  else if (*bp != '*')
> @@ -881,6 +928,8 @@ gen_insn (md_rtx_info *info)
>    else
>      d->name = 0;
>  
> +  d->compact_syntax_p = compact_syntax.contains (insn);
> +
>    /* Build up the list in the same order as the insns are seen
>       in the machine description.  */
>    d->next = 0;
> diff --git a/gcc/gensupport.h b/gcc/gensupport.h
> index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
> --- a/gcc/gensupport.h
> +++ b/gcc/gensupport.h
> @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_GENSUPPORT_H
>  #define GCC_GENSUPPORT_H
>  
> +#include "hash-set.h"
>  #include "read-md.h"
>  
>  struct obstack;
> @@ -218,6 +219,8 @@ struct pattern_stats
>    int num_operand_vars;
>  };
>  
> +extern hash_set<rtx> compact_syntax;
> +
>  extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
>  extern void compute_test_codes (rtx, file_location, char *);
>  extern file_location get_file_location (rtx);
> diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
> index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..f1d6b512356844da5d1dadbc69e08c16ef7a3abd 100644
> --- a/gcc/gensupport.cc
> +++ b/gcc/gensupport.cc
> @@ -27,12 +27,17 @@
>  #include "read-md.h"
>  #include "gensupport.h"
>  #include "vec.h"
> +#include <string>
> +#include <vector>
> +#include <ctype.h>
>  
>  #define MAX_OPERANDS 40
>  
>  static rtx operand_data[MAX_OPERANDS];
>  static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
>  static char used_operands_numbers[MAX_OPERANDS];
> +/* List of entries which are part of the new syntax.  */
> +hash_set<rtx> compact_syntax;

Might as well make this static too, like the other vars.

>  
>  
>  /* In case some macros used by files we include need it, define this here.  */
> @@ -545,6 +550,569 @@ gen_rewrite_sequence (rtvec vec)
>    return new_vec;
>  }
>  
> +/* The following is for handling the compact syntax for constraints and
> +   attributes.
> +
> +   The normal syntax looks like this:
> +
> +       ...
> +       (match_operand: 0 "s_register_operand" "r,I,k")
> +       (match_operand: 2 "s_register_operand" "r,k,I")
> +       ...
> +       "@
> +	<asm>
> +	<asm>
> +	<asm>"
> +       ...
> +       (set_attr "length" "4,8,8")
> +
> +   The compact syntax looks like this:
> +
> +       ...
> +       (match_operand: 0 "s_register_operand")
> +       (match_operand: 2 "s_register_operand")
> +       ...
> +       {@ [cons: 0, 2; attrs: length]
> +	[r,r; 4] <asm>
> +	[I,k; 8] <asm>
> +	[k,I; 8] <asm>
> +       }
> +       ...
> +       (set_attr "length")
> +
> +   This is the only place where this syntax needs to be handled.  Relevant
> +   patterns are transformed from compact to the normal syntax before they are
> +   queued, so none of the gen* programs need to know about this syntax at all.
> +
> +   Conversion process (convert_syntax):
> +
> +   0) Check that pattern actually uses new syntax (check for {@ ... }).
> +
> +   1) Get the "layout", i.e. the "(cons: 0 2; attrs: length)" from the above

Now "[cons: 0, 2; attrs: length]"

> +      example.  cons must come first; both are optional. Set up two vecs,
> +      convec and attrvec, for holding the results of the transformation.
> +
> +   2) For each alternative: parse the list of constraints and/or attributes,
> +      and enqueue them in the relevant lists in convec and attrvec.  By the end
> +      of this process, convec[N].con and attrvec[N].con should contain regular
> +      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
> +      as we go.
> +
> +   3) Search the rtx and write the constraint and attribute lists into the
> +      correct places. Write the asm back into the template.  */
> +
> +/* Helper class for shuffling constraints/attributes in convert_syntax and
> +   add_constraints/add_attributes.  This includes commas but not whitespace.  */
> +
> +class conlist {
> +private:
> +  std::string con;
> +
> +public:
> +  std::string name;
> +  std::string modifier;
> +  int idx = -1;
> +
> +  conlist ()
> +  {
> +  }

Now that we're C++11, it's probably more canonical to use:

  conlist () = default;

> +
> +  /* [ns..ns + len) should be a string with the id of the rtx to match
> +     i.e. if rtx is the relevant match_operand or match_scratch then
> +     [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
> +     [ns..ns + len) should equal XSTR (rtx, 0).  */
> +  conlist (const char *ns, unsigned int len, bool numeric)
> +  {
> +    /* Trim leading whitespaces.  */
> +    while (*ns == ' ' || *ns == '\t')

I think ISSPACE (*ns) is preferred here

> +      {
> +	ns++;
> +	len--;
> +      }
> +
> +    /* Trim trailing whitespace.  */
> +    for (int i = len - 1; i >= 0; i++, len--)
> +      if (ns[len] != ' ' && ns[len] != '\t')

Similarly !ISSPACE (ns[len]) here.

> +	break;
> +
> +    /* Parse off any modifiers.  */
> +    while (!isalnum (*ns))

Should be ISALNUM (to make it independent of the locale)

> +      {
> +	modifier += *(ns++);
> +	len--;
> +      }
> +
> +    /* What remains is the name.  */
> +    name.assign (ns, len);
> +    if (numeric)
> +      idx = std::stoi(name);

Formatting: space before "(".

> +  }
> +
> +  /* Adds a character to the end of the string.  */
> +  void add (char c)
> +  {
> +    con += c;
> +  }
> +
> +  /* Output the string in the form of a brand-new char *, then effectively
> +     clear the internal string by resetting len to 0.  */
> +  char * out ()

Formatting: no need for a space before "out".

> +  {
> +    /* Final character is always a trailing comma, so strip it out.  */

trailing ',', ';' or ']', rather than just a comma?

> +    char * q;

Similarly no space before "q" here.

> +    if (modifier.empty ())
> +      q = xstrndup (con.c_str (), con.size () - 1);

Could just be "xstrdup (con.c_str ())".

> +    else
> +      {
> +	int len = con.size () + modifier.size ();
> +	q = XNEWVEC (char, len);
> +	strncpy (q, modifier.c_str (), modifier.size ());
> +	strncpy (q + modifier.size (), con.c_str (), con.size ());
> +	q[len -1] = '\0';
> +      }

Do we need the separation between "modifier" and "cons"?  It looks
like the code completes the initialisation of "modifier" before it
writes to "cons", and so we could just use a single string.

> +
> +    con.clear ();
> +    modifier.clear ();
> +    return q;
> +  }
> +};
> +
> +typedef std::vector<conlist> vec_conlist;
> +
> +/* Add constraints to an rtx. The match_operand/match_scratch that are matched
> +   must be in depth-first order i.e. read from top to bottom in the pattern.

This is no longer true (thanks).  Because of that...

> +   index is the index of the conlist we are up to so far.

...I don't think we need the index parameter or the return value.

Instead, maybe we should reset vec_conlist::idx to -1...

> +   This function is similar to remove_constraints.
> +   Errors if adding the constraints would overwrite existing constraints.
> +   Returns 1 + index of last conlist to be matched.  */
> +
> +static unsigned int
> +add_constraints (rtx part, file_location loc, unsigned int index,
> +		 vec_conlist &cons)
> +{
> +  const char *format_ptr;
> +
> +  if (part == NULL_RTX || index == cons.size ())
> +    return index;
> +
> +  /* If match_op or match_scr, check if we have the right one, and if so, copy
> +     over the constraint list.  */
> +  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
> +    {
> +      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
> +      int id = XINT (part, 0);
> +
> +      if (XSTR (part, field)[0] != '\0')
> +	{
> +	  error_at (loc, "can't mix normal and compact constraint syntax");
> +	  return cons.size ();
> +	}
> +      XSTR (part, field) = cons[id].out ();

...here, to indicate that the constraint has been consumed.

It would be good to check that id < cons.size ().

> +
> +      ++index;
> +    }
> +
> +  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
> +
> +  /* Recursively search the rtx.  */
> +  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
> +    switch (*format_ptr++)
> +      {
> +      case 'e':
> +      case 'u':
> +	index = add_constraints (XEXP (part, i), loc, index, cons);
> +	break;
> +      case 'E':
> +	if (XVEC (part, i) != NULL)
> +	  for (int j = 0; j < XVECLEN (part, i); j++)
> +	    index = add_constraints (XVECEXP (part, i, j), loc, index, cons);
> +	break;
> +      default:
> +	continue;
> +      }
> +
> +  return index;
> +}
> +
> +/* Add attributes to an rtx. The attributes that are matched must be in order
> +   i.e. read from top to bottom in the pattern.
> +   Errors if adding the attributes would overwrite existing attributes.
> +   Returns 1 + index of last conlist to be matched.  */
> +
> +static unsigned int
> +add_attributes (rtx x, file_location loc, vec_conlist &attrs)
> +{
> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> +  unsigned int index = 0;
> +
> +  if (XVEC (x, attr_index) == NULL)
> +    return index;
> +
> +  for (int i = 0; i < XVECLEN (x, attr_index); ++i)
> +    {
> +      rtx part = XVECEXP (x, attr_index, i);
> +
> +      if (GET_CODE (part) != SET_ATTR)
> +	continue;
> +
> +      if (attrs[index].name.compare (XSTR (part, 0)) == 0)

Just "attrs[index].name == XSTR (part, 0)" should work.

> +	{
> +	  if (XSTR (part, 1) && XSTR (part, 1)[0] != '\0')
> +	    {
> +	      error_at (loc, "can't mix normal and compact attribute syntax");
> +	      break;
> +	    }
> +	  XSTR (part, 1) = attrs[index].out ();
> +
> +	  ++index;
> +	  if (index == attrs.size ())
> +	    break;
> +	}

It looks like you forgive mixing new-style and old-style syntax,
since there's no "else error" here.  But the documentation said
that that wasn't allowed.

Either way seems OK to me, but see the next comment.

> +    }
> +
> +  return index;
> +}
> +
> +/* Modify the attributes list to make space for the implicitly declared
> +   attributes in the attrs: list.  */
> +
> +static void
> +create_missing_attributes (rtx x, file_location /* loc */, vec_conlist &attrs)
> +{
> +  if (attrs.empty ())
> +    return;
> +
> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> +  vec_conlist missing;
> +
> +  /* This is an O(n*m) loop but it's fine, both n and m will always be very
> +     small.  */

Agreed that quadraticness isn't a problem.  But I wonder how many
people would write an explicit placeholder set_attr.  Unlike match_operand
and match_scratch, a placeholder set_attr doesn't carry any additional
information.

It might be simpler to drop add_attributes and add all attributes
unconditionally in this function instead.  If the user tries to specify
the same attribute using both syntaxes, the pattern would end up with
two definitions of the same attribute, which ought to be flagged by
existing code.

> +  for (conlist cl : attrs)
> +    {
> +      bool found = false;
> +      for (int i = 0; XVEC (x, attr_index) && i < XVECLEN (x, attr_index); ++i)
> +	{
> +	  rtx part = XVECEXP (x, attr_index, i);
> +
> +	  if (GET_CODE (part) != SET_ATTR
> +	      || cl.name.compare (XSTR (part, 0)) == 0)
> +	    {
> +	      found = true;
> +	      break;
> +	    }
> +	}
> +
> +      if (!found)
> +	missing.push_back (cl);
> +    }
> +
> +  rtvec orig = XVEC (x, attr_index);
> +  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
> +  rtvec copy = rtvec_alloc (n_curr + missing.size ());
> +
> +  /* Create a shallow copy of existing entries.  */
> +  memcpy (&copy->elem[missing.size ()], &orig->elem[0], sizeof (rtx) * n_curr);
> +  XVEC (x, attr_index) = copy;
> +
> +  /* Create the new elements.  */
> +  for (unsigned i = 0; i < missing.size (); i++)
> +    {
> +      rtx attr = rtx_alloc (SET_ATTR);
> +      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
> +      XSTR (attr, 1) = NULL;
> +      XVECEXP (x, attr_index, i) = attr;
> +    }
> +
> +  return;
> +}
> +
> +/* Consumes spaces and tabs.  */
> +
> +static inline void
> +skip_spaces (const char **str)
> +{
> +  while (**str == ' ' || **str == '\t')

ISSPACE here too.

> +    (*str)++;
> +}
> +
> +/* Consumes the given character, if it's there.  */
> +
> +static inline bool
> +expect_char (const char **str, char c)
> +{
> +  if (**str != c)
> +    return false;
> +  (*str)++;
> +  return true;
> +}
> +
> +/* Parses the section layout that follows a "{@}" if using new syntax. Builds
> +   a vector for a single section. E.g. if we have "attrs: length arch)..."
> +   then list will have two elements, the first for "length" and the second
> +   for "arch".  */
> +
> +static void
> +parse_section_layout (const char **templ, const char *label,
> +		      vec_conlist &list, bool numeric)
> +{
> +  const char *name_start;
> +  size_t label_len = strlen (label);
> +  if (strncmp (label, *templ, label_len) == 0)
> +    {
> +      *templ += label_len;
> +
> +      /* Gather the names.  */
> +      while (**templ != ';' && **templ != ']')
> +	{
> +	  skip_spaces (templ);
> +	  name_start = *templ;
> +	  int len = 0;
> +	  char val = (*templ)[len];
> +	  while (val != ',' && val != ';' && val != ']')
> +	     val = (*templ)[++len];
> +	  *templ += len;
> +	  if (val == ',')
> +	    (*templ)++;
> +	  list.push_back (conlist (name_start, len, numeric));
> +	}
> +    }
> +}
> +
> +/* Parse a section, a section is defined as a named space separated list, e.g.
> +
> +   foo: a b c

Now comma-separated rather than space-separated.  Applies to the
example too.

> +
> +   is a section named "foo" with entries a,b and c.  */
> +
> +static void
> +parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
> +	       vec_conlist &list, file_location loc, const char *name)
> +{
> +  unsigned int i;
> +
> +  /* Go through the list, one character at a time, adding said character
> +     to the correct string.  */
> +  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
> +    {
> +      if (**templ != ' ' && **templ != '\t')

!ISSPACE

> +	{
> +	  list[i].add(**templ);

Formatting: should be a space before "(".

> +	  if (**templ == ',')
> +	    {
> +	      ++i;
> +	      if (i == n_elems)
> +		fatal_at (loc, "too many %ss in alternative %d: expected %d",
> +			  name, alt_no, n_elems);
> +	    }
> +	}
> +    }
> +
> +  if (i + 1 < n_elems)
> +    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
> +	      name, alt_no, n_elems, i);
> +
> +  list[i].add(',');
> +}
> +
> +/* The compact syntax has more convience syntaxes.  As such we post process
> +   the lines to get them back to something the normal syntax understands.  */
> +
> +static void
> +preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
> +			   std::string &last_line)
> +{
> +  /* Check if we're copying the last statement.  */
> +  if (line.find ("^") == 0 && line.size () == 1)
> +    {
> +      if (last_line.empty ())
> +	fatal_at (loc, "found instruction to copy previous line (^) in"
> +		       "alternative %d but no previous line to copy", alt_no);
> +      line = last_line;
> +      return;
> +    }
> +
> +  std::string result;
> +  std::string buffer;
> +  /* Check if we have << which means return c statement.  */
> +  if (line.find ("<<") == 0)
> +    {
> +      result.append ("* return ");
> +      result.append (line.substr (3));

Seems like this should be line.substr (2) or that the find() should
include a space after "<<".  As it stands, we'd accept <<X and drop
the X.

> +    }
> +  else
> +    result.append (line);
> +
> +  line = result;
> +  return;
> +}
> +
> +/* Converts an rtx from compact syntax to normal syntax if possible.  */
> +
> +static void
> +convert_syntax (rtx x, file_location loc)
> +{
> +  int alt_no;
> +  unsigned int index, templ_index;
> +  const char *templ;
> +  vec_conlist tconvec, convec, attrvec;
> +
> +  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
> +
> +  templ = XTMPL (x, templ_index);
> +
> +  /* Templates with constraints start with "{@".  */
> +  if (strncmp ("*{@", templ, 3))
> +    return;
> +
> +  /* Get the layout for the template.  */
> +  templ += 3;
> +  skip_spaces (&templ);
> +
> +  if (!expect_char (&templ, '['))
> +    fatal_at (loc, "expecing `[' to begin section list");
> +
> +  parse_section_layout (&templ, "cons:", tconvec, true);
> +  convec.resize (tconvec.size ());
> +
> +  /* Check for any duplicate cons entries and sort based on i.  */
> +  for (unsigned i = 0; i < tconvec.size (); i++)
> +    {
> +      int idx = tconvec[i].idx;
> +      if (convec[idx].idx >= 0)
> +	fatal_at (loc, "duplicate cons number found: %d", idx);
> +      convec[idx] = tconvec[i];
> +    }
> +  tconvec.clear ();

"convec.resize (tconvec.size ());" isn't guaranteed to be enough
if the cons: skips operands.  Either we need to calculate the
maximum idx first, or we need to grow convec on demand.

> +
> +

Nit: excess whitespace

> +  if (*templ != ']')
> +    {
> +      if (*templ == ';')
> +	skip_spaces (&(++templ));
> +      parse_section_layout (&templ, "attrs:", attrvec, false);
> +      create_missing_attributes (x, loc, attrvec);
> +    }
> +
> +  if (!expect_char (&templ, ']'))
> +    {
> +      fatal_at (loc, "expecting `]` to end section list - section list "
> +		"must have cons first, attrs second");
> +    }

Formatting nit: unnecessary braces

> +
> +  /* We will write the un-constrainified template into new_templ.  */
> +  std::string new_templ;
> +  new_templ.append ("@");
> +
> +  /* Skip to the first proper line.  */
> +  while (*templ++ != '\n');

This seems to allow anything to follow the "]".  Should we instead
use skip_spaces and then require a '\n'?

> +  alt_no = 0;
> +
> +  std::string last_line;
> +
> +  /* Process the alternatives.  */
> +  while (*(templ - 1) != '\0')
> +    {
> +      /* Copy leading whitespace.  */
> +      std::string buffer;
> +      while (*templ == ' ' || *templ == '\t')
> +	buffer += *templ++;

Why do we need to do that?  The '@' handling in genoutput.cc seems
to skip whatever space is present.

I was wondering if it was so that column numbers matched in compiler error
messages against "<<" lines, but those would already be off because of
the "* return" transformation (not an issue that needs to be fixed).

> +
> +      /* Check if we're at the end.  */
> +      if (templ[0] == '}' && templ[1] == '\0')
> +	break;
> +
> +      new_templ += '\n';
> +      new_templ.append (buffer);
> +
> +      if (expect_char (&templ, '['))
> +	{
> +	  /* Parse the constraint list, then the attribute list.  */
> +	  if (convec.size () > 0)
> +	    parse_section (&templ, convec.size (), alt_no, convec, loc,
> +			   "constraint");
> +
> +	  if (attrvec.size () > 0)
> +	    {
> +	      if (convec.size () > 0 && !expect_char (&templ, ';'))
> +		fatal_at (loc, "expected `;' to separate constraints "
> +			       "and attributes in alternative %d", alt_no);
> +
> +	      parse_section (&templ, attrvec.size (), alt_no,
> +			     attrvec, loc, "attribute");
> +	    }
> +
> +	  if (!expect_char (&templ, ']'))
> +	    fatal_at (loc, "expected end of constraint/attribute list but "
> +			   "missing an ending `]' in alternative %d", alt_no);
> +	}
> +      else if (templ[0] == '/' && templ[1] == '/')
> +	{
> +	  templ+=2;

Formatting: should be spaces around "+=".  But here, and...

> + 	  /* Glob till newline or end of string.  */
> +	  while (*templ != '\n' || *templ != '\0')
> +	    templ++;
> +	}
> +      else if (templ[0] == '/' && templ[1] == '*')
> +	{
> +	  templ+=2;
> + 	  /* Glob till newline or end of multiline comment.  */
> +	  while (templ[0] != '*' && templ[1] != '/')
> +	    templ++;
> +	  templ++;

...especially here, I think we should instead completely skip
lines with comments and then "continue", without adding anything
to new_templ for that iteration of the loop.  That would ensure
that:

(a) multi-line // comments work correctly
(b) a comment at the end gets silently dropped without adding a
    line to the new template

> +	}
> +      else
> +	fatal_at (loc, "expected constraint/attribute list at beginning of "
> +		       "alternative %d but missing a starting `['", alt_no);
> +
> +      /* Skip whitespace between list and asm.  */
> +      ++templ;
> +      skip_spaces (&templ);
> +
> +      /* Copy asm to new template.  */
> +      std::string line;
> +      while (*templ != '\n' && *templ != '\0')
> +	line += *templ++;
> +
> +      /* Apply any pre-processing needed to the line.  */
> +      preprocess_compact_syntax (loc, alt_no, line, last_line);
> +      new_templ.append (line);
> +      last_line = line;
> +
> +      /* The processing is very sensitive to whitespace, so preserve
> +	 all but the trailing ones.  */
> +      if (templ[0] == '\n')
> +	*templ++;

Is the point here that we allow the closing "}" to be on its own line?
It might be worth calling that out explicitly if so.

In other words, I'd understood this to mean something like:

    /* Normal "*..." syntax expects the closing quote to be on the final
       line of asm, whereas we allow the closing "}" to be on its own line.
       Postpone copying the '\n' until we know that there is another
       alternative in the list.  */

> +      ++alt_no;
> +    }
> +
> +  /* Write the constraints and attributes into their proper places.  */
> +  if (convec.size () > 0)
> +    {
> +      index = add_constraints (x, loc, 0, convec);
> +      if (index < convec.size ())
> +	fatal_at (loc, "could not find match_operand/scratch with id %d",
> +		  convec[index].idx);
> +    }
> +
> +  if (attrvec.size () > 0)
> +    {
> +      index = add_attributes (x, loc, attrvec);
> +      if (index < attrvec.size ())
> +	fatal_at (loc, "could not find set_attr for attribute %s",
> +		  attrvec[index].name.c_str ());
> +    }
> +
> +  /* Copy over the new un-constrainified template.  */
> +  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
> +
> +  /* Register for later checks during iterator expansions.  */
> +  compact_syntax.add (x);
> +
> +#if DEBUG
> +  print_rtl_single (stderr, x);
> +#endif

IMO it'd be better to drop this.  It's easy enough to add locally
if that's what someone wants.  ("make mddump" would also be useful
for debugging this.)

Thanks,
Richard

> +}
> +
>  /* Process a top level rtx in some way, queuing as appropriate.  */
>  
>  static void
> @@ -553,10 +1121,12 @@ process_rtx (rtx desc, file_location loc)
>    switch (GET_CODE (desc))
>      {
>      case DEFINE_INSN:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_insn_tail, loc);
>        break;
>  
>      case DEFINE_COND_EXEC:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_cond_exec_tail, loc);
>        break;
>  
> @@ -631,6 +1201,7 @@ process_rtx (rtx desc, file_location loc)
>  	attr = XVEC (desc, split_code + 1);
>  	PUT_CODE (desc, DEFINE_INSN);
>  	XVEC (desc, 4) = attr;
> +	convert_syntax (desc, loc);
>  
>  	/* Queue them.  */
>  	insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  
Richard Sandiford June 6, 2023, 7:47 a.m. UTC | #6
Richard Sandiford <richard.sandiford@arm.com> writes:
>> diff --git a/gcc/gensupport.h b/gcc/gensupport.h
>> index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
>> --- a/gcc/gensupport.h
>> +++ b/gcc/gensupport.h
>> @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #ifndef GCC_GENSUPPORT_H
>>  #define GCC_GENSUPPORT_H
>>  
>> +#include "hash-set.h"
>>  #include "read-md.h"
>>  
>>  struct obstack;
>> @@ -218,6 +219,8 @@ struct pattern_stats
>>    int num_operand_vars;
>>  };
>>  
>> +extern hash_set<rtx> compact_syntax;
>> +
>>  extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
>>  extern void compute_test_codes (rtx, file_location, char *);
>>  extern file_location get_file_location (rtx);
>> diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
>> index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..f1d6b512356844da5d1dadbc69e08c16ef7a3abd 100644
>> --- a/gcc/gensupport.cc
>> +++ b/gcc/gensupport.cc
>> @@ -27,12 +27,17 @@
>>  #include "read-md.h"
>>  #include "gensupport.h"
>>  #include "vec.h"
>> +#include <string>
>> +#include <vector>
>> +#include <ctype.h>

Also: these need to be included via defining:

#define INCLUDE_STRING
#define INCLUDE_VECTOR

before system.h.  The ctype.h include should go away after s/isalpha/ISALPHA/.

>>  
>>  #define MAX_OPERANDS 40
>>  
>>  static rtx operand_data[MAX_OPERANDS];
>>  static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
>>  static char used_operands_numbers[MAX_OPERANDS];
>> +/* List of entries which are part of the new syntax.  */
>> +hash_set<rtx> compact_syntax;
>
> Might as well make this static too, like the other vars.

Sorry, ignore that.  I somehow glazed over the header file changes
directly above...

Richard
  
Tamar Christina June 6, 2023, noon UTC | #7
Hi,

Thanks for the review, just some quick responses before I make the changes:

> >    int operand_number;		/* Operand index in the big array.  */
> >    int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
> > +  bool compact_syntax_p;
> >    struct operand_data operand[MAX_MAX_OPERANDS];  };
> >
> > @@ -700,12 +702,57 @@ process_template (class data *d, const char
> *template_code)
> >  	  if (sp != ep)
> >  	    message_at (d->loc, "trailing whitespace in output template");
> >
> > -	  while (cp < sp)
> > +	  /* Check for any unexpanded iterators.  */
> > +	  if (bp[0] != '*' && d->compact_syntax_p)
> 
> I assume the bp[0] != '*' condition skips the check for C code blocks.
> Genuine question, but are you sure we want that?  C code often includes asm
> strings (in quotes), such as for the SVE CNT[BHWD] example.
> 
> Extending the check would mean that any use of <...> for C++ templates will
> need to be quoted, but explicit instantiation is pretty rare in .md files.  It would
> also look weird for conditions.
> 
> Either way is fine, just asking.

I excluded it entirely to avoid also running afoul of the binary operators. So e.g.
* a < b && b > c ? foo : bar shouldn't trigger it.   It seemed more trouble than it's
worth to try to get correct.

> > +  }
> > +
> > +  /* Adds a character to the end of the string.  */  void add (char
> > + c)  {
> > +    con += c;
> > +  }
> > +
> > +  /* Output the string in the form of a brand-new char *, then effectively
> > +     clear the internal string by resetting len to 0.  */  char * out
> > + ()
> 
> Formatting: no need for a space before "out".
> 
> > +  {
> > +    /* Final character is always a trailing comma, so strip it out.
> > + */
> 
> trailing ',', ';' or ']', rather than just a comma?

Ah no, this is a bit of a lazy intercalate: when the alternatives are pushed in, it's
not easy to tell how many there will be (because we don't keep track of it in this part),
so we just always add a trailing "," and ignore the last char on output.  Validation of the
alternative counts themselves is done later by the normal machinery.

> 
> > +    char * q;
> 
> Similarly no space before "q" here.
> 
> > +    if (modifier.empty ())
> > +      q = xstrndup (con.c_str (), con.size () - 1);
> 
> Could just be "xstrdup (con.c_str ())".
> 
> > +    else
> > +      {
> > +	int len = con.size () + modifier.size ();
> > +	q = XNEWVEC (char, len);
> > +	strncpy (q, modifier.c_str (), modifier.size ());
> > +	strncpy (q + modifier.size (), con.c_str (), con.size ());
> > +	q[len -1] = '\0';
> > +      }
> 
> Do we need the separation between "modifier" and "cons"?  It looks like the
> code completes the initialisation of "modifier" before it writes to "cons", and
> so we could just use a single string.

Fair point.

> > +	{
> > +	  if (XSTR (part, 1) && XSTR (part, 1)[0] != '\0')
> > +	    {
> > +	      error_at (loc, "can't mix normal and compact attribute syntax");
> > +	      break;
> > +	    }
> > +	  XSTR (part, 1) = attrs[index].out ();
> > +
> > +	  ++index;
> > +	  if (index == attrs.size ())
> > +	    break;
> > +	}
> 
> It looks like you forgive mixing new-style and old-style syntax, since there's no
> "else error" here.  But the documentation said that that wasn't allowed.
> 
> Either way seems OK to me, but see the next comment.
> 
> > +    }
> > +
> > +  return index;
> > +}
> > +
> > +/* Modify the attributes list to make space for the implicitly declared
> > +   attributes in the attrs: list.  */
> > +
> > +static void
> > +create_missing_attributes (rtx x, file_location /* loc */,
> > +vec_conlist &attrs) {
> > +  if (attrs.empty ())
> > +    return;
> > +
> > +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> > + vec_conlist missing;
> > +
> > +  /* This is an O(n*m) loop but it's fine, both n and m will always be very
> > +     small.  */
> 
> Agreed that quadraticness isn't a problem.  But I wonder how many people
> would write an explicit placeholder set_attr.  Unlike match_operand and
> match_scratch, a placeholder set_attr doesn't carry any additional
> information.
> 
> It might be simpler to drop add_attributes and add all attributes
> unconditionally in this function instead.  If the user tries to specify the same
> attribute using both syntaxes, the pattern would end up with two definitions
> of the same attribute, which ought to be flagged by existing code.
> 

This was done to support the (in arm backend) common thing of having attributes
which are either too complex to add inline in the new syntax or that just repeat a
value.

i.e. it's to allow cases like this:

  [(set_attr "length")
   (set_attr "predicable" "yes")
   (set_attr "predicable_short_it")
   (set_attr "arch")
   (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
		      (const_string "alu_imm")
		      (const_string "alu_sreg")))

Where your attrs contains:

  {@ [cons: =0, 1, 2; attrs: length, predicable_short_it, arch]

However you're right, I could simply say that you must omit the set_attr in attrs and just
merge the two lists?  I think that's what you were alluding to?

Cheers,
Tamar

> > +  for (conlist cl : attrs)
> > +    {
> > +      bool found = false;
> > +      for (int i = 0; XVEC (x, attr_index) && i < XVECLEN (x, attr_index); ++i)
> > +	{
> > +	  rtx part = XVECEXP (x, attr_index, i);
> > +
> > +	  if (GET_CODE (part) != SET_ATTR
> > +	      || cl.name.compare (XSTR (part, 0)) == 0)
> > +	    {
> > +	      found = true;
> > +	      break;
> > +	    }
> > +	}
> > +
> > +      if (!found)
> > +	missing.push_back (cl);
> > +    }
> > +
> > +  rtvec orig = XVEC (x, attr_index);
> > +  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;  rtvec copy =
> > + rtvec_alloc (n_curr + missing.size ());
> > +
> > +  /* Create a shallow copy of existing entries.  */  memcpy
> > + (&copy->elem[missing.size ()], &orig->elem[0], sizeof (rtx) *
> > + n_curr);  XVEC (x, attr_index) = copy;
> > +
> > +  /* Create the new elements.  */
> > +  for (unsigned i = 0; i < missing.size (); i++)
> > +    {
> > +      rtx attr = rtx_alloc (SET_ATTR);
> > +      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
> > +      XSTR (attr, 1) = NULL;
> > +      XVECEXP (x, attr_index, i) = attr;
> > +    }
> > +
> > +  return;
> > +}
> > +
> > +/* Consumes spaces and tabs.  */
> > +
> > +static inline void
> > +skip_spaces (const char **str)
> > +{
> > +  while (**str == ' ' || **str == '\t')
> 
> ISSPACE here too.
> 
> > +    (*str)++;
> > +}
> > +
> > +/* Consumes the given character, if it's there.  */
> > +
> > +static inline bool
> > +expect_char (const char **str, char c) {
> > +  if (**str != c)
> > +    return false;
> > +  (*str)++;
> > +  return true;
> > +}
> > +
> > +/* Parses the section layout that follows a "{@}" if using new syntax. Builds
> > +   a vector for a single section. E.g. if we have "attrs: length arch)..."
> > +   then list will have two elements, the first for "length" and the second
> > +   for "arch".  */
> > +
> > +static void
> > +parse_section_layout (const char **templ, const char *label,
> > +		      vec_conlist &list, bool numeric) {
> > +  const char *name_start;
> > +  size_t label_len = strlen (label);
> > +  if (strncmp (label, *templ, label_len) == 0)
> > +    {
> > +      *templ += label_len;
> > +
> > +      /* Gather the names.  */
> > +      while (**templ != ';' && **templ != ']')
> > +	{
> > +	  skip_spaces (templ);
> > +	  name_start = *templ;
> > +	  int len = 0;
> > +	  char val = (*templ)[len];
> > +	  while (val != ',' && val != ';' && val != ']')
> > +	     val = (*templ)[++len];
> > +	  *templ += len;
> > +	  if (val == ',')
> > +	    (*templ)++;
> > +	  list.push_back (conlist (name_start, len, numeric));
> > +	}
> > +    }
> > +}
> > +
> > +/* Parse a section, a section is defined as a named space separated list, e.g.
> > +
> > +   foo: a b c
> 
> Now comma-separated rather than space-separated.  Applies to the example
> too.
> 
> > +
> > +   is a section named "foo" with entries a,b and c.  */
> > +
> > +static void
> > +parse_section (const char **templ, unsigned int n_elems, unsigned int
> alt_no,
> > +	       vec_conlist &list, file_location loc, const char *name) {
> > +  unsigned int i;
> > +
> > +  /* Go through the list, one character at a time, adding said character
> > +     to the correct string.  */
> > +  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
> > +    {
> > +      if (**templ != ' ' && **templ != '\t')
> 
> !ISSPACE
> 
> > +	{
> > +	  list[i].add(**templ);
> 
> Formatting: should be a space before "(".
> 
> > +	  if (**templ == ',')
> > +	    {
> > +	      ++i;
> > +	      if (i == n_elems)
> > +		fatal_at (loc, "too many %ss in alternative %d: expected %d",
> > +			  name, alt_no, n_elems);
> > +	    }
> > +	}
> > +    }
> > +
> > +  if (i + 1 < n_elems)
> > +    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
> > +	      name, alt_no, n_elems, i);
> > +
> > +  list[i].add(',');
> > +}
> > +
> > +/* The compact syntax has more convience syntaxes.  As such we post
> process
> > +   the lines to get them back to something the normal syntax
> > +understands.  */
> > +
> > +static void
> > +preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
> > +			   std::string &last_line)
> > +{
> > +  /* Check if we're copying the last statement.  */
> > +  if (line.find ("^") == 0 && line.size () == 1)
> > +    {
> > +      if (last_line.empty ())
> > +	fatal_at (loc, "found instruction to copy previous line (^) in"
> > +		       "alternative %d but no previous line to copy", alt_no);
> > +      line = last_line;
> > +      return;
> > +    }
> > +
> > +  std::string result;
> > +  std::string buffer;
> > +  /* Check if we have << which means return c statement.  */  if
> > + (line.find ("<<") == 0)
> > +    {
> > +      result.append ("* return ");
> > +      result.append (line.substr (3));
> 
> Seems like this should be line.substr (2) or that the find() should include a
> space after "<<".  As it stands, we'd accept <<X and drop the X.
> 
> > +    }
> > +  else
> > +    result.append (line);
> > +
> > +  line = result;
> > +  return;
> > +}
> > +
> > +/* Converts an rtx from compact syntax to normal syntax if possible.
> > +*/
> > +
> > +static void
> > +convert_syntax (rtx x, file_location loc) {
> > +  int alt_no;
> > +  unsigned int index, templ_index;
> > +  const char *templ;
> > +  vec_conlist tconvec, convec, attrvec;
> > +
> > +  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
> > +
> > +  templ = XTMPL (x, templ_index);
> > +
> > +  /* Templates with constraints start with "{@".  */  if (strncmp
> > + ("*{@", templ, 3))
> > +    return;
> > +
> > +  /* Get the layout for the template.  */  templ += 3;  skip_spaces
> > + (&templ);
> > +
> > +  if (!expect_char (&templ, '['))
> > +    fatal_at (loc, "expecing `[' to begin section list");
> > +
> > +  parse_section_layout (&templ, "cons:", tconvec, true);
> > + convec.resize (tconvec.size ());
> > +
> > +  /* Check for any duplicate cons entries and sort based on i.  */
> > +  for (unsigned i = 0; i < tconvec.size (); i++)
> > +    {
> > +      int idx = tconvec[i].idx;
> > +      if (convec[idx].idx >= 0)
> > +	fatal_at (loc, "duplicate cons number found: %d", idx);
> > +      convec[idx] = tconvec[i];
> > +    }
> > +  tconvec.clear ();
> 
> "convec.resize (tconvec.size ());" isn't guaranteed to be enough if the cons:
> skips operands.  Either we need to calculate the maximum idx first, or we need
> to grow convec on demand.
> 
> > +
> > +
> 
> Nit: excess whitespace
> 
> > +  if (*templ != ']')
> > +    {
> > +      if (*templ == ';')
> > +	skip_spaces (&(++templ));
> > +      parse_section_layout (&templ, "attrs:", attrvec, false);
> > +      create_missing_attributes (x, loc, attrvec);
> > +    }
> > +
> > +  if (!expect_char (&templ, ']'))
> > +    {
> > +      fatal_at (loc, "expecting `]` to end section list - section list "
> > +		"must have cons first, attrs second");
> > +    }
> 
> Formatting nit: unnecessary braces
> 
> > +
> > +  /* We will write the un-constrainified template into new_templ.  */
> > + std::string new_templ;  new_templ.append ("@");
> > +
> > +  /* Skip to the first proper line.  */  while (*templ++ != '\n');
> 
> This seems to allow anything to follow the "]".  Should we instead use
> skip_spaces and then require a '\n'?
> 
> > +  alt_no = 0;
> > +
> > +  std::string last_line;
> > +
> > +  /* Process the alternatives.  */
> > +  while (*(templ - 1) != '\0')
> > +    {
> > +      /* Copy leading whitespace.  */
> > +      std::string buffer;
> > +      while (*templ == ' ' || *templ == '\t')
> > +	buffer += *templ++;
> 
> Why do we need to do that?  The '@' handling in genoutput.cc seems to skip
> whatever space is present.
> 
> I was wondering if it was so that column numbers matched in compiler error
> messages against "<<" lines, but those would already be off because of the "*
> return" transformation (not an issue that needs to be fixed).
> 
> > +
> > +      /* Check if we're at the end.  */
> > +      if (templ[0] == '}' && templ[1] == '\0')
> > +	break;
> > +
> > +      new_templ += '\n';
> > +      new_templ.append (buffer);
> > +
> > +      if (expect_char (&templ, '['))
> > +	{
> > +	  /* Parse the constraint list, then the attribute list.  */
> > +	  if (convec.size () > 0)
> > +	    parse_section (&templ, convec.size (), alt_no, convec, loc,
> > +			   "constraint");
> > +
> > +	  if (attrvec.size () > 0)
> > +	    {
> > +	      if (convec.size () > 0 && !expect_char (&templ, ';'))
> > +		fatal_at (loc, "expected `;' to separate constraints "
> > +			       "and attributes in alternative %d", alt_no);
> > +
> > +	      parse_section (&templ, attrvec.size (), alt_no,
> > +			     attrvec, loc, "attribute");
> > +	    }
> > +
> > +	  if (!expect_char (&templ, ']'))
> > +	    fatal_at (loc, "expected end of constraint/attribute list but "
> > +			   "missing an ending `]' in alternative %d", alt_no);
> > +	}
> > +      else if (templ[0] == '/' && templ[1] == '/')
> > +	{
> > +	  templ+=2;
> 
> Formatting: should be spaces around "+=".  But here, and...
> 
> > + 	  /* Glob till newline or end of string.  */
> > +	  while (*templ != '\n' || *templ != '\0')
> > +	    templ++;
> > +	}
> > +      else if (templ[0] == '/' && templ[1] == '*')
> > +	{
> > +	  templ+=2;
> > + 	  /* Glob till newline or end of multiline comment.  */
> > +	  while (templ[0] != '*' && templ[1] != '/')
> > +	    templ++;
> > +	  templ++;
> 
> ...especially here, I think we should instead completely skip lines with
> comments and then "continue", without adding anything to new_templ for
> that iteration of the loop.  That would ensure
> that:
> 
> (a) multi-line // comments work correctly
> (b) a comment at the end gets silently dropped without adding a
>     line to the new template
> 
> > +	}
> > +      else
> > +	fatal_at (loc, "expected constraint/attribute list at beginning of "
> > +		       "alternative %d but missing a starting `['", alt_no);
> > +
> > +      /* Skip whitespace between list and asm.  */
> > +      ++templ;
> > +      skip_spaces (&templ);
> > +
> > +      /* Copy asm to new template.  */
> > +      std::string line;
> > +      while (*templ != '\n' && *templ != '\0')
> > +	line += *templ++;
> > +
> > +      /* Apply any pre-processing needed to the line.  */
> > +      preprocess_compact_syntax (loc, alt_no, line, last_line);
> > +      new_templ.append (line);
> > +      last_line = line;
> > +
> > +      /* The processing is very sensitive to whitespace, so preserve
> > +	 all but the trailing ones.  */
> > +      if (templ[0] == '\n')
> > +	*templ++;
> 
> Is the point here that we allow the closing "}" to be on its own line?
> It might be worth calling that out explicitly if so.
> 
> In other words, I'd understood this to mean something like:
> 
>     /* Normal "*..." syntax expects the closing quote to be on the final
>        line of asm, whereas we allow the closing "}" to be on its own line.
>        Postpone copying the '\n' until we know that there is another
>        alternative in the list.  */
> 
> > +      ++alt_no;
> > +    }
> > +
> > +  /* Write the constraints and attributes into their proper places.
> > +*/
> > +  if (convec.size () > 0)
> > +    {
> > +      index = add_constraints (x, loc, 0, convec);
> > +      if (index < convec.size ())
> > +	fatal_at (loc, "could not find match_operand/scratch with id %d",
> > +		  convec[index].idx);
> > +    }
> > +
> > +  if (attrvec.size () > 0)
> > +    {
> > +      index = add_attributes (x, loc, attrvec);
> > +      if (index < attrvec.size ())
> > +	fatal_at (loc, "could not find set_attr for attribute %s",
> > +		  attrvec[index].name.c_str ());
> > +    }
> > +
> > +  /* Copy over the new un-constrainified template.  */  XTMPL (x,
> > + templ_index) = xstrdup (new_templ.c_str ());
> > +
> > +  /* Register for later checks during iterator expansions.  */
> > + compact_syntax.add (x);
> > +
> > +#if DEBUG
> > +  print_rtl_single (stderr, x);
> > +#endif
> 
> IMO it'd be better to drop this.  It's easy enough to add locally if that's what
> someone wants.  ("make mddump" would also be useful for debugging this.)
> 
> Thanks,
> Richard
> 
> > +}
> > +
> >  /* Process a top level rtx in some way, queuing as appropriate.  */
> >
> >  static void
> > @@ -553,10 +1121,12 @@ process_rtx (rtx desc, file_location loc)
> >    switch (GET_CODE (desc))
> >      {
> >      case DEFINE_INSN:
> > +      convert_syntax (desc, loc);
> >        queue_pattern (desc, &define_insn_tail, loc);
> >        break;
> >
> >      case DEFINE_COND_EXEC:
> > +      convert_syntax (desc, loc);
> >        queue_pattern (desc, &define_cond_exec_tail, loc);
> >        break;
> >
> > @@ -631,6 +1201,7 @@ process_rtx (rtx desc, file_location loc)
> >  	attr = XVEC (desc, split_code + 1);
> >  	PUT_CODE (desc, DEFINE_INSN);
> >  	XVEC (desc, 4) = attr;
> > +	convert_syntax (desc, loc);
> >
> >  	/* Queue them.  */
> >  	insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  
Richard Sandiford June 6, 2023, 12:49 p.m. UTC | #8
Tamar Christina <Tamar.Christina@arm.com> writes:
>> >    int operand_number;		/* Operand index in the big array.  */
>> >    int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
>> > +  bool compact_syntax_p;
>> >    struct operand_data operand[MAX_MAX_OPERANDS];  };
>> >
>> > @@ -700,12 +702,57 @@ process_template (class data *d, const char
>> *template_code)
>> >  	  if (sp != ep)
>> >  	    message_at (d->loc, "trailing whitespace in output template");
>> >
>> > -	  while (cp < sp)
>> > +	  /* Check for any unexpanded iterators.  */
>> > +	  if (bp[0] != '*' && d->compact_syntax_p)
>> 
>> I assume the bp[0] != '*' condition skips the check for C code blocks.
>> Genuine question, but are you sure we want that?  C code often includes asm
>> strings (in quotes), such as for the SVE CNT[BHWD] example.
>> 
>> Extending the check would mean that any use of <...> for C++ templates will
>> need to be quoted, but explicit instantiation is pretty rare in .md files.  It would
>> also look weird for conditions.
>> 
>> Either way is fine, just asking.
>
> I excluded it entirely to avoid also running afoul of the binary operators. So e.g.
> * a < b && b > c ? foo : bar shouldn't trigger it.   It seemed more trouble than it's
> worth to try to get correct.

Yeah.  I agree it's probably better to skip.

>> > +  }
>> > +
>> > +  /* Adds a character to the end of the string.  */  void add (char
>> > + c)  {
>> > +    con += c;
>> > +  }
>> > +
>> > +  /* Output the string in the form of a brand-new char *, then effectively
>> > +     clear the internal string by resetting len to 0.  */  char * out
>> > + ()
>> 
>> Formatting: no need for a space before "out".
>> 
>> > +  {
>> > +    /* Final character is always a trailing comma, so strip it out.
>> > + */
>> 
>> trailing ',', ';' or ']', rather than just a comma?
>
> Ah no, this is a bit of a lazy intercalate, when the alternatives are pushed in it's
> not easy to tell how many there will be (because we don't keep track of it in this part),
> so we just always add a trailing "," and ignore the last char on output.  Validation of the
> alternative counts themselves is done later by the normal machinery.

Ah, I get it now, thanks.

>> > +    }
>> > +
>> > +  return index;
>> > +}
>> > +
>> > +/* Modify the attributes list to make space for the implicitly declared
>> > +   attributes in the attrs: list.  */
>> > +
>> > +static void
>> > +create_missing_attributes (rtx x, file_location /* loc */,
>> > +vec_conlist &attrs) {
>> > +  if (attrs.empty ())
>> > +    return;
>> > +
>> > +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
>> > + vec_conlist missing;
>> > +
>> > +  /* This is an O(n*m) loop but it's fine, both n and m will always be very
>> > +     small.  */
>> 
>> Agreed that quadraticness isn't a problem.  But I wonder how many people
>> would write an explicit placeholder set_attr.  Unlike match_operand and
>> match_scratch, a placeholder set_attr doesn't carry any additional
>> information.
>> 
>> It might be simpler to drop add_attributes and add all attributes
>> unconditionally in this function instead.  If the user tries to specify the same
>> attribute using both syntaxes, the pattern would end up with two definitions
>> of the same attribute, which ought to be flagged by existing code.
>> 
>
> This was done to support the (in arm backend) common thing of having attributes
> which are either too complex to add inline in the new syntax or that just repeat a
> value.
>
> i.e. it's to allow cases like this:
>
>   [(set_attr "length")
>    (set_attr "predicable" "yes")
>    (set_attr "predicable_short_it")
>    (set_attr "arch")
>    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
> 		      (const_string "alu_imm")
> 		      (const_string "alu_sreg")))
>
> Where your attrs contains:
>
>   {@ [cons: =0, 1, 2; attrs: length, predicable_short_it, arch]

Yeah, agree it needs to be possible to define things like "type"
in this way.

> However you're right, I could simply say that you must omit the set_attr in attrs and just
> merge the two lists?  I think that's what you were alluding to?

Yeah, that's right.  Or just concatenate them and rely on later
error checking (which should give reasonable diagnostics).

Thanks,
Richard
  
Richard Earnshaw (lists) June 6, 2023, 4:13 p.m. UTC | #9
On 06/06/2023 13:49, Richard Sandiford via Gcc-patches wrote:
> Tamar Christina <Tamar.Christina@arm.com> writes:
>>>>     int operand_number;		/* Operand index in the big array.  */
>>>>     int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
>>>> +  bool compact_syntax_p;
>>>>     struct operand_data operand[MAX_MAX_OPERANDS];  };
>>>>
>>>> @@ -700,12 +702,57 @@ process_template (class data *d, const char
>>> *template_code)
>>>>   	  if (sp != ep)
>>>>   	    message_at (d->loc, "trailing whitespace in output template");
>>>>
>>>> -	  while (cp < sp)
>>>> +	  /* Check for any unexpanded iterators.  */
>>>> +	  if (bp[0] != '*' && d->compact_syntax_p)
>>>
>>> I assume the bp[0] != '*' condition skips the check for C code blocks.
>>> Genuine question, but are you sure we want that?  C code often includes asm
>>> strings (in quotes), such as for the SVE CNT[BHWD] example.
>>>
>>> Extending the check would mean that any use of <...> for C++ templates will
>>> need to be quoted, but explicit instantiation is pretty rare in .md files.  It would
>>> also look weird for conditions.
>>>
>>> Either way is fine, just asking.
>>
>> I excluded it entirely to avoid also running afoul of the binary operators. So e.g.
>> * a < b && b > c ? foo : bar shouldn't trigger it.   It seemed more trouble than it's
>> worth to try to get correct.
> 
> Yeah.  I agree it's probably better to skip.
> 
>>>> +  }
>>>> +
>>>> +  /* Adds a character to the end of the string.  */  void add (char
>>>> + c)  {
>>>> +    con += c;
>>>> +  }
>>>> +
>>>> +  /* Output the string in the form of a brand-new char *, then effectively
>>>> +     clear the internal string by resetting len to 0.  */  char * out
>>>> + ()
>>>
>>> Formatting: no need for a space before "out".
>>>
>>>> +  {
>>>> +    /* Final character is always a trailing comma, so strip it out.
>>>> + */
>>>
>>> trailing ',', ';' or ']', rather than just a comma?
>>
>> Ah no, this is a bit of a lazy intercalate, when the alternatives are pushed in it's
>> not easy to tell how many there will be (because we don't keep track of it in this part),
>> so we just always add a trailing "," and ignore the last char on output.  Validation of the
>> alternative counts themselves is done later by the normal machinery.
> 
> Ah, I get it now, thanks.
> 
>>>> +    }
>>>> +
>>>> +  return index;
>>>> +}
>>>> +
>>>> +/* Modify the attributes list to make space for the implicitly declared
>>>> +   attributes in the attrs: list.  */
>>>> +
>>>> +static void
>>>> +create_missing_attributes (rtx x, file_location /* loc */,
>>>> +vec_conlist &attrs) {
>>>> +  if (attrs.empty ())
>>>> +    return;
>>>> +
>>>> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
>>>> + vec_conlist missing;
>>>> +
>>>> +  /* This is an O(n*m) loop but it's fine, both n and m will always be very
>>>> +     small.  */
>>>
>>> Agreed that quadraticness isn't a problem.  But I wonder how many people
>>> would write an explicit placeholder set_attr.  Unlike match_operand and
>>> match_scratch, a placeholder set_attr doesn't carry any additional
>>> information.
>>>
>>> It might be simpler to drop add_attributes and add all attributes
>>> unconditionally in this function instead.  If the user tries to specify the same
>>> attribute using both syntaxes, the pattern would end up with two definitions
>>> of the same attribute, which ought to be flagged by existing code.
>>>
>>
>> This was done to support the (in arm backend) common thing of having attributes
>> which are either too complex to add inline in the new syntax or that just repeat a
>> value.
>>
>> i.e. it's to allow cases like this:
>>
>>    [(set_attr "length")
>>     (set_attr "predicable" "yes")
>>     (set_attr "predicable_short_it")
>>     (set_attr "arch")
>>     (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
>> 		      (const_string "alu_imm")
>> 		      (const_string "alu_sreg")))
>>
>> Where your attrs contains:
>>
>>    {@ [cons: =0, 1, 2; attrs: length, predicable_short_it, arch]
> 
> Yeah, agree it needs to be possible to define things like "type"
> in this way.

You also want it for the case where every alternative takes the same 
value, eg the "predicable - yes" attr.

R.

> 
>> However you're right, I could simply say that you must omit the set_attr in attrs and just
>> merge the two lists?  I think that's what you were alluding to?
> 
> Yeah, that's right.  Or just concatenate them and rely on later
> error checking (which should give reasonable diagnostics).
> 
> Thanks,
> Richard
  
Andreas Schwab June 8, 2023, 10:12 a.m. UTC | #10
On Jun 08 2023, Tamar Christina via Gcc-patches wrote:

> @@ -713,6 +714,183 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
>  @end group
>  @end smallexample
>  
> +@node Compact Syntax
> +@section Compact Syntax
> +@cindex compact syntax
> +
> +In cases where the number of alternatives in a @code{define_insn} or
> +@code{define_insn_and_split} are large then it may be beneficial to use the

                                is large
  
Richard Earnshaw (lists) June 8, 2023, 10:29 a.m. UTC | #11
On 08/06/2023 11:12, Andreas Schwab wrote:
> On Jun 08 2023, Tamar Christina via Gcc-patches wrote:
> 
>> @@ -713,6 +714,183 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
>>   @end group
>>   @end smallexample
>>   
>> +@node Compact Syntax
>> +@section Compact Syntax
>> +@cindex compact syntax
>> +
>> +In cases where the number of alternatives in a @code{define_insn} or
>> +@code{define_insn_and_split} are large then it may be beneficial to use the
> 
>                                  is large
> 

Or perhaps better still:

When a define_insn or define_insn_and_split has many alternatives it may 
be beneficial to ...

R.
  
Richard Earnshaw (lists) June 8, 2023, 10:33 a.m. UTC | #12
On 08/06/2023 11:29, Richard Earnshaw (lists) via Gcc-patches wrote:
> On 08/06/2023 11:12, Andreas Schwab wrote:
>> On Jun 08 2023, Tamar Christina via Gcc-patches wrote:
>>
>>> @@ -713,6 +714,183 @@ you can use @samp{*} inside of a @samp{@@} 
>>> multi-alternative template:
>>>   @end group
>>>   @end smallexample
>>> +@node Compact Syntax
>>> +@section Compact Syntax
>>> +@cindex compact syntax
>>> +
>>> +In cases where the number of alternatives in a @code{define_insn} or
>>> +@code{define_insn_and_split} are large then it may be beneficial to 
>>> use the
>>
>>                                  is large
>>
> 
> Or perhaps better still:
> 
> When a define_insn or define_insn_and split has many alternatives it may 
> be beneficial to ...
> 
> R.

Or perhaps even s/many/multiple/.  It doesn't have to have very many to 
make this new syntax preferable, IMO.

R.
  
Richard Sandiford June 8, 2023, 2:24 p.m. UTC | #13
In addition to Andreas's and Richard's comments:

Tamar Christina <Tamar.Christina@arm.com> writes:
> +@item
> +@samp{@{@@} is followed by a layout in parentheses which is @samp{cons:} followed by
> +a list of @code{match_operand}/@code{match_scratch} comma operand numbers, then a

How about:

  a comma-separated list of @code{match_operand}/@code{match_scratch} operand
  numbers, then a

Some lines are >80 chars.

> +semicolon, followed by the same for attributes (@samp{attrs:}).  Operand
> +modifiers can be placed in this section group as well.  Both sections
> +are optional (so you can use only @samp{cons}, or only @samp{attrs}, or both),
> +and @samp{cons} must come before @samp{attrs} if present.
> +
> +@item
> +Each alternative begins with any amount of whitespace.
> +
> +@item
> +Following the whitespace is a comma-separated list of "constraints" and/or
> +"attributes" within brackets @code{[]}, with sections separated by a semicolon.
> +
> +@item
> +Should you want to copy the previous asm line, the symbol @code{^} can be used.
> +This allows less copy pasting between alternative and reduces the number of
> +lines to update on changes.
> +
> +@item
> +When using C functions for output, the idiom @samp{* return <function>;} can be

@samp{* return @var{function};}

> +replaced with the shorthand @samp{<< @var{function};}.
> +
> +@item
> +Following the closing @samp{]} is any amount of whitespace, and then the actual
> +asm output.
> +
> +@item
> +Spaces are allowed in the list (they will simply be removed).
> +
> +@item
> +All constraint alternatives should be specified: a blank list should be
> +@samp{[,,]} or generally use @samp{*} for the alternatives. e.g. @samp{[*,*,*]}.

I think this is mixing two things.  How about:

@item
All constraint alternatives should be specified.  For example, a list
of three blank alternatives should be written @samp{[,,]} rather than
@samp{[]}.

@item
All attribute alternatives should be non-empty, with @samp{*}
representing the default attribute value.  For example, a list of three
default attribute values should be written @samp{[*,*,*]} rather than
@samp{[]}.

> +
> +@item
> +Within an @samp{@{@@} block both multiline and singleline C comments are
> +allowed, but when used outside of a C block they must be the only non-whitespace
> +blocks on the line.
> +
> +@item
> +Within an @samp{@{@@} block, any iterators that do not get expanded will result in an
> +error.  If for some reason it is required to have @code{<>} in the output then

Maybe better as:

s/@code{<>}/@code{<} or @code{>}/

> +these must be escaped using @backslashchar{}.
> +
> +@item
> +The actual constraint string in the @code{match_operand} or
> +@code{match_scratch}, and the attribute string in the @code{set_attr}, must be
> +blank or an empty string (you can't combine the old and new syntaxes).

It looks like the new version drops support for the set_attr case though
(thanks).

> +
> +@item
> +Additional @code{set_attr} can be specified other than the ones in the
> +@samp{attrs} list.  These must use the normal syntax and must come last.  There
> +must not be any overlap between the two lists.

Similarly here: I don't think the “they must come last” bit applies
any more.  How about something like:

  It is possible to use the @samp{attrs} list to specify some attributes
  and to use the normal @code{set_attr} syntax to specify other attributes.
  There must not be any overlap between the two lists.

> +
> +In other words, the following is valid:
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +       (match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  @dots{}
> +  [(set_attr "foo" "mov_imm")]
> +)
> +@end group
> +@end smallexample
> +
> +but these are not valid:
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +       (match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  @dots{}
> +  [(set_attr "type")
> +   (set_attr "arch")
> +   (set_attr "foo" "mov_imm")]
> +)
> +@end group
> +@end smallexample
> +
> +and
> +
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +       (match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  @dots{}
> +  [(set_attr "type")
> +   (set_attr "foo" "mov_imm")
> +   (set_attr "arch")
> +   (set_attr "length")]
> +)
> +@end group
> +@end smallexample
> +
> +because the order of the entries don't match and new entries must be last.
> +@end itemize

These examples probably need updating too.

> +
>  @node Predicates
>  @section Predicates
>  @cindex predicates
> diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
> index 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..7088f816cfa6e6ab2c1f51b8bbaa5eae990a0a4b 100644
> --- a/gcc/genoutput.cc
> +++ b/gcc/genoutput.cc
> @@ -157,6 +157,7 @@ public:
>    int n_alternatives;          /* Number of alternatives in each constraint */
>    int operand_number;          /* Operand index in the big array.  */
>    int output_format;           /* INSN_OUTPUT_FORMAT_*.  */
> +  bool compact_syntax_p;
>    struct operand_data operand[MAX_MAX_OPERANDS];
>  };
>
> @@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code)
>           if (sp != ep)
>             message_at (d->loc, "trailing whitespace in output template");
>
> -         while (cp < sp)
> +         /* Check for any unexpanded iterators.  */
> +         if (bp[0] != '*' && d->compact_syntax_p)
>             {
> -             putchar (*cp);
> -             cp++;
> +             const char *p = cp;
> +             const char *last_bracket = nullptr;
> +             while (p < sp)
> +               {
> +                 if (*p == '\\' && p + 1 < sp)
> +                   {
> +                     putchar (*p);
> +                     putchar (*(p+1));
> +                     p += 2;
> +                     continue;
> +                   }
> +
> +                 if (*p == '>' && last_bracket && *last_bracket == '<')
> +                   {
> +                     int len = p - last_bracket;
> +                     fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
> +                               len - 1, last_bracket + 1, cp);
> +                   }
> +                 else if (*p == '<' || *p == '>')
> +                   last_bracket = p;
> +
> +                 putchar (*p);
> +                 p += 1;
> +               }
> +
> +             if (last_bracket)
> +               {
> +                 char *nl = strchr (const_cast<char*> (cp), '\n');
> +                 if (nl)
> +                   *nl ='\0';

Nit: missing space after "=".

> +                 fatal_at (d->loc, "unmatched angle brackets, likely an "
> +                           "error in iterator syntax in %s", cp);
> +               }
> +           }
> +         else
> +           {
> +             while (cp < sp)
> +               putchar (*(cp++));
>             }
>
> +         cp = sp;
> +
>           if (!found_star)
>             puts ("\",");
>           else if (*bp != '*')
> @@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info)
>    else
>      d->name = 0;
>
> +  d->compact_syntax_p = compact_syntax.contains (insn);
> +
>    /* Build up the list in the same order as the insns are seen
>       in the machine description.  */
>    d->next = 0;
> diff --git a/gcc/gensupport.h b/gcc/gensupport.h
> index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
> --- a/gcc/gensupport.h
> +++ b/gcc/gensupport.h
> @@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
>  #ifndef GCC_GENSUPPORT_H
>  #define GCC_GENSUPPORT_H
>
> +#include "hash-set.h"
>  #include "read-md.h"
>
>  struct obstack;
> @@ -218,6 +219,8 @@ struct pattern_stats
>    int num_operand_vars;
>  };
>
> +extern hash_set<rtx> compact_syntax;
> +
>  extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
>  extern void compute_test_codes (rtx, file_location, char *);
>  extern file_location get_file_location (rtx);
> diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
> index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..507285e8fef5443ec42501447e730022e1426ee1 100644
> --- a/gcc/gensupport.cc
> +++ b/gcc/gensupport.cc
> @@ -18,6 +18,8 @@
>     <http://www.gnu.org/licenses/>.  */
>
>  #include "bconfig.h"
> +#define INCLUDE_STRING
> +#define INCLUDE_VECTOR
>  #include "system.h"
>  #include "coretypes.h"
>  #include "tm.h"
> @@ -33,6 +35,8 @@
>  static rtx operand_data[MAX_OPERANDS];
>  static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
>  static char used_operands_numbers[MAX_OPERANDS];
> +/* List of entries which are part of the new syntax.  */
> +hash_set<rtx> compact_syntax;
>
>
>  /* In case some macros used by files we include need it, define this here.  */
> @@ -545,6 +549,526 @@ gen_rewrite_sequence (rtvec vec)
>    return new_vec;
>  }
>
> +/* The following is for handling the compact syntax for constraints and
> +   attributes.
> +
> +   The normal syntax looks like this:
> +
> +       ...
> +       (match_operand: 0 "s_register_operand" "r,I,k")
> +       (match_operand: 2 "s_register_operand" "r,k,I")
> +       ...
> +       "@
> +       <asm>
> +       <asm>
> +       <asm>"
> +       ...
> +       (set_attr "length" "4,8,8")
> +
> +   The compact syntax looks like this:
> +
> +       ...
> +       (match_operand: 0 "s_register_operand")
> +       (match_operand: 2 "s_register_operand")
> +       ...
> +       {@ [cons: 0, 2; attrs: length]
> +       [r,r; 4] <asm>
> +       [I,k; 8] <asm>
> +       [k,I; 8] <asm>
> +       }
> +       ...
> +       (set_attr "length")

Similarly here, I think the final set_attr is no longer expected.

> +
> +   This is the only place where this syntax needs to be handled.  Relevant
> +   patterns are transformed from compact to the normal syntax before they are
> +   queued, so none of the gen* programs need to know about this syntax at all.
> +
> +   Conversion process (convert_syntax):
> +
> +   0) Check that pattern actually uses new syntax (check for {@ ... }).
> +
> +   1) Get the "layout", i.e. the "[cons: 0 2; attrs: length]" from the above
> +      example.  cons must come first; both are optional. Set up two vecs,
> +      convec and attrvec, for holding the results of the transformation.
> +
> +   2) For each alternative: parse the list of constraints and/or attributes,
> +      and enqueue them in the relevant lists in convec and attrvec.  By the end
> +      of this process, convec[N].con and attrvec[N].con should contain regular
> +      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
> +      as we go.
> +
> +   3) Search the rtx and write the constraint and attribute lists into the
> +      correct places. Write the asm back into the template.  */
> +
> +/* Helper class for shuffling constraints/attributes in convert_syntax and
> +   add_constraints/add_attributes.  This includes commas but not whitespace.  */
> +
> +class conlist {
> +private:
> +  std::string con;
> +
> +public:
> +  std::string name;
> +  int idx = -1;
> +
> +  conlist () = default;
> +
> +  /* [ns..ns + len) should be a string with the id of the rtx to match
> +     i.e. if rtx is the relevant match_operand or match_scratch then
> +     [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
> +     [ns..ns + len) should equal XSTR (rtx, 0).  */
> +  conlist (const char *ns, unsigned int len, bool numeric)
> +  {
> +    /* Trim leading whitespaces.  */
> +    while (ISSPACE (*ns))
> +      {
> +       ns++;
> +       len--;
> +      }
> +
> +    /* Trim trailing whitespace.  */
> +    for (int i = len - 1; i >= 0; i++, len--)
> +      if (!ISSPACE (*ns))
> +       break;

As you pointed out off-list, these should be ISBLANK rather than ISSPACE.
Sorry for missing the effect on '\n'.

> +    /* Parse off any modifiers.  */
> +    while (!ISALNUM (*ns))
> +      {
> +       con += *(ns++);
> +       len--;
> +      }
> +
> +    name.assign (ns, len);
> +    if (numeric)
> +      idx = std::stoi (name);
> +  }
> +
> +  /* Adds a character to the end of the string.  */
> +  void add (char c)
> +  {
> +    con += c;
> +  }
> +
> +  /* Output the string in the form of a brand-new char *, then effectively
> +     clear the internal string by resetting len to 0.  */
> +  char *out ()
> +  {
> +    /* Final character is always a trailing comma, so strip it out.  */
> +    char *q = xstrndup (con.c_str (), con.size () - 1);
> +    con.clear ();
> +    return q;
> +  }
> +};
> +
> +typedef std::vector<conlist> vec_conlist;
> +
> +/* Add constraints to an rtx.  This function is similar to remove_constraints.
> +   Errors if adding the constraints would overwrite existing constraints.  */
> +
> +static void
> +add_constraints (rtx part, file_location loc, vec_conlist &cons)
> +{
> +  const char *format_ptr;
> +
> +  if (part == NULL_RTX)
> +    return;
> +
> +  /* If match_op or match_scr, check if we have the right one, and if so, copy
> +     over the constraint list.  */
> +  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
> +    {
> +      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
> +      unsigned id = XINT (part, 0);
> +
> +      if (id >= cons.size ())
> +       fatal_at (loc, "could not find match_operand/scratch with id %d", id);
> +
> +      if (cons[id].idx == -1)
> +       {
> +         error_at (loc, "constructor %d encountered more than once", id);
> +         return;
> +       }

-1 can also mean that the cons: didn't list "id" at all.  So if we want
to detect duplicates, we would need to assign something else (such as -2)
when consuming a constraint.  But I don't think it's necessary to check
for duplicates here, since later code should do that for both syntaxes.

So there are two options:

(1) Return early without error if "cons[id].idx == -1".
    Continue to use "cons[id].idx = -1" when consuming the constraint.
    
(2) Return early without error if "cons[id].idx == -1".
    Flag an error if "cons[id].idx == -2".
    Use "cons[id].idx = -2" when consuming the constraint.

I was thinking of (1), but (2) would also be OK if you prefer.

> +      if (XSTR (part, field)[0] != '\0')
> +       {
> +         error_at (loc, "can't mix normal and compact constraint syntax");
> +         return;
> +       }
> +      XSTR (part, field) = cons[id].out ();
> +      cons[id].idx = -1;
> +    }
> +
> +  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
> +
> +  /* Recursively search the rtx.  */
> +  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
> +    switch (*format_ptr++)
> +      {
> +      case 'e':
> +      case 'u':
> +       add_constraints (XEXP (part, i), loc, cons);
> +       break;
> +      case 'E':
> +       if (XVEC (part, i) != NULL)
> +         for (int j = 0; j < XVECLEN (part, i); j++)
> +           add_constraints (XVECEXP (part, i, j), loc, cons);
> +       break;
> +      default:
> +       continue;
> +      }
> +}
> +
> +/* Add attributes to an rtx. The attributes that are matched must be in order
> +   i.e. read from top to bottom in the pattern.
> +   Errors if adding the attributes would overwrite existing attributes.
> +   Returns 1 + index of last conlist to be matched.  */
> +
> +static unsigned int
> +add_attributes (rtx x, file_location loc, vec_conlist &attrs)
> +{
> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> +  unsigned int index = 0;
> +
> +  if (XVEC (x, attr_index) == NULL)
> +    return index;
> +
> +  for (int i = 0; i < XVECLEN (x, attr_index); ++i)
> +    {
> +      rtx part = XVECEXP (x, attr_index, i);
> +
> +      if (GET_CODE (part) != SET_ATTR)
> +       continue;
> +
> +      if (attrs[index].name == XSTR (part, 0))
> +       {
> +         if (XSTR (part, 1) && XSTR (part, 1)[0] != '\0')
> +           fatal_at (loc, "can't mix normal and compact attribute syntax");
> +         XSTR (part, 1) = attrs[index].out ();
> +
> +         if (++index == attrs.size ())
> +           break;
> +       }
> +      else
> +       fatal_at (loc, "can't mix normal and compact attribute syntax");
> +    }
> +
> +  return index;
> +}
> +
> +/* Modify the attributes list to make space for the implicitly declared
> +   attributes in the attrs: list.  */
> +
> +static void
> +create_missing_attributes (rtx x, file_location /* loc */, vec_conlist &attrs)
> +{
> +  if (attrs.empty ())
> +    return;
> +
> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> +  rtvec orig = XVEC (x, attr_index);
> +  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
> +  rtvec copy = rtvec_alloc (n_curr + attrs.size ());
> +
> +  /* Create a shallow copy of existing entries.  */
> +  memcpy (&copy->elem[attrs.size ()], &orig->elem[0], sizeof (rtx) * n_curr);
> +  XVEC (x, attr_index) = copy;
> +
> +  /* Create the new elements.  */
> +  for (unsigned i = 0; i < attrs.size (); i++)
> +    {
> +      rtx attr = rtx_alloc (SET_ATTR);
> +      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
> +      XSTR (attr, 1) = NULL;
> +      XVECEXP (x, attr_index, i) = attr;
> +    }
> +
> +  return;
> +}

I think we should be able to combine the last two functions into
something like:

/* Add ATTRS to definition X's attribute list.  */

static void
add_attributes (rtx x, vec_conlist &attrs)
{
  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
  rtvec orig = XVEC (x, attr_index);
  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
  rtvec copy = rtvec_alloc (n_curr + attrs.size ());

  /* Create a shallow copy of existing entries.  */
  memcpy (&copy->elem[attrs.size ()], &orig->elem[0], sizeof (rtx) * n_curr);
  XVEC (x, attr_index) = copy;

  /* Create the new elements.  */
  for (unsigned i = 0; i < attrs.size (); i++)
    {
      rtx attr = rtx_alloc (SET_ATTR);
      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
      XSTR (attr, 1) = attrs[i].out ();
      XVECEXP (x, attr_index, i) = attr;
    }
}

Then drop the call to create_missing_attributes and replace:

  if (attrvec.size () > 0)
    {
      index = add_attributes (x, loc, attrvec);
      if (index < attrvec.size ())
       fatal_at (loc, "could not find set_attr for attribute %s",
                 attrvec[index].name.c_str ());
    }

with:

  if (attrvec.size () > 0)
    add_attributes (x, attrs);

> +
> +/* Consumes spaces and tabs.  */
> +
> +static inline void
> +skip_spaces (const char **str)
> +{
> +  while (ISSPACE (**str))

ISBLANK here too (sorry).

> +    (*str)++;
> +}
> +
> +/* Consumes the given character, if it's there.  */
> +
> +static inline bool
> +expect_char (const char **str, char c)
> +{
> +  if (**str != c)
> +    return false;
> +  (*str)++;
> +  return true;
> +}
> +
> +/* Parses the section layout that follows a "{@}" if using new syntax. Builds

"{@"

> +   a vector for a single section. E.g. if we have "attrs: length arch]..."

"attrs: length, arch]..."

> +   then list will have two elements, the first for "length" and the second
> +   for "arch".  */
> +
> +static void
> +parse_section_layout (const char **templ, const char *label,
> +                     vec_conlist &list, bool numeric)
> +{
> +  const char *name_start;
> +  size_t label_len = strlen (label);
> +  if (strncmp (label, *templ, label_len) == 0)
> +    {
> +      *templ += label_len;
> +
> +      /* Gather the names.  */
> +      while (**templ != ';' && **templ != ']')
> +       {
> +         skip_spaces (templ);
> +         name_start = *templ;
> +         int len = 0;
> +         char val = (*templ)[len];
> +         while (val != ',' && val != ';' && val != ']')
> +            val = (*templ)[++len];

Sorry for not noticing last time, but this loop should raise an error
if val is 0 or '\n', before the assignment of a new val.  Otherwise a
malformed string would lead to a segfault.  E.g. something like:

         while (val != ',' && val != ';' && val != ']')
           {
	     if (val == 0 || val == '\n')
               fatal_at (loc, "missing ']'");
	     val = (*templ)[++len];
	   }

> +         *templ += len;
> +         if (val == ',')
> +           (*templ)++;
> +         list.push_back (conlist (name_start, len, numeric));
> +       }
> +    }
> +}
> +
> +/* Parse a section, a section is defined as a named space separated list, e.g.
> +
> +   foo: a, b, c
> +
> +   is a section named "foo" with entries a, b and c.  */
> +
> +static void
> +parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
> +              vec_conlist &list, file_location loc, const char *name)
> +{
> +  unsigned int i;
> +
> +  /* Go through the list, one character at a time, adding said character
> +     to the correct string.  */
> +  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
> +    {
> +      if (!ISSPACE (**templ))

ISBLANK here too.  No need for the "for" loop to have braces.

> +       {

Here too we should check **templ for 0 or '\n', and raise an error if found.

> +         list[i].add(**templ);

Nit: should be a space after "add".

> +         if (**templ == ',')
> +           {
> +             ++i;
> +             if (i == n_elems)
> +               fatal_at (loc, "too many %ss in alternative %d: expected %d",
> +                         name, alt_no, n_elems);
> +           }
> +       }
> +    }
> +
> +  if (i + 1 < n_elems)
> +    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
> +             name, alt_no, n_elems, i);
> +
> +  list[i].add(',');

Nit: should be a space after "add".

> +}
> +
> +/* The compact syntax has more convenience syntaxes.  As such we post process
> +   the lines to get them back to something the normal syntax understands.  */
> +
> +static void
> +preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
> +                          std::string &last_line)
> +{
> +  /* Check if we're copying the last statement.  */
> +  if (line.find ("^") == 0 && line.size () == 1)
> +    {
> +      if (last_line.empty ())
> +       fatal_at (loc, "found instruction to copy previous line (^) in"
> +                      "alternative %d but no previous line to copy", alt_no);
> +      line = last_line;
> +      return;
> +    }
> +
> +  std::string result;
> +  std::string buffer;
> +  /* Check if we have << which means return c statement.  */
> +  if (line.find ("<<") == 0)
> +    {
> +      result.append ("* return ");
> +      const char *chunk = line.c_str () + 2;
> +      skip_spaces (&chunk);
> +      result.append (chunk);
> +    }
> +  else
> +    result.append (line);
> +
> +  line = result;
> +  return;
> +}
> +
> +/* Converts an rtx from compact syntax to normal syntax if possible.  */
> +
> +static void
> +convert_syntax (rtx x, file_location loc)
> +{
> +  int alt_no;
> +  unsigned int index, templ_index;
> +  const char *templ;
> +  vec_conlist tconvec, convec, attrvec;
> +
> +  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
> +
> +  templ = XTMPL (x, templ_index);
> +
> +  /* Templates with constraints start with "{@".  */
> +  if (strncmp ("*{@", templ, 3))
> +    return;
> +
> +  /* Get the layout for the template.  */
> +  templ += 3;
> +  skip_spaces (&templ);
> +
> +  if (!expect_char (&templ, '['))
> +    fatal_at (loc, "expecing `[' to begin section list");
> +
> +  parse_section_layout (&templ, "cons:", tconvec, true);
> +  convec.resize (tconvec.size ());
> +
> +  /* Check for any duplicate cons entries and sort based on i.  */
> +  for (auto e : tconvec)
> +    {
> +      unsigned idx = e.idx;
> +      if (idx >= convec.size ())
> +       convec.resize (idx + 1);
> +
> +      if (convec[idx].idx >= 0)
> +       fatal_at (loc, "duplicate cons number found: %d", idx);
> +      convec[idx] = e;
> +    }
> +  tconvec.clear ();
> +
> +  if (*templ != ']')
> +    {
> +      if (*templ == ';')
> +       skip_spaces (&(++templ));
> +      parse_section_layout (&templ, "attrs:", attrvec, false);
> +      create_missing_attributes (x, loc, attrvec);
> +    }
> +
> +  if (!expect_char (&templ, ']'))
> +    fatal_at (loc, "expecting `]` to end section list - section list must have "
> +                  "cons first, attrs second");
> +
> +  /* We will write the un-constrainified template into new_templ.  */
> +  std::string new_templ;
> +  new_templ.append ("@");
> +
> +  /* Skip to the first proper line.  */
> +  skip_spaces (&templ);
> +
> +  alt_no = 0;
> +  std::string last_line;
> +
> +  /* Process the alternatives.  */
> +  while (*(templ - 1) != '\0')
> +    {
> +      /* Skip leading whitespace.  */
> +      std::string buffer;
> +      skip_spaces (&templ);
> +
> +      /* Check if we're at the end.  */
> +      if (templ[0] == '}' && templ[1] == '\0')
> +       break;
> +
> +      new_templ += '\n';
> +      new_templ.append (buffer);

Shouldn't this be...

> +
> +      if (expect_char (&templ, '['))
> +       {

...here, to prevent an extra '\n' from being added if the template
ends with a comment?

> +         /* Parse the constraint list, then the attribute list.  */
> +         if (convec.size () > 0)
> +           parse_section (&templ, convec.size (), alt_no, convec, loc,
> +                          "constraint");
> +
> +         if (attrvec.size () > 0)
> +           {
> +             if (convec.size () > 0 && !expect_char (&templ, ';'))
> +               fatal_at (loc, "expected `;' to separate constraints "
> +                              "and attributes in alternative %d", alt_no);
> +
> +             parse_section (&templ, attrvec.size (), alt_no,
> +                            attrvec, loc, "attribute");
> +           }
> +
> +         if (!expect_char (&templ, ']'))
> +           fatal_at (loc, "expected end of constraint/attribute list but "
> +                          "missing an ending `]' in alternative %d", alt_no);
> +       }
> +      else if (templ[0] == '/' && templ[1] == '/')
> +       {
> +         templ += 2;
> +         /* Glob till newline or end of string.  */
> +         while (*templ != '\n' || *templ != '\0')
> +           templ++;
> +         templ++;
> +         continue;
> +       }
> +      else if (templ[0] == '/' && templ[1] == '*')
> +       {
> +         templ += 2;
> +         /* Glob till newline or end of multiline comment.  */
> +         while (templ[0] != '*' && templ[1] != '/')
> +           templ++;

This loop should check for templ[0] == 0 too.

> +         templ += 2;
> +         continue;
> +       }
> +      else
> +       fatal_at (loc, "expected constraint/attribute list at beginning of "
> +                      "alternative %d but missing a starting `['", alt_no);
> +
> +      /* Skip whitespace between list and asm.  */
> +      ++templ;
> +      skip_spaces (&templ);
> +
> +      /* Copy asm to new template.  */
> +      std::string line;
> +      while (*templ != '\n' && *templ != '\0')
> +       line += *templ++;
> +
> +      /* Apply any pre-processing needed to the line.  */
> +      preprocess_compact_syntax (loc, alt_no, line, last_line);
> +      new_templ.append (line);
> +      last_line = line;
> +
> +      /* Normal "*..." syntax expects the closing quote to be on the final
> +        line of asm, whereas we allow the closing "}" to be on its own line.
> +        Postpone copying the '\n' until we know that there is another
> +        alternative in the list.  */
> +      while (templ[0] == '\n' || ISSPACE (*templ))
> +       templ++;

The separate check for '\n' isn't necessary.

Sorry for all the micro-comments.

Thanks,
Richard

> +      ++alt_no;
> +    }
> +
> +  /* Write the constraints and attributes into their proper places.  */
> +  if (convec.size () > 0)
> +    add_constraints (x, loc, convec);
> +
> +  if (attrvec.size () > 0)
> +    {
> +      index = add_attributes (x, loc, attrvec);
> +      if (index < attrvec.size ())
> +       fatal_at (loc, "could not find set_attr for attribute %s",
> +                 attrvec[index].name.c_str ());
> +    }
> +
> +  /* Copy over the new un-constrainified template.  */
> +  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
> +
> +  /* Register for later checks during iterator expansions.  */
> +  compact_syntax.add (x);
> +}
> +
>  /* Process a top level rtx in some way, queuing as appropriate.  */
>
>  static void
> @@ -553,10 +1077,12 @@ process_rtx (rtx desc, file_location loc)
>    switch (GET_CODE (desc))
>      {
>      case DEFINE_INSN:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_insn_tail, loc);
>        break;
>
>      case DEFINE_COND_EXEC:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_cond_exec_tail, loc);
>        break;
>
> @@ -631,6 +1157,7 @@ process_rtx (rtx desc, file_location loc)
>         attr = XVEC (desc, split_code + 1);
>         PUT_CODE (desc, DEFINE_INSN);
>         XVEC (desc, 4) = attr;
> +       convert_syntax (desc, loc);
>
>         /* Queue them.  */
>         insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  
Tamar Christina June 13, 2023, 3:26 p.m. UTC | #14
Hi All,

Updated patch with feedback addressed.


Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Any feedback?

Thanks,
Tamar

gcc/ChangeLog:

	* gensupport.cc (class conlist, add_constraints, add_attributes,
	skip_spaces, expect_char, preprocess_compact_syntax,
	parse_section_layout, parse_section, convert_syntax): New.
	(process_rtx): Check for conversion.
	* genoutput.cc (process_template): Check for unresolved iterators.
	(class data): Add compact_syntax_p.
	(gen_insn): Use it.
	* gensupport.h (compact_syntax): New.
	(hash-set.h): Include.

Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com>

--- inline copy of patch ---

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6a435eb44610960513e9739ac9ac1e8a27182c10..3bd1bcbc8beda9bbaea71c65118ecfa2cdace335 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
                         from such an insn.
 * Output Statement::    For more generality, write C code to output
                         the assembler code.
+* Compact Syntax::      Compact syntax for writing machine descriptors.
 * Predicates::          Controlling what kinds of operands can be used
                         for an insn.
 * Constraints::         Fine-tuning operand selection.
@@ -713,6 +714,167 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
 @end group
 @end smallexample
 
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When a @code{define_insn} or @code{define_insn_and_split} has multiple
+alternatives it may be beneficial to use the compact syntax when specifying
+alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
+  ""
+  "@@
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %1
+   #
+   * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+   [(const_int 0)]
+  @{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  @}
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+   (set_attr "arch"   "*,*,*,*,*,sve")
+   (set_attr "length" "4,4,4,4,*,  4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: =0, 1; attrs: type, arch, length]
+     [r , r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+     [k , r  ; mov_reg  , *   , 4] ^
+     [r , k  ; mov_reg  , *   , 4] ^
+     [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+     [r , n  ; mov_imm  , *   , *] #
+     [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+  @}
+  "&& true"
+  [(const_int 0)]
+  @{
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Templates must start with @samp{@{@@} to use the new syntax.
+
+@item
+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:}
+followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
+operand numbers, then a semicolon, followed by the same for attributes
+(@samp{attrs:}).  Operand modifiers can be placed in this section group as well.
+Both sections are optional (so you can use only @samp{cons}, or only
+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
+present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of "constraints" and/or
+"attributes" within brackets @code{[]}, with sections separated by a semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This allows less copy pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @samp{* return @var{function};}
+can be replaced with the shorthand @samp{<< @var{function};}.
+
+@item
+Following the closing @samp{]} is any amount of whitespace, and then the actual
+asm output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All constraint alternatives should be specified.  For example, a list of
+three blank alternatives should be written @samp{[,,]} rather than
+@samp{[]}.
+
+@item
+All attribute alternatives should be non-empty, with @samp{*}
+representing the default attribute value.  For example, a list of three
+default attribute values should be written @samp{[*,*,*]} rather than
+@samp{[]}.
+
+
+@item
+Within an @samp{@{@@} block both multiline and singleline C comments are
+allowed, but when used outside of a C block they must be the only non-whitespace
+blocks on the line.
+
+@item
+Within an @samp{@{@@} block, any iterators that do not get expanded will result
+in an error.  If for some reason it is required to have @code{<} or @code{>} in
+the output then these must be escaped using @backslashchar{}.
+
+@item
+It is possible to use the @samp{attrs} list to specify some attributes and to
+use the normal @code{set_attr} syntax to specify other attributes.  There must
+not be any overlap between the two lists.
+
+In other words, the following is valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+  @dots{} 
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+but this is not valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+  @dots{} 
+  [(set_attr "arch" "bar")
+   (set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+because @samp{arch} appears in both the @samp{attrs} list and a @code{set_attr}.
+@end itemize
 @node Predicates
 @section Predicates
 @cindex predicates
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..3fbdde70df30d23a11b23d59eefe95b86f148c76 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -157,6 +157,7 @@ public:
   int n_alternatives;		/* Number of alternatives in each constraint */
   int operand_number;		/* Operand index in the big array.  */
   int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
+  bool compact_syntax_p;
   struct operand_data operand[MAX_MAX_OPERANDS];
 };
 
@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code)
 	  if (sp != ep)
 	    message_at (d->loc, "trailing whitespace in output template");
 
-	  while (cp < sp)
+	  /* Check for any unexpanded iterators.  */
+	  if (bp[0] != '*' && d->compact_syntax_p)
 	    {
-	      putchar (*cp);
-	      cp++;
+	      const char *p = cp;
+	      const char *last_bracket = nullptr;
+	      while (p < sp)
+		{
+		  if (*p == '\\' && p + 1 < sp)
+		    {
+		      putchar (*p);
+		      putchar (*(p+1));
+		      p += 2;
+		      continue;
+		    }
+
+		  if (*p == '>' && last_bracket && *last_bracket == '<')
+		    {
+		      int len = p - last_bracket;
+		      fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
+				len - 1, last_bracket + 1, cp);
+		    }
+		  else if (*p == '<' || *p == '>')
+		    last_bracket = p;
+
+		  putchar (*p);
+		  p += 1;
+		}
+
+	      if (last_bracket)
+		{
+		  char *nl = strchr (const_cast<char*> (cp), '\n');
+		  if (nl)
+		    *nl = '\0';
+		  fatal_at (d->loc, "unmatched angle brackets, likely an "
+			    "error in iterator syntax in %s", cp);
+		}
+	    }
+	  else
+	    {
+	      while (cp < sp)
+		putchar (*(cp++));
 	    }
 
+	  cp = sp;
+
 	  if (!found_star)
 	    puts ("\",");
 	  else if (*bp != '*')
@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info)
   else
     d->name = 0;
 
+  d->compact_syntax_p = compact_syntax.contains (insn);
+
   /* Build up the list in the same order as the insns are seen
      in the machine description.  */
   d->next = 0;
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
--- a/gcc/gensupport.h
+++ b/gcc/gensupport.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_GENSUPPORT_H
 #define GCC_GENSUPPORT_H
 
+#include "hash-set.h"
 #include "read-md.h"
 
 struct obstack;
@@ -218,6 +219,8 @@ struct pattern_stats
   int num_operand_vars;
 };
 
+extern hash_set<rtx> compact_syntax;
+
 extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
 extern void compute_test_codes (rtx, file_location, char *);
 extern file_location get_file_location (rtx);
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..7b704284153f3702bc023194d9ef394d8722c3bd 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -18,6 +18,8 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include "bconfig.h"
+#define INCLUDE_STRING
+#define INCLUDE_VECTOR
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
@@ -33,6 +35,8 @@
 static rtx operand_data[MAX_OPERANDS];
 static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
 static char used_operands_numbers[MAX_OPERANDS];
+/* List of entries which are part of the new syntax.  */
+hash_set<rtx> compact_syntax;
 
 
 /* In case some macros used by files we include need it, define this here.  */
@@ -545,6 +549,485 @@ gen_rewrite_sequence (rtvec vec)
   return new_vec;
 }
 
+/* The following is for handling the compact syntax for constraints and
+   attributes.
+
+   The normal syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand" "r,I,k")
+       (match_operand: 2 "s_register_operand" "r,k,I")
+       ...
+       "@
+	<asm>
+	<asm>
+	<asm>"
+       ...
+       (set_attr "length" "4,8,8")
+
+   The compact syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand")
+       (match_operand: 2 "s_register_operand")
+       ...
+       {@ [cons: 0, 2; attrs: length]
+	[r,r; 4] <asm>
+	[I,k; 8] <asm>
+	[k,I; 8] <asm>
+       }
+       ...
+       [<other attributes>]
+
+   This is the only place where this syntax needs to be parsed.  Relevant
+   patterns are transformed from compact to the normal syntax before they are
+   queued, so the gen* programs only ever see patterns in the normal syntax.
+
+   Conversion process (convert_syntax):
+
+   0) Check that pattern actually uses new syntax (check for {@ ... }).
+
+   1) Get the "layout", i.e. the "[cons: 0, 2; attrs: length]" from the above
+      example.  cons must come first; both are optional.  Set up two vecs,
+      convec and attrvec, for holding the results of the transformation.
+
+   2) For each alternative: parse the list of constraints and/or attributes,
+      and enqueue them in the relevant lists in convec and attrvec.  By the end
+      of this process, convec[N].con and attrvec[N].con should contain regular
+      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
+      as we go.
+
+   3) Search the rtx and write the constraint and attribute lists into the
+      correct places. Write the asm back into the template.  */
+
+/* Helper class for accumulating the constraint or attribute string of one
+   layout entry in convert_syntax, for later use by
+   add_constraints/add_attributes.  The accumulated string includes commas
+   but not whitespace.  */
+
+class conlist {
+private:
+  /* Any modifiers parsed from the layout entry, followed by whatever has
+     been appended through add ().  */
+  std::string con;
+
+public:
+  /* The layout entry with surrounding blanks and leading modifiers
+     removed.  */
+  std::string name;
+
+  /* NAME converted to an integer for numeric entries, otherwise -1.  */
+  int idx = -1;
+
+  conlist () = default;
+
+  /* [ns..ns + len) should be a string with the id of the rtx to match
+     i.e. if rtx is the relevant match_operand or match_scratch then
+     [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
+     [ns..ns + len) should equal XSTR (rtx, 0).  Leading characters that are
+     not alphanumeric are treated as modifiers and become the start of the
+     accumulated string.  If NUMERIC, the remaining name is also converted
+     to an integer and stored in idx.  */
+  conlist (const char *ns, unsigned int len, bool numeric)
+  {
+    /* Trim leading whitespace, never stepping outside [ns..ns + len).  */
+    while (len > 0 && ISBLANK (*ns))
+      {
+	ns++;
+	len--;
+      }
+
+    /* Trim trailing whitespace.  */
+    while (len > 0 && ISBLANK (ns[len - 1]))
+      len--;
+
+    /* Parse off any modifiers.  Stop at the end of the entry so that an
+       entry without any alphanumeric character cannot make LEN wrap around
+       and the scan run off the end.  */
+    while (len > 0 && !ISALNUM (*ns))
+      {
+	con += *(ns++);
+	len--;
+      }
+
+    name.assign (ns, len);
+    /* Note that std::stoi throws if NAME does not start with a number.  */
+    if (numeric)
+      idx = std::stoi (name);
+  }
+
+  /* Adds a character to the end of the string.  */
+  void add (char c)
+  {
+    con += c;
+  }
+
+  /* Output the string in the form of a brand-new char *, then clear the
+     internal string.  */
+  char *out ()
+  {
+    /* The last character is dropped; callers are expected to have ended the
+       string with a trailing comma.  Guard against an empty string so that
+       the length cannot wrap around.  */
+    size_t n = con.empty () ? 0 : con.size () - 1;
+    char *q = xstrndup (con.c_str (), n);
+    con.clear ();
+    return q;
+  }
+};
+
+typedef std::vector<conlist> vec_conlist;
+
+/* Add constraints to an rtx.  This function is similar to remove_constraints.
+   PART is the rtx to search, LOC the location used for diagnostics and CONS
+   the constraint lists, indexed by operand number.  Operands without an
+   entry in CONS are left alone.  Errors if adding the constraints would
+   overwrite existing constraints.  */
+
+static void
+add_constraints (rtx part, file_location loc, vec_conlist &cons)
+{
+  const char *format_ptr;
+
+  if (part == NULL_RTX)
+    return;
+
+  /* If match_op or match_scr, check if we have the right one, and if so, copy
+     over the constraint list.  */
+  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
+    {
+      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
+      unsigned id = XINT (part, 0);
+
+      /* An operand that is not mentioned in the layout simply gets no
+	 constraints, whether its number lies beyond the end of CONS or
+	 falls on an unused slot (idx == -1).  */
+      if (id >= cons.size () || cons[id].idx == -1)
+	return;
+
+      if (XSTR (part, field)[0] != '\0')
+	{
+	  error_at (loc, "can't mix normal and compact constraint syntax");
+	  return;
+	}
+      XSTR (part, field) = cons[id].out ();
+      /* Mark the entry as used so that it is not written a second time.  */
+      cons[id].idx = -1;
+    }
+
+  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
+
+  /* Recursively search the rtx.  */
+  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
+    switch (*format_ptr++)
+      {
+      case 'e':
+      case 'u':
+	add_constraints (XEXP (part, i), loc, cons);
+	break;
+      case 'E':
+	if (XVEC (part, i) != NULL)
+	  for (int j = 0; j < XVECLEN (part, i); j++)
+	    add_constraints (XVECEXP (part, i, j), loc, cons);
+	break;
+      default:
+	continue;
+      }
+}
+
+/* Add ATTRS to definition X's attribute list.  The new set_attr elements are
+   placed in front of any attributes that X already has.  */
+
+static void
+add_attributes (rtx x, vec_conlist &attrs)
+{
+  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+  rtvec orig = XVEC (x, attr_index);
+  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
+  rtvec copy = rtvec_alloc (n_curr + attrs.size ());
+
+  /* Create a shallow copy of existing entries, placing them after the slots
+     reserved for the new elements.  ORIG is null when X has no attribute
+     vector, in which case there is nothing to copy and ORIG must not be
+     dereferenced.  */
+  if (orig)
+    memcpy (&copy->elem[attrs.size ()], &orig->elem[0],
+	    sizeof (rtx) * n_curr);
+  XVEC (x, attr_index) = copy;
+
+  /* Create the new elements.  */
+  for (unsigned i = 0; i < attrs.size (); i++)
+    {
+      rtx attr = rtx_alloc (SET_ATTR);
+      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
+      XSTR (attr, 1) = attrs[i].out ();
+      XVECEXP (x, attr_index, i) = attr;
+    }
+}
+
+/* Advance *STR past any run of blanks (spaces and tabs).  */
+
+static inline void
+skip_spaces (const char **str)
+{
+  const char *p = *str;
+  for (; ISBLANK (*p); ++p)
+    ;
+  *str = p;
+}
+
+/* If the next character of *STR is C, step over it and return true;
+   otherwise leave *STR alone and return false.  */
+
+static inline bool
+expect_char (const char **str, char c)
+{
+  const bool matched = (**str == c);
+  if (matched)
+    ++*str;
+  return matched;
+}
+
+/* Parse one section of the layout that follows a "{@" in the compact syntax.
+   If the text at *TEMPL starts with LABEL, append one entry to LIST for each
+   comma-separated name that follows it; otherwise do nothing.  E.g. for
+   "attrs: length, arch]..." LIST gains two elements, the first for "length"
+   and the second for "arch".  *TEMPL is left pointing at the ';' or ']' that
+   ends the section.  NUMERIC is passed on to the conlist constructor and LOC
+   is used for diagnostics.  */
+
+static void
+parse_section_layout (file_location loc, const char **templ, const char *label,
+		      vec_conlist &list, bool numeric)
+{
+  const size_t label_len = strlen (label);
+
+  /* Nothing to do unless the text starts with LABEL.  */
+  if (strncmp (label, *templ, label_len) != 0)
+    return;
+
+  *templ += label_len;
+
+  /* Gather the names.  */
+  while (**templ != ';' && **templ != ']')
+    {
+      skip_spaces (templ);
+      const char *entry = *templ;
+
+      /* Find the delimiter that ends this entry.  */
+      int entry_len = 0;
+      for (;;)
+	{
+	  char ch = entry[entry_len];
+	  if (ch == ',' || ch == ';' || ch == ']')
+	    break;
+	  if (ch == 0 || ch == '\n')
+	    fatal_at (loc, "missing ']'");
+	  entry_len++;
+	}
+
+      /* Step over the entry and over a separating comma, but leave a
+	 terminating ';' or ']' in place.  */
+      *templ = entry + entry_len;
+      if (**templ == ',')
+	(*templ)++;
+      list.push_back (conlist (entry, entry_len, numeric));
+    }
+}
+
+/* Parse one section of an alternative.  A section is a comma-separated list
+   of entries, e.g.
+
+     r, w, m
+
+   has the entries "r", "w" and "m".  The non-blank characters of entry I,
+   followed by a comma, are appended to LIST[I].  Parsing stops at the ']' or
+   ';' that ends the section, which is left in *TEMPL.  N_ELEMS is the number
+   of entries the section must have; ALT_NO, LOC and NAME are only used for
+   diagnostics.  */
+
+static void
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
+	       vec_conlist &list, file_location loc, const char *name)
+{
+  unsigned int i;
+
+  /* Go through the list, one character at a time, adding said character
+     to the correct string.  */
+  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
+    if (!ISBLANK (**templ))
+      {
+	/* Reject the end of the line or string before doing anything with
+	   the character.  */
+	if (**templ == 0 || **templ == '\n')
+	  fatal_at (loc, "missing ']'");
+	list[i].add (**templ);
+	if (**templ == ',')
+	  {
+	    ++i;
+	    if (i == n_elems)
+	      fatal_at (loc, "too many %ss in alternative %d: expected %d",
+			name, alt_no, n_elems);
+	  }
+      }
+
+  /* I counts the commas seen, so the section has I + 1 entries.  */
+  if (i + 1 < n_elems)
+    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
+	      name, alt_no, n_elems, i + 1);
+
+  list[i].add (',');
+}
+
+/* The compact syntax has some convenience shorthands.  Preprocess LINE, the
+   asm text of alternative ALT_NO, to turn it back into something the normal
+   syntax understands:
+
+   - a line consisting of just "^" is replaced by LAST_LINE;
+   - a line starting with "<<" becomes "* return " followed by the rest of
+     the line with leading blanks removed.
+
+   Any other line is left unchanged.  LOC is used for diagnostics.  */
+
+static void
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
+			   std::string &last_line)
+{
+  /* Check if we're copying the last statement.  */
+  if (line == "^")
+    {
+      if (last_line.empty ())
+	fatal_at (loc, "found instruction to copy previous line (^) in "
+		       "alternative %d but no previous line to copy", alt_no);
+      line = last_line;
+      return;
+    }
+
+  /* Check if we have << which means return c statement.  */
+  if (line.compare (0, 2, "<<") == 0)
+    {
+      const char *chunk = line.c_str () + 2;
+      skip_spaces (&chunk);
+      /* The right-hand side is fully built before LINE is overwritten, so
+	 it is safe for CHUNK to point into LINE.  */
+      line = std::string ("* return ") + chunk;
+    }
+}
+
+/* Converts an rtx from compact syntax to normal syntax if possible.  X is the
+   definition to convert and LOC its location, used for diagnostics.
+   Definitions whose template does not start with "*{@" are left untouched.
+   Otherwise the constraints and attributes given in the template are written
+   into X, the template is replaced by a multi-alternative "@" template and X
+   is recorded in compact_syntax.  */
+
+static void
+convert_syntax (rtx x, file_location loc)
+{
+  int alt_no;
+  unsigned int templ_index;
+  const char *templ;
+  vec_conlist tconvec, convec, attrvec;
+
+  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+
+  templ = XTMPL (x, templ_index);
+
+  /* Templates using the compact syntax start with "*{@".  */
+  if (strncmp ("*{@", templ, 3))
+    return;
+
+  /* Get the layout for the template.  */
+  templ += 3;
+  skip_spaces (&templ);
+
+  if (!expect_char (&templ, '['))
+    fatal_at (loc, "expecting `[' to begin section list");
+
+  skip_spaces (&templ);
+
+  /* TCONVEC holds the constraint entries in the order in which the layout
+     lists them.  The alternatives list their constraints in that same order,
+     so TCONVEC is what the alternatives must be parsed into; it is only
+     re-indexed by operand number once all alternatives have been read.  */
+  parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+  if (*templ != ']')
+    {
+      if (*templ == ';')
+	skip_spaces (&(++templ));
+      parse_section_layout (loc, &templ, "attrs:", attrvec, false);
+    }
+
+  if (!expect_char (&templ, ']'))
+    fatal_at (loc, "expecting `]` to end section list - section list must have "
+		   "cons first, attrs second");
+
+  /* We will write the un-constrainified template into new_templ.  */
+  std::string new_templ;
+  new_templ.append ("@");
+
+  /* Skip to the first proper line.  Only blanks may follow the layout on
+     its own line.  */
+  skip_spaces (&templ);
+  if (*templ == 0)
+    fatal_at (loc, "'{@...}' blocks must have at least one alternative");
+  if (*templ != '\n')
+    fatal_at (loc, "unexpected character '%c' after ']'", *templ);
+  templ++;
+
+  alt_no = 0;
+  std::string last_line;
+
+  /* Process the alternatives.  The loop is left at the closing "}"; anything
+     else that is not an alternative or a comment is a fatal error.  */
+  for (;;)
+    {
+      /* Skip leading whitespace.  */
+      skip_spaces (&templ);
+
+      /* Check if we're at the end.  */
+      if (templ[0] == '}' && templ[1] == '\0')
+	break;
+
+      if (expect_char (&templ, '['))
+	{
+	  new_templ += '\n';
+	  /* Parse the constraint list, then the attribute list.  */
+	  if (tconvec.size () > 0)
+	    parse_section (&templ, tconvec.size (), alt_no, tconvec, loc,
+			   "constraint");
+
+	  if (attrvec.size () > 0)
+	    {
+	      if (tconvec.size () > 0 && !expect_char (&templ, ';'))
+		fatal_at (loc, "expected `;' to separate constraints "
+			       "and attributes in alternative %d", alt_no);
+
+	      parse_section (&templ, attrvec.size (), alt_no,
+			     attrvec, loc, "attribute");
+	    }
+
+	  if (!expect_char (&templ, ']'))
+	    fatal_at (loc, "expected end of constraint/attribute list but "
+			   "missing an ending `]' in alternative %d", alt_no);
+	}
+      else if (templ[0] == '/' && templ[1] == '/')
+	{
+	  templ += 2;
+	  /* Glob till newline or end of string, without stepping over
+	     either.  */
+	  while (*templ != '\n' && *templ != '\0')
+	    templ++;
+
+	  /* Skip any newlines or whitespaces needed.  */
+	  while (ISSPACE (*templ))
+	    templ++;
+	  continue;
+	}
+      else if (templ[0] == '/' && templ[1] == '*')
+	{
+	  templ += 2;
+	  /* Glob till the end of the comment or the end of the string.  */
+	  for (; templ[0] != 0; ++templ)
+	    if (templ[0] == '*' && templ[1] == '/')
+	      {
+		templ += 2;
+		break;
+	      }
+
+	  /* Skip any newlines or whitespaces needed.  */
+	  while (ISSPACE (*templ))
+	    templ++;
+	  continue;
+	}
+      else
+	fatal_at (loc, "expected constraint/attribute list at beginning of "
+		       "alternative %d but missing a starting `['", alt_no);
+
+      /* Skip whitespace between list and asm.  The closing ']' has already
+	 been consumed above.  */
+      skip_spaces (&templ);
+
+      /* Copy asm to new template.  */
+      std::string line;
+      while (*templ != '\n' && *templ != '\0')
+	line += *templ++;
+
+      /* Apply any pre-processing needed to the line.  */
+      preprocess_compact_syntax (loc, alt_no, line, last_line);
+      new_templ.append (line);
+      last_line = line;
+
+      /* Normal "*..." syntax expects the closing quote to be on the final
+	 line of asm, whereas we allow the closing "}" to be on its own line.
+	 Postpone copying the '\n' until we know that there is another
+	 alternative in the list.  */
+      while (ISSPACE (*templ))
+	templ++;
+      ++alt_no;
+    }
+
+  if (alt_no == 0)
+    fatal_at (loc, "'{@...}' blocks must have at least one alternative");
+
+  /* Check for any duplicate cons entries and index the constraint lists by
+     operand number, which is what add_constraints expects.  */
+  for (const auto &e : tconvec)
+    {
+      unsigned idx = e.idx;
+      if (idx >= convec.size ())
+	convec.resize (idx + 1);
+
+      if (convec[idx].idx >= 0)
+	fatal_at (loc, "duplicate cons number found: %d", idx);
+      convec[idx] = e;
+    }
+  tconvec.clear ();
+
+  /* Write the constraints and attributes into their proper places.  */
+  if (convec.size () > 0)
+    add_constraints (x, loc, convec);
+
+  if (attrvec.size () > 0)
+    add_attributes (x, attrvec);
+
+  /* Copy over the new un-constrainified template.  */
+  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
+
+  /* Register for later checks during iterator expansions.  */
+  compact_syntax.add (x);
+}
+
 /* Process a top level rtx in some way, queuing as appropriate.  */
 
 static void
@@ -553,10 +1036,12 @@ process_rtx (rtx desc, file_location loc)
   switch (GET_CODE (desc))
     {
     case DEFINE_INSN:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_insn_tail, loc);
       break;
 
     case DEFINE_COND_EXEC:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_cond_exec_tail, loc);
       break;
 
@@ -631,6 +1116,7 @@ process_rtx (rtx desc, file_location loc)
 	attr = XVEC (desc, split_code + 1);
 	PUT_CODE (desc, DEFINE_INSN);
 	XVEC (desc, 4) = attr;
+	convert_syntax (desc, loc);
 
 	/* Queue them.  */
 	insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  
Richard Sandiford June 14, 2023, 7:41 p.m. UTC | #15
Tamar Christina <Tamar.Christina@arm.com> writes:
> +The syntax rules are as follows:
> +@itemize @bullet
> +@item
> +Templates must start with @samp{@{@@} to use the new syntax.
> +
> +@item
> +@samp{@{@@} is followed by a layout in parentheses which is @samp{cons:}

s/parentheses/square brackets/

> +followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
> +operand numbers, then a semicolon, followed by the same for attributes
> +(@samp{attrs:}).  Operand modifiers can be placed in this section group as well.

How about:

  Operand modifiers like @code{=} and @code{+} can be placed before
  an operand number.

> +Both sections are optional (so you can use only @samp{cons}, or only
> +@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
> +present.
> +
> +@item
> +Each alternative begins with any amount of whitespace.
> +
> +@item
> +Following the whitespace is a comma-separated list of "constraints" and/or
> +"attributes" within brackets @code{[]}, with sections separated by a semicolon.
> +
> +@item
> +Should you want to copy the previous asm line, the symbol @code{^} can be used.
> +This allows less copy pasting between alternative and reduces the number of
> +lines to update on changes.
> +
> +@item
> +When using C functions for output, the idiom @samp{* return @var{function};}
> +can be replaced with the shorthand @samp{<< @var{function};}.
> +
> +@item
> +Following the closing @samp{]} is any amount of whitespace, and then the actual
> +asm output.
> +
> +@item
> +Spaces are allowed in the list (they will simply be removed).
> +
> +@item
> +All constraint alternatives should be specified.  For example, a list of
> +of three blank alternatives should be written @samp{[,,]} rather than
> +@samp{[]}.
> +
> +@item
> +All attribute alternatives should be non-empty, with @samp{*}
> +representing the default attribute value.  For example, a list of three
> +default attribute values should be written @samp{[*,*,*]} rather than
> +@samp{[]}.
> +
> +

Nit: too many blank lines.

> +@item
> +Within an @samp{@{@@} block both multiline and singleline C comments are
> +allowed, but when used outside of a C block they must be the only non-whitespace
> +blocks on the line.
> +
> +@item
> +Within an @samp{@{@@} block, any iterators that do not get expanded will result
> +in an error.  If for some reason it is required to have @code{<} or @code{>} in
> +the output then these must be escaped using @backslashchar{}.
> +
> +@item
> +It is possible to use the @samp{attrs} list to specify some attributes and to
> +use the normal @code{set_attr} syntax to specify other attributes.  There must
> +not be any overlap between the two lists.
> +
> +In other words, the following is valid:
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +       (match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  @dots{}
> +  [(set_attr "foo" "mov_imm")]
> +)
> +@end group
> +@end smallexample
> +
> +but this is not valid:
> +@smallexample
> +@group
> +(define_insn_and_split ""
> +  [(set (match_operand:SI 0 "nonimmediate_operand")
> +       (match_operand:SI 1 "aarch64_mov_operand"))]
> +  ""
> +  @{@@ [cons: 0, 1; attrs: type, arch, length]@}
> +  @dots{}
> +  [(set_attr "arch" "bar")
> +   (set_attr "foo" "mov_imm")]
> +)
> +@end group
> +@end smallexample
> +
> +because you can't mix and match new and old syntax.

Maybe “because it specifies @code{arch} twice”?  Suggesting that because
“new” and “old” tend not to age well.

> +/* Add constraints to an rtx.  This function is similar to remove_constraints.
> +   Errors if adding the constraints would overwrite existing constraints.  */
> +
> +static void
> +add_constraints (rtx part, file_location loc, vec_conlist &cons)
> +{
> +  const char *format_ptr;
> +
> +  if (part == NULL_RTX)
> +    return;
> +
> +  /* If match_op or match_scr, check if we have the right one, and if so, copy
> +     over the constraint list.  */
> +  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
> +    {
> +      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
> +      unsigned id = XINT (part, 0);
> +
> +      if (id >= cons.size ())
> +       fatal_at (loc, "could not find match_operand/scratch with id %d", id);

Is this an error?  I thought it should be treated like...

> +
> +      if (cons[id].idx == -1)
> +       return;

...cons[id].idx == -1 is here.  I.e. I think they could be combined to:

  if (id >= cons.size () || cons[id].idx == -1)
    return;

> +
> +      if (XSTR (part, field)[0] != '\0')
> +       {
> +         error_at (loc, "can't mix normal and compact constraint syntax");
> +         return;
> +       }
> +      XSTR (part, field) = cons[id].out ();
> +      cons[id].idx = -1;
> +    }
> +
> +  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
> +
> +  /* Recursively search the rtx.  */
> +  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
> +    switch (*format_ptr++)
> +      {
> +      case 'e':
> +      case 'u':
> +       add_constraints (XEXP (part, i), loc, cons);
> +       break;
> +      case 'E':
> +       if (XVEC (part, i) != NULL)
> +         for (int j = 0; j < XVECLEN (part, i); j++)
> +           add_constraints (XVECEXP (part, i, j), loc, cons);
> +       break;
> +      default:
> +       continue;
> +      }
> +}
> +
> +/* Add ATTRS to definition X's attribute list.  */
> +
> +static void
> +add_attributes (rtx x, vec_conlist &attrs)
> +{
> +  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
> +  rtvec orig = XVEC (x, attr_index);
> +  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
> +  rtvec copy = rtvec_alloc (n_curr + attrs.size ());
> +
> +  /* Create a shallow copy of existing entries.  */
> +  memcpy (&copy->elem[attrs.size ()], &orig->elem[0], sizeof (rtx) * n_curr);

Sorry for not noticing last time, but I think this should strictly
be guarded by:

  if (orig)

to avoid calculating &orig->elem[0] on a null pointer.

> +  XVEC (x, attr_index) = copy;
> +
> +  /* Create the new elements.  */
> +  for (unsigned i = 0; i < attrs.size (); i++)
> +    {
> +      rtx attr = rtx_alloc (SET_ATTR);
> +      XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
> +      XSTR (attr, 1) = attrs[i].out ();
> +      XVECEXP (x, attr_index, i) = attr;
> +    }
> +}
> +
> +/* Consumes spaces and tabs.  */
> +
> +static inline void
> +skip_spaces (const char **str)
> +{
> +  while (ISBLANK (**str))
> +    (*str)++;
> +}
> +
> +/* Consumes the given character, if it's there.  */
> +
> +static inline bool
> +expect_char (const char **str, char c)
> +{
> +  if (**str != c)
> +    return false;
> +  (*str)++;
> +  return true;
> +}
> +
> +/* Parses the section layout that follows a "{@" if using new syntax. Builds
> +   a vector for a single section. E.g. if we have "attrs: length, arch]..."
> +   then list will have two elements, the first for "length" and the second
> +   for "arch".  */
> +
> +static void
> +parse_section_layout (file_location loc, const char **templ, const char *label,
> +                     vec_conlist &list, bool numeric)
> +{
> +  const char *name_start;
> +  size_t label_len = strlen (label);
> +  if (strncmp (label, *templ, label_len) == 0)
> +    {
> +      *templ += label_len;
> +
> +      /* Gather the names.  */
> +      while (**templ != ';' && **templ != ']')
> +       {
> +         skip_spaces (templ);
> +         name_start = *templ;
> +         int len = 0;
> +         char val = (*templ)[len];
> +         while (val != ',' && val != ';' && val != ']')
> +           {
> +             if (val == 0 || val == '\n')
> +               fatal_at (loc, "missing ']'");
> +             val = (*templ)[++len];
> +           }
> +         *templ += len;
> +         if (val == ',')
> +           (*templ)++;
> +         list.push_back (conlist (name_start, len, numeric));
> +       }
> +    }
> +}
> +
> +/* Parse a section, a section is defined as a named space separated list, e.g.
> +
> +   foo: a, b, c
> +
> +   is a section named "foo" with entries a, b and c.  */
> +
> +static void
> +parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
> +              vec_conlist &list, file_location loc, const char *name)
> +{
> +  unsigned int i;
> +
> +  /* Go through the list, one character at a time, adding said character
> +     to the correct string.  */
> +  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
> +    if (!ISBLANK (**templ))
> +      {
> +       list[i].add (**templ);
> +       if (**templ == ',')
> +         {
> +           ++i;
> +           if (i == n_elems)
> +             fatal_at (loc, "too many %ss in alternative %d: expected %d",
> +                       name, alt_no, n_elems);
> +         }
> +       if (**templ == 0 || **templ == '\n')
> +         fatal_at (loc, "missing ']'");
> +      }

I think it'd be more obvious for the NIL and EOL check to come first,
so that we don't do anything with invalid chars:

  /* Go through the list, one character at a time, adding said character
     to the correct string.  */
  for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
    if (!ISBLANK (**templ))
      {
        if (**templ == 0 || **templ == '\n')
          fatal_at (loc, "missing ']'");
        list[i].add (**templ);
        if (**templ == ',')
          {
            ++i;
            if (i == n_elems)
              fatal_at (loc, "too many %ss in alternative %d: expected %d",
                        name, alt_no, n_elems);
          }
      }

> +
> +  if (i + 1 < n_elems)
> +    fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
> +             name, alt_no, n_elems, i);
> +
> +  list[i].add (',');
> +}
> +
> +/* The compact syntax has more convience syntaxes.  As such we post process
> +   the lines to get them back to something the normal syntax understands.  */
> +
> +static void
> +preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
> +                          std::string &last_line)
> +{
> +  /* Check if we're copying the last statement.  */
> +  if (line.find ("^") == 0 && line.size () == 1)
> +    {
> +      if (last_line.empty ())
> +       fatal_at (loc, "found instruction to copy previous line (^) in"
> +                      "alternative %d but no previous line to copy", alt_no);
> +      line = last_line;
> +      return;
> +    }
> +
> +  std::string result;
> +  std::string buffer;
> +  /* Check if we have << which means return c statement.  */
> +  if (line.find ("<<") == 0)
> +    {
> +      result.append ("* return ");
> +      const char *chunk = line.c_str () + 2;
> +      skip_spaces (&chunk);
> +      result.append (chunk);
> +    }
> +  else
> +    result.append (line);
> +
> +  line = result;
> +  return;
> +}
> +
> +/* Converts an rtx from compact syntax to normal syntax if possible.  */
> +
> +static void
> +convert_syntax (rtx x, file_location loc)
> +{
> +  int alt_no;
> +  unsigned int templ_index;
> +  const char *templ;
> +  vec_conlist tconvec, convec, attrvec;
> +
> +  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
> +
> +  templ = XTMPL (x, templ_index);
> +
> +  /* Templates with constraints start with "{@".  */
> +  if (strncmp ("*{@", templ, 3))
> +    return;
> +
> +  /* Get the layout for the template.  */
> +  templ += 3;
> +  skip_spaces (&templ);
> +
> +  if (!expect_char (&templ, '['))
> +    fatal_at (loc, "expecing `[' to begin section list");
> +
> +  parse_section_layout (loc, &templ, "cons:", tconvec, true);
> +  convec.resize (tconvec.size ());

IMO it'd be better to leave the last line out, since there's nothing
particularly special about tconvec.size() for this mapping.

> +
> +  /* Check for any duplicate cons entries and sort based on i.  */
> +  for (auto e : tconvec)
> +    {
> +      unsigned idx = e.idx;
> +      if (idx >= convec.size ())
> +       convec.resize (idx + 1);
> +
> +      if (convec[idx].idx >= 0)
> +       fatal_at (loc, "duplicate cons number found: %d", idx);
> +      convec[idx] = e;
> +    }
> +  tconvec.clear ();
> +
> +  if (*templ != ']')
> +    {
> +      if (*templ == ';')
> +       skip_spaces (&(++templ));
> +      parse_section_layout (loc, &templ, "attrs:", attrvec, false);
> +    }
> +
> +  if (!expect_char (&templ, ']'))
> +    fatal_at (loc, "expecting `]` to end section list - section list must have "
> +                  "cons first, attrs second");
> +
> +  /* We will write the un-constrainified template into new_templ.  */
> +  std::string new_templ;
> +  new_templ.append ("@");
> +
> +  /* Skip to the first proper line.  */
> +  while (*templ++ != '\n');

This seems to have reverted to an earlier version, but it has the problems
discussed previously: it never terminates if the string doesn't have a '\n',
and it allows anything to come between the ']' and the '\n'.

I think it should be:

  /* Skip to the first proper line.  */
  skip_spaces (&templ);
  if (*templ == 0)
    fatal_at (loc, "'{@...}' blocks must have at least one alternative");
  if (*templ != '\n')
    fatal_at (loc, "unexpected character '%c' after ']'", *templ);
  templ++;

> +
> +  alt_no = 0;
> +  std::string last_line;
> +
> +  /* Process the alternatives.  */
> +  while (*(templ - 1) != '\0')
> +    {
> +      /* Skip leading whitespace.  */
> +      std::string buffer;
> +      skip_spaces (&templ);
> +
> +      /* Check if we're at the end.  */
> +      if (templ[0] == '}' && templ[1] == '\0')
> +       break;
> +
> +      if (expect_char (&templ, '['))
> +       {
> +         new_templ += '\n';
> +         new_templ.append (buffer);
> +         /* Parse the constraint list, then the attribute list.  */
> +         if (convec.size () > 0)
> +           parse_section (&templ, convec.size (), alt_no, convec, loc,
> +                          "constraint");
> +
> +         if (attrvec.size () > 0)
> +           {
> +             if (convec.size () > 0 && !expect_char (&templ, ';'))
> +               fatal_at (loc, "expected `;' to separate constraints "
> +                              "and attributes in alternative %d", alt_no);
> +
> +             parse_section (&templ, attrvec.size (), alt_no,
> +                            attrvec, loc, "attribute");
> +           }
> +
> +         if (!expect_char (&templ, ']'))
> +           fatal_at (loc, "expected end of constraint/attribute list but "
> +                          "missing an ending `]' in alternative %d", alt_no);
> +       }
> +      else if (templ[0] == '/' && templ[1] == '/')
> +       {
> +         templ += 2;
> +         /* Glob till newline or end of string.  */
> +         while (*templ != '\n' || *templ != '\0')
> +           templ++;
> +         templ++;

This should be deleted, since it will skip over '\0' as well as '\n'.
The loop below handles '\n' correctly.

> +
> +         /* Skip any newlines or whitespaces needed.  */
> +         while (ISSPACE(*templ))
> +           templ++;
> +         continue;
> +       }
> +      else if (templ[0] == '/' && templ[1] == '*')
> +       {
> +         templ += 2;
> +         /* Glob till newline or end of multiline comment.  */
> +         while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/')
> +           templ++;
> +         templ += 2;

Same problem about moving past '\0' here.  But the break condition would
stop on things like "*]" or "//", not just "*/".  I think it should be:

   for (; templ[0] != 0; ++templ)
     if (templ[0] == '*' && templ[1] == '/')
       {
         templ += 2;
         break;
       }

> +
> +         /* Skip any newlines or whitespaces needed.  */
> +         while (ISSPACE(*templ))
> +           templ++;
> +         continue;
> +       }
> +      else
> +       fatal_at (loc, "expected constraint/attribute list at beginning of "
> +                      "alternative %d but missing a starting `['", alt_no);
> +
> +      /* Skip whitespace between list and asm.  */
> +      ++templ;

Looks like this line should be deleted.  We only get here after
expect_char (&templ, ']'), which has already skipped the ']'.

OK for trunk with those changes, thanks.

Richard

> +      skip_spaces (&templ);
> +
> +      /* Copy asm to new template.  */
> +      std::string line;
> +      while (*templ != '\n' && *templ != '\0')
> +       line += *templ++;
> +
> +      /* Apply any pre-processing needed to the line.  */
> +      preprocess_compact_syntax (loc, alt_no, line, last_line);
> +      new_templ.append (line);
> +      last_line = line;
> +
> +      /* Normal "*..." syntax expects the closing quote to be on the final
> +        line of asm, whereas we allow the closing "}" to be on its own line.
> +        Postpone copying the '\n' until we know that there is another
> +        alternative in the list.  */
> +      while (ISSPACE (*templ))
> +       templ++;
> +      ++alt_no;
> +    }
> +
> +  /* Write the constraints and attributes into their proper places.  */
> +  if (convec.size () > 0)
> +    add_constraints (x, loc, convec);
> +
> +  if (attrvec.size () > 0)
> +    add_attributes (x, attrvec);
> +
> +  /* Copy over the new un-constrainified template.  */
> +  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
> +
> +  /* Register for later checks during iterator expansions.  */
> +  compact_syntax.add (x);
> +}
> +
>  /* Process a top level rtx in some way, queuing as appropriate.  */
>
>  static void
> @@ -553,10 +1036,12 @@ process_rtx (rtx desc, file_location loc)
>    switch (GET_CODE (desc))
>      {
>      case DEFINE_INSN:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_insn_tail, loc);
>        break;
>
>      case DEFINE_COND_EXEC:
> +      convert_syntax (desc, loc);
>        queue_pattern (desc, &define_cond_exec_tail, loc);
>        break;
>
> @@ -631,6 +1116,7 @@ process_rtx (rtx desc, file_location loc)
>         attr = XVEC (desc, split_code + 1);
>         PUT_CODE (desc, DEFINE_INSN);
>         XVEC (desc, 4) = attr;
> +       convert_syntax (desc, loc);
>
>         /* Queue them.  */
>         insn_elem = queue_pattern (desc, &define_insn_tail, loc);
  
Richard Sandiford June 15, 2023, 6:24 a.m. UTC | #16
Richard Sandiford <richard.sandiford@arm.com> writes:
>> +
>> +         /* Skip any newlines or whitespaces needed.  */
>> +         while (ISSPACE(*templ))
>> +           templ++;
>> +         continue;
>> +       }
>> +      else if (templ[0] == '/' && templ[1] == '*')
>> +       {
>> +         templ += 2;
>> +         /* Glob till newline or end of multiline comment.  */
>> +         while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/')
>> +           templ++;
>> +         templ += 2;
>
> Same problem about moving past '\0' here.  But the break condition would
> stop on things like "*]" or "//", not just "*/".  I think it should be:
>
>    for (; templ[0] != 0; ++templ)
>      if (templ[0] == '*' && templ[1] == '/')
>        {
>          templ += 2;
>          break;
>        }

Actually, I guess it should be:

   while (templ[0] != '*' || templ[1] != '/')
     {
       if (templ[0] == 0)
         fatal_at (loc, "unterminated '/*'");
       templ++;
     }
   templ += 2;

so that we don't accept unterminated /*.

Thanks,
Richard
  

Patch

--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -375,7 +375,7 @@  (define_constants
 ;; As a convenience, "fp_q" means "fp" + the ability to move between
 ;; Q registers and is equivalent to "simd".
 
-(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
+(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16])
 
 (define_enum_attr "arch" "arches" (const_string "any"))
 
@@ -406,6 +406,9 @@  (define_attr "arch_enabled" "no,yes"
 	(and (eq_attr "arch" "fp_q, simd")
 	     (match_test "TARGET_SIMD"))
 
+	(and (eq_attr "arch" "nosimd")
+	     (match_test "!TARGET_SIMD"))
+
 	(and (eq_attr "arch" "fp16")
 	     (match_test "TARGET_FP_F16INST"))
 
@@ -1215,44 +1218,26 @@  (define_expand "mov<mode>"
 )
 
 (define_insn "*mov<mode>_aarch64"
-  [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,    w,r  ,r,w, m,m,r,w,w")
-	(match_operand:SHORT 1 "aarch64_mov_operand"  " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
+  [(set (match_operand:SHORT 0 "nonimmediate_operand")
+	(match_operand:SHORT 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], <MODE>mode)
     || aarch64_reg_or_zero (operands[1], <MODE>mode))"
-{
-   switch (which_alternative)
-     {
-     case 0:
-       return "mov\t%w0, %w1";
-     case 1:
-       return "mov\t%w0, %1";
-     case 2:
-       return aarch64_output_scalar_simd_mov_immediate (operands[1],
-							<MODE>mode);
-     case 3:
-       return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-     case 4:
-       return "ldr<size>\t%w0, %1";
-     case 5:
-       return "ldr\t%<size>0, %1";
-     case 6:
-       return "str<size>\t%w1, %0";
-     case 7:
-       return "str\t%<size>1, %0";
-     case 8:
-       return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
-     case 9:
-       return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
-     case 10:
-       return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
-     default:
-       gcc_unreachable ();
-     }
-}
+  "@@ (cons: 0 1; attrs: type arch)
+  [=r, r    ; mov_reg        , *     ] mov\t%w0, %w1
+  [r , M    ; mov_imm        , *     ] mov\t%w0, %1
+  [w , D<hq>; neon_move      , simd  ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
+  [r , Usv  ; mov_imm        , sve   ] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+  [r , m    ; load_4         , *     ] ldr<size>\t%w0, %1
+  [w , m    ; load_4         , *     ] ldr\t%<size>0, %1
+  [m , rZ   ; store_4        , *     ] str<size>\\t%w1, %0
+  [m , w    ; store_4        , *     ] str\t%<size>1, %0
+  [r , w    ; neon_to_gp<q>  , simd  ] umov\t%w0, %1.<v>[0]
+  [r , w    ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1
+  [w , rZ   ; neon_from_gp<q>, simd  ] dup\t%0.<Vallxd>, %w1
+  [w , rZ   ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
+  [w , w    ; neon_dup       , simd  ] dup\t%<Vetype>0, %1.<v>[0]
+  [w , w    ; neon_dup       , nosimd] fmov\t%s0, %s1"
   ;; The "mov_imm" type for CNT is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
-		     store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
-   (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
 )
 
 (define_expand "mov<mode>"
@@ -1289,79 +1274,69 @@  (define_expand "mov<mode>"
 )
 
 (define_insn_and_split "*movsi_aarch64"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m,  r,  r,  r, w,r,w, w")
-	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], SImode)
     || aarch64_reg_or_zero (operands[1], SImode))"
-  "@
-   mov\\t%w0, %w1
-   mov\\t%w0, %w1
-   mov\\t%w0, %w1
-   mov\\t%w0, %1
-   #
-   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-   ldr\\t%w0, %1
-   ldr\\t%s0, %1
-   str\\t%w1, %0
-   str\\t%s1, %0
-   adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
-   adr\\t%x0, %c1
-   adrp\\t%x0, %A1
-   fmov\\t%s0, %w1
-   fmov\\t%w0, %s1
-   fmov\\t%s0, %s1
-   * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+   [k , r  ; mov_reg  , *   , 4] ^
+   [r , k  ; mov_reg  , *   , 4] ^
+   [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+   [r , n  ; mov_imm  , *   ,16] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+   [r , m  ; load_4   , *   , 4] ldr\t%w0, %1
+   [w , m  ; load_4   , fp  , 4] ldr\t%s0, %1
+   [m , rZ ; store_4  , *   , 4] str\t%w1, %0
+   [m , w  ; store_4  , fp  , 4] str\t%s1, %0
+   [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
+   [r , Usa; adr      , *   , 4] adr\t%x0, %c1
+   [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
+   [w , rZ ; f_mcr    , fp  , 4] fmov\t%s0, %w1
+   [r , w  ; f_mrc    , fp  , 4] fmov\t%w0, %s1
+   [w , w  ; fmov     , fp  , 4] fmov\t%s0, %s1
+   [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
     && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
-   [(const_int 0)]
-   "{
-       aarch64_expand_mov_immediate (operands[0], operands[1]);
-       DONE;
-    }"
+  [(const_int 0)]
+  {
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  }
   ;; The "mov_imm" type for CNT is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
-		    load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
-   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
-   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")
-]
 )
 
 (define_insn_and_split "*movdi_aarch64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m,   r,  r,  r, w,r,w, w")
-	(match_operand:DI 1 "aarch64_mov_operand"  " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(match_operand:DI 1 "aarch64_mov_operand"))]
   "(register_operand (operands[0], DImode)
     || aarch64_reg_or_zero (operands[1], DImode))"
-  "@
-   mov\\t%x0, %x1
-   mov\\t%0, %x1
-   mov\\t%x0, %1
-   * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
-   #
-   * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
-   ldr\\t%x0, %1
-   ldr\\t%d0, %1
-   str\\t%x1, %0
-   str\\t%d1, %0
-   * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
-   adr\\t%x0, %c1
-   adrp\\t%x0, %A1
-   fmov\\t%d0, %x1
-   fmov\\t%x0, %d1
-   fmov\\t%d0, %d1
-   * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
-   "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
-    && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
-   [(const_int 0)]
-   "{
-       aarch64_expand_mov_immediate (operands[0], operands[1]);
-       DONE;
-    }"
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%x0, %x1
+   [k , r  ; mov_reg  , *   , 4] mov\t%0, %x1
+   [r , k  ; mov_reg  , *   , 4] mov\t%x0, %1
+   [r , O  ; mov_imm  , *   , 4] << aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? 'mov\t%x0, %1' : 'mov\t%w0, %1';
+   [r , n  ; mov_imm  , *   ,16] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);
+   [r , m  ; load_8   , *   , 4] ldr\t%x0, %1
+   [w , m  ; load_8   , fp  , 4] ldr\t%d0, %1
+   [m , rZ ; store_8  , *   , 4] str\t%x1, %0
+   [m , w  ; store_8  , fp  , 4] str\t%d1, %0
+   [r , Usw; load_8   , *   , 8] << TARGET_ILP32 ? 'adrp\t%0, %A1;ldr\t%w0, [%0, %L1]' : 'adrp\t%0, %A1;ldr\t%0, [%0, %L1]';
+   [r , Usa; adr      , *   , 4] adr\t%x0, %c1
+   [r , Ush; adr      , *   , 4] adrp\t%x0, %A1
+   [w , rZ ; f_mcr    , fp  , 4] fmov\t%d0, %x1
+   [r , w  ; f_mrc    , fp  , 4] fmov\t%x0, %d1
+   [w , w  ; fmov     , fp  , 4] fmov\t%d0, %d1
+   [w , Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
+  "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
+   && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  {
+      aarch64_expand_mov_immediate (operands[0], operands[1]);
+      DONE;
+  }
   ;; The "mov_imm" type for CNTD is just a placeholder.
-  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
-		     load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
-		     fmov,neon_move")
-   (set_attr "arch"   "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
-   (set_attr "length" "4,4,4,4,*,  4,4, 4,4, 4,8,4,4, 4, 4, 4,   4")]
 )
 
 (define_insn "insv_imm<mode>"
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index cbfc4543531452b0708a38bdf4abf5105b54f8b7..16c50b4a7c414a72b234cef7745a37745e6a41fc 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -924,27 +924,27 @@  (define_peephole2
 ;;  (plus (reg rN) (reg sp)) into (reg rN).  In this case reload will
 ;; put the duplicated register first, and not try the commutative version.
 (define_insn_and_split "*arm_addsi3"
-  [(set (match_operand:SI          0 "s_register_operand" "=rk,l,l ,l ,r ,k ,r,k ,r ,k ,r ,k,k,r ,k ,r")
-	(plus:SI (match_operand:SI 1 "s_register_operand" "%0 ,l,0 ,l ,rk,k ,r,r ,rk,k ,rk,k,r,rk,k ,rk")
-		 (match_operand:SI 2 "reg_or_int_operand" "rk ,l,Py,Pd,rI,rI,k,rI,Pj,Pj,L ,L,L,PJ,PJ,?n")))]
-  "TARGET_32BIT"
-  "@
-   add%?\\t%0, %0, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %1, %2
-   add%?\\t%0, %2, %1
-   add%?\\t%0, %1, %2
-   addw%?\\t%0, %1, %2
-   addw%?\\t%0, %1, %2
-   sub%?\\t%0, %1, #%n2
-   sub%?\\t%0, %1, #%n2
-   sub%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
-   subw%?\\t%0, %1, #%n2
-   #"
+  [(set (match_operand:SI 0 "s_register_operand")
+        (plus:SI (match_operand:SI 1 "s_register_operand")
+                 (match_operand:SI 2 "reg_or_int_operand")))]
+  "TARGET_32BIT"
+  "@@ (cons: 0 1 2; attrs: length predicable_short_it arch)
+   [=rk, %0, rk; 2,  yes, t2] add%?\\t%0, %0, %2
+   [l,   l,  l ; 4,  yes, t2] add%?\\t%0, %1, %2
+   [l,   0,  Py; 4,  yes, t2] add%?\\t%0, %1, %2
+   [l,   l,  Pd; 4,  yes, t2] add%?\\t%0, %1, %2
+   [r,   rk, rI; 4,  no,  * ] add%?\\t%0, %1, %2
+   [k,   k,  rI; 4,  no,  * ] add%?\\t%0, %1, %2
+   [r,   r,  k ; 4,  no,  * ] add%?\\t%0, %2, %1
+   [k,   r,  rI; 4,  no,  a ] add%?\\t%0, %1, %2
+   [r,   rk, Pj; 4,  no,  t2] addw%?\\t%0, %1, %2
+   [k,   k,  Pj; 4,  no,  t2] addw%?\\t%0, %1, %2
+   [r,   rk, L ; 4,  no,  * ] sub%?\\t%0, %1, #%n2
+   [k,   k,  L ; 4,  no,  * ] sub%?\\t%0, %1, #%n2
+   [k,   r,  L ; 4,  no,  a ] sub%?\\t%0, %1, #%n2
+   [r,   rk, PJ; 4,  no,  t2] subw%?\\t%0, %1, #%n2
+   [k,   k,  PJ; 4,  no,  t2] subw%?\\t%0, %1, #%n2
+   [r,   rk, ?n; 16, no,  * ] #"
   "TARGET_32BIT
    && CONST_INT_P (operands[2])
    && !const_ok_for_op (INTVAL (operands[2]), PLUS)
@@ -956,10 +956,10 @@  (define_insn_and_split "*arm_addsi3"
 		      operands[1], 0);
   DONE;
   "
-  [(set_attr "length" "2,4,4,4,4,4,4,4,4,4,4,4,4,4,4,16")
+  [(set_attr "length")
    (set_attr "predicable" "yes")
-   (set_attr "predicable_short_it" "yes,yes,yes,yes,no,no,no,no,no,no,no,no,no,no,no,no")
-   (set_attr "arch" "t2,t2,t2,t2,*,*,*,a,t2,t2,*,*,a,t2,t2,*")
+   (set_attr "predicable_short_it")
+   (set_attr "arch")
    (set (attr "type") (if_then_else (match_operand 2 "const_int_operand" "")
 		      (const_string "alu_imm")
 		      (const_string "alu_sreg")))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 07bf8bdebffb2e523f25a41f2b57e43c0276b745..199f2315432dc56cadfdfc03a8ab381fe02a43b3 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@  See the next chapter for information on the C header file.
                         from such an insn.
 * Output Statement::    For more generality, write C code to output
                         the assembler code.
+* Compact Syntax::      Compact syntax for writing machine descriptions.
 * Predicates::          Controlling what kinds of operands can be used
                         for an insn.
 * Constraints::         Fine-tuning operand selection.
@@ -713,6 +714,211 @@  you can use @samp{*} inside of a @samp{@@} multi-alternative template:
 @end group
 @end smallexample
 
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When the number of alternatives in a @code{define_insn} or
+@code{define_insn_and_split} is large, it may be beneficial to use the
+compact syntax when specifying alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+	(match_operand:SI 1 "aarch64_mov_operand"  " r,r,k,M,n,Usv"))]
+  ""
+  "@
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %w1
+   mov\\t%w0, %1
+   #
+   * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+   [(const_int 0)]
+  @{
+     aarch64_expand_mov_immediate (operands[0], operands[1]);
+     DONE;
+  @}
+  [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+   (set_attr "arch"   "*,*,*,*,*,sve")
+   (set_attr "length" "4,4,4,4,*,  4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)
+   [=r, r  ; mov_reg  , *   , 4] mov\t%w0, %w1
+   [k , r  ; mov_reg  , *   , 4] ^
+   [r , k  ; mov_reg  , *   , 4] ^
+   [r , M  ; mov_imm  , *   , 4] mov\t%w0, %1
+   [r , n  ; mov_imm  , *   , *] #
+   [r , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+  "&& true"
+  [(const_int 0)]
+  @{
+    aarch64_expand_mov_immediate (operands[0], operands[1]);
+    DONE;
+  @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Template must start with "@@" to use the new syntax.
+
+@item
+"@@" is followed by a layout in parentheses which is @samp{"cons:"} followed by
+a list of @code{match_operand}/@code{match_scratch} operand numbers, then a
+semicolon, followed by the same for attributes (@samp{"attrs:"}). Both sections
+are optional (so you can use only @samp{cons}, or only @samp{attrs}, or both),
+and @samp{cons} must come before @samp{attrs} if present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of @samp{constraints} and/or
+@samp{attributes} within brackets @code{[]}, with sections separated by a
+semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This reduces copy-pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @code{* return <function>;} can be
+replaced with the shorthand @code{<< <function>;}.
+
+@item
+Following the closing ']' is any amount of whitespace, and then the actual asm
+output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All alternatives should be specified: a blank list should be "[,,]", "[,,;,]"
+etc., not "[]" or "".
+
+@item
+Within an @@ block, @code{''} is treated the same as @code{""} in cases where a
+single character would be invalid in C.  This means a multicharacter string can
+be created using @code{''} which allows for less escaping.
+
+@item
+Any unexpanded iterators within the block will result in a compile-time error
+rather than being emitted as a literal @code{<..>} in the output asm.  If the
+literal @code{<..>} is required it should be escaped as @code{\<..\>}.
+
+@item
+Within an @@ block, any iterators that do not get expanded will result in an
+error.  If for some reason it is required to have @code{<>} in the output then
+these must be escaped using @backslashchar{}.
+
+@item
+The actual constraint string in the @code{match_operand} or
+@code{match_scratch}, and the attribute string in the @code{set_attr}, must be
+blank or an empty string (you can't combine the old and new syntaxes).
+
+@item
+@code{set_attr} are optional.  If a @code{set_attr} is defined in the
+@samp{attrs} section then that declaration can be both definition and
+declaration.  If both @samp{attrs} and @code{set_attr} are defined for the same
+entry then the attribute string must be empty or blank.
+
+@item
+Additional @code{set_attr} can be specified other than the ones in the
+@samp{attrs} list.  These must use the @samp{normal} syntax and must be defined
+after all @samp{attrs} specified.
+
+In other words, the following are valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "arch")]
+  [(set_attr "length")]
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+and
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+but these are not valid:
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "arch")]
+  [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+and
+
+@smallexample
+@group
+(define_insn_and_split ""
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(match_operand:SI 1 "aarch64_mov_operand"))]
+  ""
+  "@@ (cons: 0 1; attrs: type arch length)"
+  ...
+  [(set_attr "type")]
+  [(set_attr "foo" "mov_imm")]
+  [(set_attr "arch")]
+  [(set_attr "length")]
+)
+@end group
+@end smallexample
+
+because the order of the entries doesn't match and new entries must be last.
+@end itemize
+
 @node Predicates
 @section Predicates
 @cindex predicates
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 163e8dfef4ca2c2c92ce1cf001ee6be40a54ca3e..4e67cd6ca5356c62165382de01da6bbc6f3c5fa2 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -91,6 +91,7 @@  along with GCC; see the file COPYING3.  If not see
 #include "errors.h"
 #include "read-md.h"
 #include "gensupport.h"
+#include <string>
 
 /* No instruction can have more operands than this.  Sorry for this
    arbitrary limit, but what machine will have an instruction with
@@ -157,6 +158,7 @@  public:
   int n_alternatives;		/* Number of alternatives in each constraint */
   int operand_number;		/* Operand index in the big array.  */
   int output_format;		/* INSN_OUTPUT_FORMAT_*.  */
+  bool compact_syntax_p;
   struct operand_data operand[MAX_MAX_OPERANDS];
 };
 
@@ -700,12 +702,37 @@  process_template (class data *d, const char *template_code)
 	  if (sp != ep)
 	    message_at (d->loc, "trailing whitespace in output template");
 
-	  while (cp < sp)
+	  /* Check for any unexpanded iterators.  */
+	  std::string buff (cp, sp - cp);
+	  if (bp[0] != '*' && d->compact_syntax_p)
 	    {
-	      putchar (*cp);
-	      cp++;
+	      size_t start = buff.find ('<');
+	      size_t end = buff.find ('>', start + 1);
+	      if (end != std::string::npos || start != std::string::npos)
+		{
+		  if (end == std::string::npos || start == std::string::npos)
+		    fatal_at (d->loc, "unmatched angle brackets, likely an "
+			      "error in iterator syntax in %s", buff.c_str ());
+
+		  if (start != 0
+		      && buff[start-1] == '\\'
+		      && buff[end-1] == '\\')
+		    {
+		      /* Found a valid escape sequence, erase the characters for
+			 output.  */
+		      buff.erase (end-1, 1);
+		      buff.erase (start-1, 1);
+		    }
+		  else
+		    fatal_at (d->loc, "unresolved iterator '%s' in '%s'",
+			      buff.substr(start+1, end - start-1).c_str (),
+			      buff.c_str ());
+		}
 	    }
 
+	  printf ("%s", buff.c_str ());
+	  cp = sp;
+
 	  if (!found_star)
 	    puts ("\",");
 	  else if (*bp != '*')
@@ -881,6 +908,8 @@  gen_insn (md_rtx_info *info)
   else
     d->name = 0;
 
+  d->compact_syntax_p = compact_syntax.contains (insn);
+
   /* Build up the list in the same order as the insns are seen
      in the machine description.  */
   d->next = 0;
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index a1edfbd71908b6244b40f801c6c01074de56777e..7925e22ed418767576567cad583bddf83c0846b1 100644
--- a/gcc/gensupport.h
+++ b/gcc/gensupport.h
@@ -20,6 +20,7 @@  along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_GENSUPPORT_H
 #define GCC_GENSUPPORT_H
 
+#include "hash-set.h"
 #include "read-md.h"
 
 struct obstack;
@@ -218,6 +219,8 @@  struct pattern_stats
   int num_operand_vars;
 };
 
+extern hash_set<rtx> compact_syntax;
+
 extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
 extern void compute_test_codes (rtx, file_location, char *);
 extern file_location get_file_location (rtx);
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index f9efc6eb7572a44b8bb154b0b22be3815bd0d244..c6a731968d2d6c7c9b01ad00e9dabb2b6d5f173e 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -27,12 +27,16 @@ 
 #include "read-md.h"
 #include "gensupport.h"
 #include "vec.h"
+#include <string>
+#include <vector>
 
 #define MAX_OPERANDS 40
 
 static rtx operand_data[MAX_OPERANDS];
 static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
 static char used_operands_numbers[MAX_OPERANDS];
+/* List of entries which are part of the new syntax.  */
+hash_set<rtx> compact_syntax;
 
 
 /* In case some macros used by files we include need it, define this here.  */
@@ -545,6 +549,532 @@  gen_rewrite_sequence (rtvec vec)
   return new_vec;
 }
 
+/* The following is for handling the compact syntax for constraints and
+   attributes.
+
+   The normal syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand" "r,I,k")
+       (match_operand: 2 "s_register_operand" "r,k,I")
+       ...
+       "@
+	<asm>
+	<asm>
+	<asm>"
+       ...
+       (set_attr "length" "4,8,8")
+
+   The compact syntax looks like this:
+
+       ...
+       (match_operand: 0 "s_register_operand")
+       (match_operand: 2 "s_register_operand")
+       ...
+       "@@ (cons: 0 2; attrs: length)
+	[r,r; 4] <asm>
+	[I,k; 8] <asm>
+	[k,I; 8] <asm>"
+       ...
+       (set_attr "length")
+
+   This is the only place where this syntax needs to be handled.  Relevant
+   patterns are transformed from compact to the normal syntax before they are
+   queued, so none of the gen* programs need to know about this syntax at all.
+
+   Conversion process (convert_syntax):
+
+   0) Check that pattern actually uses new syntax (check for "@@").
+
+   1) Get the "layout", i.e. the "(cons: 0 2; attrs: length)" from the above
+      example.  cons must come first; both are optional. Set up two vecs,
+      convec and attrvec, for holding the results of the transformation.
+
+   2) For each alternative: parse the list of constraints and/or attributes,
+      and enqueue them in the relevant lists in convec and attrvec.  By the end
+      of this process, convec[N].con and attrvec[N].con should contain regular
+      syntax constraint/attribute lists like "r,I,k".  Copy the asm to a string
+      as we go.
+
+   3) Search the rtx and write the constraint and attribute lists into the
+      correct places. Write the asm back into the template.  */
+
+/* Helper class for shuffling constraints/attributes in convert_syntax and
+   add_constraints/add_attributes.  Each object pairs the name of one operand
+   or attribute with the comma-separated list of its per-alternative values.
+   The list includes commas but not whitespace.  */
+
+class conlist {
+private:
+  /* The list built so far, e.g. "r,I,k,".  parse_section terminates the
+     entry of every alternative with a comma, so a non-empty list ends in
+     one.  */
+  std::string con;
+
+public:
+  /* Identifier of the rtx this list belongs to (see the constructor).  */
+  std::string name;
+
+  /* [ns..ns + len) should be a string with the id of the rtx to match
+     i.e. if rtx is the relevant match_operand or match_scratch then
+     [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
+     [ns..ns + len) should equal XSTR (rtx, 0).  */
+  conlist (const char *ns, unsigned int len)
+  {
+    name.assign (ns, len);
+  }
+
+  /* Adds a character to the end of the string.  */
+  void add (char c)
+  {
+    con += c;
+  }
+
+  /* Output the string in the form of a brand-new char *, then clear the
+     internal string.  The caller owns the returned buffer.  */
+  char * out ()
+  {
+    /* Final character is always a trailing comma, so strip it out.  Guard
+       the empty case explicitly rather than relying on size () - 1 wrapping
+       around and xstrndup clamping the length.  */
+    size_t len = con.empty () ? 0 : con.size () - 1;
+    char * q = xstrndup (con.c_str (), len);
+    con.clear ();
+    return q;
+  }
+};
+
+typedef std::vector<conlist> vec_conlist;
+
+/* Add constraints to an rtx.  The match_operand/match_scratch that are matched
+   must be in depth-first order i.e. read from top to bottom in the pattern.
+   PART is the (sub-)rtx to search, LOC is the location used for diagnostics,
+   INDEX is the index of the conlist we are up to so far and CONS holds the
+   constraint lists gathered from the template.
+   This function is similar to remove_constraints.
+   Errors if adding the constraints would overwrite existing constraints; in
+   that case cons.size () is returned so that the search stops.
+   Otherwise returns 1 + index of last conlist to be matched.  */
+
+static unsigned int
+add_constraints (rtx part, file_location loc, unsigned int index,
+		 vec_conlist &cons)
+{
+  const char *format_ptr;
+  /* Big enough for any int, so the operand number is never truncated before
+     it is compared against the conlist name.  */
+  char id[16];
+
+  if (part == NULL_RTX || index == cons.size ())
+    return index;
+
+  /* If match_op or match_scr, check if we have the right one, and if so, copy
+     over the constraint list.  */
+  if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
+    {
+      /* The constraint string is operand 2 of a match_operand and operand 1
+	 of a match_scratch.  */
+      int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
+
+      snprintf (id, sizeof (id), "%d", XINT (part, 0));
+      if (cons[index].name.compare (id) == 0)
+	{
+	  if (XSTR (part, field)[0] != '\0')
+	    {
+	      error_at (loc, "can't mix normal and compact constraint syntax");
+	      return cons.size ();
+	    }
+	  XSTR (part, field) = cons[index].out ();
+
+	  ++index;
+	}
+    }
+
+  format_ptr = GET_RTX_FORMAT (GET_CODE (part));
+
+  /* Recursively search the rtx.  */
+  for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
+    switch (*format_ptr++)
+      {
+      case 'e':
+      case 'u':
+	index = add_constraints (XEXP (part, i), loc, index, cons);
+	break;
+      case 'E':
+	if (XVEC (part, i) != NULL)
+	  for (int j = 0; j < XVECLEN (part, i); j++)
+	    index = add_constraints (XVECEXP (part, i, j), loc, index, cons);
+	break;
+      default:
+	continue;
+      }
+
+  return index;
+}
+
+/* Add attributes to an rtx.  X is the define_insn (or similar) whose attribute
+   vector is updated, LOC is the location used for diagnostics and ATTRS holds
+   the attribute lists gathered from the template.  Each attribute is looked
+   up by name among the set_attr entries, so the order of ATTRS does not have
+   to match the order of the entries in the pattern.
+   Errors if adding the attributes would overwrite existing attributes; in
+   that case attrs.size () is returned so that the caller does not report a
+   second, misleading error.
+   Otherwise returns the index of the first attribute for which no set_attr
+   was found, or attrs.size () if all of them were written.  */
+
+static unsigned int
+add_attributes (rtx x, file_location loc, vec_conlist &attrs)
+{
+  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+
+  if (XVEC (x, attr_index) == NULL)
+    return 0;
+
+  for (unsigned int index = 0; index < attrs.size (); ++index)
+    {
+      /* Find the set_attr that carries this attribute's name.  */
+      rtx match = NULL_RTX;
+      for (int i = 0; i < XVECLEN (x, attr_index); ++i)
+	{
+	  rtx part = XVECEXP (x, attr_index, i);
+
+	  if (GET_CODE (part) == SET_ATTR
+	      && attrs[index].name.compare (XSTR (part, 0)) == 0)
+	    {
+	      match = part;
+	      break;
+	    }
+	}
+
+      if (match == NULL_RTX)
+	return index;
+
+      if (XSTR (match, 1) && XSTR (match, 1)[0] != '\0')
+	{
+	  error_at (loc, "can't mix normal and compact attribute syntax");
+	  return attrs.size ();
+	}
+      XSTR (match, 1) = attrs[index].out ();
+    }
+
+  return attrs.size ();
+}
+
+/* Modify the attributes list to make space for the implicitly declared
+   attributes in the attrs: list.  For every entry of ATTRS that has no
+   set_attr of the same name in X, a new set_attr with a null value is
+   created.  The new entries are placed in front of the existing ones, which
+   are carried over unchanged.  */
+
+static void
+create_missing_attributes (rtx x, file_location /* loc */, vec_conlist &attrs)
+{
+  if (attrs.empty ())
+    return;
+
+  unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+  rtvec orig = XVEC (x, attr_index);
+  size_t n_curr = orig ? XVECLEN (x, attr_index) : 0;
+  std::vector<const conlist *> missing;
+
+  /* This is an O(n*m) loop but it's fine, both n and m will always be very
+     small.  */
+  for (const conlist &cl : attrs)
+    {
+      bool found = false;
+      for (unsigned int i = 0; i < n_curr && !found; ++i)
+	{
+	  rtx part = XVECEXP (x, attr_index, i);
+
+	  /* Only a set_attr of the same name counts; other kinds of entry
+	     say nothing about this attribute.  */
+	  found = (GET_CODE (part) == SET_ATTR
+		   && cl.name.compare (XSTR (part, 0)) == 0);
+	}
+
+      if (!found)
+	missing.push_back (&cl);
+    }
+
+  /* Nothing to add, keep the existing vector as it is.  */
+  if (missing.empty ())
+    return;
+
+  rtvec copy = rtvec_alloc (n_curr + missing.size ());
+
+  /* Create a shallow copy of existing entries.  ORIG is null when the
+     pattern had no attribute vector at all, so don't touch it then.  */
+  if (n_curr > 0)
+    memcpy (&copy->elem[missing.size ()], &orig->elem[0],
+	    sizeof (rtx) * n_curr);
+  XVEC (x, attr_index) = copy;
+
+  /* Create the new elements.  */
+  for (unsigned int i = 0; i < missing.size (); i++)
+    {
+      rtx attr = rtx_alloc (SET_ATTR);
+      XSTR (attr, 0) = xstrdup (missing[i]->name.c_str ());
+      XSTR (attr, 1) = NULL;
+      XVECEXP (x, attr_index, i) = attr;
+    }
+}
+
+/* Advances *STR past any run of spaces and tabs it currently points at.  */
+
+static inline void
+skip_spaces (const char **str)
+{
+  const char *p = *str;
+
+  for (; *p == ' ' || *p == '\t'; ++p)
+    ;
+  *str = p;
+}
+
+/* If *STR points at the character C, steps over it and returns true.
+   Otherwise leaves *STR alone and returns false.  */
+
+static inline bool
+expect_char (const char **str, char c)
+{
+  const bool matched = **str == c;
+
+  if (matched)
+    ++*str;
+  return matched;
+}
+
+/* Parses the section layout that follows a "@@" if using new syntax.  Builds
+   a vector for a single section.  E.g. if we have "attrs: length arch)..."
+   then list will have two elements, the first for "length" and the second
+   for "arch".
+
+   If *TEMPL does not start with LABEL nothing is consumed and LIST is left
+   untouched.  Otherwise *TEMPL is advanced past the label and the names and
+   is left pointing at the terminating ';' or ')', or at the end of the
+   string if the layout is unterminated (convert_syntax then reports the
+   missing ')').  */
+
+static void
+parse_section_layout (const char **templ, const char *label,
+		      vec_conlist &list)
+{
+  size_t label_len = strlen (label);
+  if (strncmp (label, *templ, label_len) != 0)
+    return;
+
+  *templ += label_len;
+
+  /* Gather the names.  */
+  for (;;)
+    {
+      skip_spaces (templ);
+
+      /* Check for the terminator only after skipping whitespace, so that
+	 blanks in front of it do not produce an empty name, and stop at the
+	 end of the string rather than reading past it.  */
+      if (**templ == ';' || **templ == ')' || **templ == '\0')
+	break;
+
+      const char *name_start = *templ;
+      size_t len = 0;
+      while (name_start[len] != ' ' && name_start[len] != '\t'
+	     && name_start[len] != ';' && name_start[len] != ')'
+	     && name_start[len] != '\0')
+	len++;
+      *templ += len;
+      list.push_back (conlist (name_start, len));
+    }
+}
+
+/* Parse a section, a section is defined as a named space separated list, e.g.
+
+   foo: a b c
+
+   is a section named "foo" with entries a,b and c.
+
+   Here the section is one half of the "[...]" list of an alternative:
+   *TEMPL points at its first character and is advanced up to, but not past,
+   the ']' or ';' that ends it.  The comma-separated entries are appended to
+   LIST[0..N_ELEMS), each followed by a comma.  ALT_NO, LOC and NAME are only
+   used for diagnostics.  Parsing also stops at the end of the line or of the
+   string, so that the caller can report the missing terminator.  */
+
+static void
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
+	       vec_conlist &list, file_location loc, const char *name)
+{
+  unsigned int i = 0;
+
+  /* Go through the list, one character at a time, adding said character
+     to the correct string.  */
+  for (; **templ != ']' && **templ != ';'
+	 && **templ != '\n' && **templ != '\0'; (*templ)++)
+    {
+      if (**templ == ' ' || **templ == '\t')
+	continue;
+
+      list[i].add (**templ);
+      if (**templ == ',')
+	{
+	  ++i;
+	  if (i == n_elems)
+	    fatal_at (loc, "too many %ss in alternative %u: expected %u",
+		      name, alt_no, n_elems);
+	}
+    }
+
+  /* I counts the commas seen, so I + 1 entries were given.  */
+  if (i + 1 < n_elems)
+    fatal_at (loc, "too few %ss in alternative %u: expected %u, got %u",
+	      name, alt_no, n_elems, i + 1);
+
+  /* Terminate the last entry so that every list ends in a comma.  */
+  list[i].add (',');
+}
+
+/* The compact syntax has more convenience syntaxes.  As such we process the
+   lines to get them back to something the normal syntax understands:
+
+   - a line consisting of just "^" is replaced by LAST_LINE, the previous
+     (already processed) line;
+   - a leading "<<" is turned into "* return ";
+   - a '...' string of more than one character becomes a "..." string.
+
+   LINE is rewritten in place.  LOC and ALT_NO are used for diagnostics.  */
+
+static void
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
+			   std::string &last_line)
+{
+  /* Check if we're copying the last statement.  */
+  if (line == "^")
+    {
+      if (last_line.empty ())
+	fatal_at (loc, "found instruction to copy previous line (^) in "
+		       "alternative %d but no previous line to copy", alt_no);
+      line = last_line;
+      return;
+    }
+
+  std::string result;
+  size_t start = 0;
+  /* Check if we have << which means return c statement.  Skip the marker
+     and any blanks after it; do not assume exactly one character follows.  */
+  if (line.compare (0, 2, "<<") == 0)
+    {
+      result.append ("* return ");
+      start = 2;
+      while (start < line.length ()
+	     && (line[start] == ' ' || line[start] == '\t'))
+	start++;
+    }
+
+  /* Now perform string expansion.  Replace ' with " if more than one character
+     in the string.  */
+  bool double_quoted = false;
+  bool quote_open = false;
+  for (size_t i = start; i < line.length (); i++)
+    {
+      char chr = line[i];
+      if (chr != '\'')
+	{
+	  result += chr;
+	  continue;
+	}
+
+      /* Decide afresh for every opening quote, so that a character literal
+	 following a string keeps its single quotes.  */
+      if (!quote_open)
+	double_quoted = (i + 2 < line.length ()
+			 && line[i + 1] != '\''
+			 && line[i + 2] != '\'');
+
+      result += double_quoted ? '"' : chr;
+      quote_open = !quote_open;
+    }
+
+  /* Quotes were mismatched.  Abort.  */
+  if (quote_open)
+    fatal_at (loc, "quote mismatch in instruction template '%s'",
+	      line.c_str ());
+
+  line = result;
+}
+
+/* Converts an rtx from compact syntax to normal syntax if possible.
+
+   X is the pattern and LOC its location, used for diagnostics.  Nothing is
+   done unless the output template of X starts with "@@".  Otherwise the
+   section layout and the "[...]" list of every alternative are parsed, the
+   collected constraints and attributes are written into the match_operand,
+   match_scratch and set_attr rtxes of X, the template is replaced by an
+   equivalent "@" template and X is recorded in compact_syntax.  */
+
+static void
+convert_syntax (rtx x, file_location loc)
+{
+  int alt_no;
+  unsigned int index, templ_index;
+  const char *templ;
+  vec_conlist convec, attrvec;
+
+  templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+
+  templ = XTMPL (x, templ_index);
+
+  /* Templates with constraints start with "@@".  */
+  if (strncmp ("@@", templ, 2))
+    return;
+
+  /* Get the layout for the template.  */
+  templ += 2;
+  skip_spaces (&templ);
+
+  if (!expect_char (&templ, '('))
+    fatal_at (loc, "expecting `(' to begin section list");
+
+  parse_section_layout (&templ, "cons:", convec);
+
+  if (*templ != ')')
+    {
+      if (*templ == ';')
+	skip_spaces (&(++templ));
+      parse_section_layout (&templ, "attrs:", attrvec);
+      create_missing_attributes (x, loc, attrvec);
+    }
+
+  if (!expect_char (&templ, ')'))
+    {
+      fatal_at (loc, "expecting `)` to end section list - section list "
+		"must have cons first, attrs second");
+    }
+
+  /* We will write the un-constrainified template into new_templ.  */
+  std::string new_templ;
+  new_templ.append ("@\n");
+
+  /* Skip to the first proper line.  Stop at the end of the string too, so
+     that a template consisting of only the layout is diagnosed rather than
+     read past its end.  */
+  while (*templ != '\n' && *templ != '\0')
+    templ++;
+  if (*templ == '\0')
+    fatal_at (loc, "compact syntax template has no alternatives");
+  templ++;
+  alt_no = 0;
+
+  std::string last_line;
+
+  /* Process the alternatives.  */
+  for (;;)
+    {
+      /* Copy leading whitespace.  */
+      while (*templ == ' ' || *templ == '\t')
+	new_templ += *templ++;
+
+      if (expect_char (&templ, '['))
+	{
+	  /* Parse the constraint list, then the attribute list.  */
+	  if (convec.size () > 0)
+	    parse_section (&templ, convec.size (), alt_no, convec, loc,
+			   "constraint");
+
+	  if (attrvec.size () > 0)
+	    {
+	      if (convec.size () > 0 && !expect_char (&templ, ';'))
+		fatal_at (loc, "expected `;' to separate constraints "
+			       "and attributes in alternative %d", alt_no);
+
+	      parse_section (&templ, attrvec.size (), alt_no,
+			     attrvec, loc, "attribute");
+	    }
+
+	  if (!expect_char (&templ, ']'))
+	    fatal_at (loc, "expected end of constraint/attribute list but "
+			   "missing an ending `]' in alternative %d", alt_no);
+	}
+      else
+	fatal_at (loc, "expected constraint/attribute list at beginning of "
+		       "alternative %d but missing a starting `['", alt_no);
+
+      /* Skip whitespace between list and asm.  Only blanks are skipped, so
+	 an alternative with an empty asm does not step over the end of its
+	 line.  */
+      skip_spaces (&templ);
+
+      /* Copy asm to new template.  */
+      std::string line;
+      while (*templ != '\n' && *templ != '\0')
+	line += *templ++;
+
+      /* Apply any pre-processing needed to the line.  */
+      preprocess_compact_syntax (loc, alt_no, line, last_line);
+      new_templ.append (line);
+      last_line = line;
+
+      /* The end of the string ends the last alternative.  */
+      if (*templ == '\0')
+	break;
+
+      new_templ += *templ++;
+      ++alt_no;
+    }
+
+  /* Write the constraints and attributes into their proper places.  */
+  if (convec.size () > 0)
+    {
+      index = add_constraints (x, loc, 0, convec);
+      if (index < convec.size ())
+	fatal_at (loc, "could not find match_operand/scratch with id %s",
+		  convec[index].name.c_str ());
+    }
+
+  if (attrvec.size () > 0)
+    {
+      index = add_attributes (x, loc, attrvec);
+      if (index < attrvec.size ())
+	fatal_at (loc, "could not find set_attr for attribute %s",
+		  attrvec[index].name.c_str ());
+    }
+
+  /* Copy over the new un-constrainified template.  */
+  XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
+
+  /* Register for later checks during iterator expansions.  */
+  compact_syntax.add (x);
+
+#if DEBUG
+  print_rtl_single (stderr, x);
+#endif
+}
+
 /* Process a top level rtx in some way, queuing as appropriate.  */
 
 static void
@@ -553,10 +1083,12 @@  process_rtx (rtx desc, file_location loc)
   switch (GET_CODE (desc))
     {
     case DEFINE_INSN:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_insn_tail, loc);
       break;
 
     case DEFINE_COND_EXEC:
+      convert_syntax (desc, loc);
       queue_pattern (desc, &define_cond_exec_tail, loc);
       break;
 
@@ -631,6 +1163,7 @@  process_rtx (rtx desc, file_location loc)
 	attr = XVEC (desc, split_code + 1);
 	PUT_CODE (desc, DEFINE_INSN);
 	XVEC (desc, 4) = attr;
+	convert_syntax (desc, loc);
 
 	/* Queue them.  */
 	insn_elem = queue_pattern (desc, &define_insn_tail, loc);