"Maciej W. Rozycki" <macro@orcam.me.uk> writes:
...
> There are notable regressions between a plain `-mno-bwx' configuration
> and a `-mno-bwx -msafe-partial' one:
>
> FAIL: gm2/iso/run/pass/strcons.mod execution, -g
> FAIL: gm2/iso/run/pass/strcons.mod execution, -O
> FAIL: gm2/iso/run/pass/strcons.mod execution, -O -g
> FAIL: gm2/iso/run/pass/strcons.mod execution, -Os
> FAIL: gm2/iso/run/pass/strcons.mod execution, -O3 -fomit-frame-pointer
> FAIL: gm2/iso/run/pass/strcons.mod execution, -O3 -fomit-frame-pointer -finline-functions
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -g
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -O
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -O -g
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -Os
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -O3 -fomit-frame-pointer
> FAIL: gm2/iso/run/pass/strcons4.mod execution, -O3 -fomit-frame-pointer -finline-functions
>
> Just as with the `-msafe-bwa' regressions, they come from the fact that these
> test cases end up calling code that expects a reference to aligned data
> but is handed one to unaligned data, causing an alignment exception with
> LDL_L or LDQ_L, which will eventually be fixed up by Linux.
>
> In some cases GCC chooses to open-code block memory write operations, so
> with non-BWX targets `-msafe-partial' will in the usual case have to be
> used together with `-msafe-bwa'.
>
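
If I understand the failure mode right, it boils down to something like
the following hand-written C sketch (the names are made up for
illustration; the actual test cases are in Modula-2), where a callee
compiled assuming natural alignment is handed a reference into
unaligned data:

  struct rec { long tag; char pad[6]; };  /* Assumed 8-byte aligned.  */

  void
  clear_pad (struct rec *p)
  {
    /* With -mno-bwx -msafe-partial the 6-byte clear may be done with an
       LDQ_L/STQ_C loop directly on the quadword GCC believes to be
       aligned; if P actually points into unaligned memory the LDQ_L
       raises an alignment exception that Linux then has to fix up.  */
    __builtin_memset (p->pad, 0, sizeof p->pad);
  }
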
I've logged PR 118600
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118600
and have an experimental proposed patch and changelog below. In summary
the patch tests every assignment (of a constructor to a designator) to
ensure the types are GCC equivalent. If they are equivalent then it
uses assignment; if not, it copies a structure field by field and uses
strncpy to copy a string cst into an array. I wonder whether these
changes fix the regression test failures seen on Alpha above?
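
For reference, the kind of assignment the patch decomposes is roughly
the C analogue of the following (a made-up illustration; the real test
cases assign Modula-2 string constructors):

  struct str { char name[12]; unsigned int len; };

  void
  assign (struct str *p)
  {
    /* A constructor containing a string cst assigned to a designator:
       previously the whole aggregate was assigned in one go, which may
       be expanded as a block copy; with the patch the string field
       would instead be copied with __builtin_strncpy and the remaining
       fields assigned individually.  */
    *p = (struct str) { "hello", 5 };
  }
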
regards,
Gaius
--
PR modula2/118600 Assigning to a record causes alignment exception

This patch recursively tests every assignment (of a constructor
to a designator) to ensure the types are GCC equivalent. If they
are equivalent then it uses a gimple assignment; if not, it
copies the structure field by field and uses __builtin_strncpy to
copy a string cst into an array. Unions are copied by
__builtin_memcpy.

gcc/m2/ChangeLog:

* gm2-compiler/M2GenGCC.mod (PerformCodeBecomes): New procedure.
(CodeBecomes): Refactor and call PerformCodeBecomes.
* gm2-gcc/m2builtins.cc (gm2_strncpy_node): New global variable.
(DoBuiltinStrNCopy): New function.
(m2builtins_BuiltinStrNCopy): New function.
(m2builtins_init): Initialize gm2_strncpy_node.
* gm2-gcc/m2builtins.def (BuiltinStrNCopy): New procedure
function.
* gm2-gcc/m2builtins.h (m2builtins_BuiltinStrNCopy): New
function.
* gm2-gcc/m2statement.cc (copy_record_fields): New function.
(copy_array): Ditto.
(copy_strncpy): Ditto.
(copy_memcpy): Ditto.
(CopyByField_Lower): Ditto.
(m2statement_CopyByField): Ditto.
* gm2-gcc/m2statement.def (CopyByField): New procedure function.
* gm2-gcc/m2statement.h (m2statement_CopyByField): New function.
* gm2-gcc/m2type.cc (check_record_fields): Ditto.
(check_array_types): Ditto.
(m2type_IsGccStrictTypeEquivalent): Ditto.
* gm2-gcc/m2type.def (IsGccStrictTypeEquivalent): New procedure
function.
* gm2-gcc/m2type.h (m2type_IsAddress): Replace return type int
with bool.
diff --git a/gcc/m2/gm2-compiler/M2GenGCC.mod b/gcc/m2/gm2-compiler/M2GenGCC.mod
index bba77ff12e1..912dfe7b8e8 100644
--- a/gcc/m2/gm2-compiler/M2GenGCC.mod
+++ b/gcc/m2/gm2-compiler/M2GenGCC.mod
@@ -43,7 +43,7 @@ FROM SymbolTable IMPORT PushSize, PopSize, PushValue, PopValue,
IsConst, IsConstSet, IsProcedure, IsProcType,
IsVar, IsVarParamAny, IsTemporary, IsTuple,
IsEnumeration,
- IsUnbounded, IsArray, IsSet, IsConstructor,
+ IsUnbounded, IsArray, IsSet, IsConstructor, IsConstructorConstant,
IsProcedureVariable,
IsUnboundedParamAny,
IsRecordField, IsFieldVarient, IsVarient, IsRecord,
@@ -231,7 +231,7 @@ FROM m2statement IMPORT BuildAsm, BuildProcedureCallTree, BuildParam, BuildFunct
BuildReturnValueCode, SetLastFunction,
BuildIncludeVarConst, BuildIncludeVarVar,
BuildExcludeVarConst, BuildExcludeVarVar,
- BuildBuiltinCallTree,
+ BuildBuiltinCallTree, CopyByField,
GetParamTree, BuildCleanUp,
BuildTryFinally,
GetLastFunction, SetLastFunction,
@@ -240,7 +240,7 @@ FROM m2statement IMPORT BuildAsm, BuildProcedureCallTree, BuildParam, BuildFunct
FROM m2type IMPORT ChainOnParamValue, GetPointerType, GetIntegerType, AddStatement,
GetCardinalType, GetWordType, GetM2ZType, GetM2RType, GetM2CType,
BuildCharConstant, AddStringToTreeList, BuildArrayStringConstructor,
- GetArrayNoOfElements, GetTreeType ;
+ GetArrayNoOfElements, GetTreeType, IsGccStrictTypeEquivalent ;
FROM m2block IMPORT RememberConstant, pushGlobalScope, popGlobalScope, finishFunctionDecl,
pushFunctionScope, popFunctionScope,
@@ -3492,6 +3492,29 @@ BEGIN
END checkDeclare ;
+(*
+   PerformCodeBecomes - generate code for des := expr using SSA
+                        replacement, tree assignment or CopyByField
+                        depending upon the types.
+*)
+
+PROCEDURE PerformCodeBecomes (location: location_t;
+ virtpos: CARDINAL; des, expr: CARDINAL) ;
+VAR
+ destree, exprtree: tree ;
+BEGIN
+ destree := Mod2Gcc (des) ;
+ exprtree := FoldConstBecomes (virtpos, des, expr) ;
+ IF IsVar (des) AND IsVariableSSA (des)
+ THEN
+ Replace (des, exprtree)
+ ELSIF IsGccStrictTypeEquivalent (destree, exprtree)
+ THEN
+ BuildAssignmentStatement (location, destree, exprtree)
+ ELSE
+ CopyByField (location, destree, exprtree)
+ END
+END PerformCodeBecomes ;
+
+
(*
------------------------------------------------------------------------------
:= Operator
@@ -3571,14 +3594,7 @@ BEGIN
ELSE
IF checkBecomes (des, expr, virtpos, despos, exprpos)
THEN
- IF IsVar (des) AND IsVariableSSA (des)
- THEN
- Replace (des, FoldConstBecomes (virtpos, des, expr))
- ELSE
- BuildAssignmentStatement (location,
- Mod2Gcc (des),
- FoldConstBecomes (virtpos, des, expr))
- END
+ PerformCodeBecomes (location, virtpos, des, expr)
ELSE
SubQuad (quad) (* We don't want multiple errors for the quad. *)
END
diff --git a/gcc/m2/gm2-gcc/m2builtins.cc b/gcc/m2/gm2-gcc/m2builtins.cc
index 2c02d9781a7..3eab269ba0f 100644
--- a/gcc/m2/gm2-gcc/m2builtins.cc
+++ b/gcc/m2/gm2-gcc/m2builtins.cc
@@ -417,6 +417,7 @@ static GTY (()) tree ldouble_ftype_ldouble;
static GTY (()) tree gm2_alloca_node;
static GTY (()) tree gm2_memcpy_node;
static GTY (()) tree gm2_memset_node;
+static GTY (()) tree gm2_strncpy_node;
static GTY (()) tree gm2_isfinite_node;
static GTY (()) tree gm2_isnan_node;
static GTY (()) tree gm2_huge_valf_node;
@@ -1035,6 +1036,18 @@ DoBuiltinMemCopy (location_t location, tree dest, tree src, tree bytes)
return call;
}
+static tree
+DoBuiltinStrNCopy (location_t location, tree dest, tree src, tree bytes)
+{
+ tree functype = TREE_TYPE (gm2_strncpy_node);
+ tree rettype = TREE_TYPE (functype);
+ tree funcptr
+ = build1 (ADDR_EXPR, build_pointer_type (functype), gm2_strncpy_node);
+ tree call
+ = m2treelib_DoCall3 (location, rettype, funcptr, dest, src, bytes);
+ return call;
+}
+
static tree
DoBuiltinAlloca (location_t location, tree bytes)
{
@@ -1101,6 +1114,14 @@ m2builtins_BuiltInHugeValLong (location_t location)
return call;
}
+/* BuiltinStrNCopy copies at most n chars from address src to dest.  */
+
+tree
+m2builtins_BuiltinStrNCopy (location_t location, tree dest, tree src, tree n)
+{
+ return DoBuiltinStrNCopy (location, dest, src, n);
+}
+
static void
create_function_prototype (location_t location,
struct builtin_function_entry *fe)
@@ -1544,6 +1565,7 @@ m2builtins_init (location_t location)
gm2_alloca_node = find_builtin_tree ("__builtin_alloca");
gm2_memcpy_node = find_builtin_tree ("__builtin_memcpy");
gm2_memset_node = find_builtin_tree ("__builtin_memset");
+ gm2_strncpy_node = find_builtin_tree ("__builtin_strncpy");
gm2_huge_valf_node = find_builtin_tree ("__builtin_huge_valf");
gm2_huge_val_node = find_builtin_tree ("__builtin_huge_val");
gm2_huge_vall_node = find_builtin_tree ("__builtin_huge_vall");
diff --git a/gcc/m2/gm2-gcc/m2builtins.def b/gcc/m2/gm2-gcc/m2builtins.def
index 61f769d91b6..5ab5a6d816f 100644
--- a/gcc/m2/gm2-gcc/m2builtins.def
+++ b/gcc/m2/gm2-gcc/m2builtins.def
@@ -24,12 +24,6 @@ DEFINITION MODULE FOR "C" m2builtins ;
FROM CDataTypes IMPORT CharStar, ConstCharStar ;
FROM gcctypes IMPORT location_t, tree ;
-EXPORT QUALIFIED GetBuiltinConst, GetBuiltinConstType,
- GetBuiltinTypeInfoType, GetBuiltinTypeInfo,
- BuiltinExists, BuildBuiltinTree,
- BuiltinMemCopy, BuiltinMemSet, BuiltInAlloca,
- BuiltInIsfinite ;
-
(*
GetBuiltinConst - returns the gcc tree of a built in constant, name.
@@ -124,4 +118,11 @@ PROCEDURE BuiltInAlloca (location: location_t; n: tree) : tree ;
PROCEDURE BuiltInIsfinite (location: location_t; e: tree) : tree ;
+(*
+ BuiltinStrNCopy - copy at most n characters from src to dest.
+*)
+
+PROCEDURE BuiltinStrNCopy (location: location_t; dest, src, n: tree) : tree ;
+
+
END m2builtins.
diff --git a/gcc/m2/gm2-gcc/m2builtins.h b/gcc/m2/gm2-gcc/m2builtins.h
index 37bdbfa0695..017d2df38f6 100644
--- a/gcc/m2/gm2-gcc/m2builtins.h
+++ b/gcc/m2/gm2-gcc/m2builtins.h
@@ -54,6 +54,8 @@ EXTERN tree m2builtins_BuildBuiltinTree (location_t location, char *name);
EXTERN tree m2builtins_BuiltInHugeVal (location_t location);
EXTERN tree m2builtins_BuiltInHugeValShort (location_t location);
EXTERN tree m2builtins_BuiltInHugeValLong (location_t location);
+EXTERN tree m2builtins_BuiltinStrNCopy (location_t location, tree dest, tree src, tree n);
+
EXTERN void m2builtins_init (location_t location);
#undef EXTERN
diff --git a/gcc/m2/gm2-gcc/m2statement.cc b/gcc/m2/gm2-gcc/m2statement.cc
index d42183f349c..795298435e7 100644
--- a/gcc/m2/gm2-gcc/m2statement.cc
+++ b/gcc/m2/gm2-gcc/m2statement.cc
@@ -36,6 +36,7 @@ along with GNU Modula-2; see the file COPYING3. If not see
#include "m2treelib.h"
#include "m2type.h"
#include "m2convert.h"
+#include "m2builtins.h"
#include "m2pp.h"
static GTY (()) tree param_list = NULL_TREE; /* Ready for the next time we
@@ -154,6 +155,120 @@ m2statement_SetEndLocation (location_t location)
cfun->function_end_locus = location;
}
+/* copy_record_fields copies each record field from right to left.  */
+
+static
+void
+copy_record_fields (location_t location, tree left, tree right)
+{
+ unsigned int i;
+ tree right_value;
+ tree left_type = TREE_TYPE (left);
+ vec<constructor_elt, va_gc> *values = CONSTRUCTOR_ELTS (right);
+ FOR_EACH_CONSTRUCTOR_VALUE (values, i, right_value)
+ {
+ tree left_field = m2treelib_get_field_no (left_type, NULL_TREE, false, i);
+ tree left_ref = m2expr_BuildComponentRef (location, left, left_field);
+ m2statement_CopyByField (location, left_ref, right_value);
+ }
+}
+
+/* copy_array copies each element from constructor right into array left.  */
+
+static
+void
+copy_array (location_t location, tree left, tree right)
+{
+ unsigned int i;
+ tree value;
+ vec<constructor_elt, va_gc> *values = CONSTRUCTOR_ELTS (right);
+ tree array_type = TREE_TYPE (left);
+ tree index_type = TYPE_DOMAIN (array_type);
+ tree elt_type = TREE_TYPE (array_type);
+ tree low_indice = TYPE_MIN_VALUE (index_type);
+ low_indice
+ = m2convert_BuildConvert (location, index_type, low_indice, false);
+ FOR_EACH_CONSTRUCTOR_VALUE (values, i, value)
+ {
+ tree idx = m2decl_BuildIntegerConstant (i);
+ idx = m2convert_BuildConvert (location, index_type, idx, false);
+ tree array_ref = build4_loc (location, ARRAY_REF, elt_type, left,
+ idx, low_indice, NULL_TREE);
+ m2statement_CopyByField (location, array_ref, value);
+ }
+}
+
+/* copy_strncpy copies string cst into left using __builtin_strncpy.  */
+
+static
+void
+copy_strncpy (location_t location, tree left, tree cst)
+{
+ tree result = m2builtins_BuiltinStrNCopy (location,
+ m2expr_BuildAddr (location, left, false),
+ m2expr_BuildAddr (location, cst, false),
+ m2decl_BuildIntegerConstant (m2expr_StringLength (cst)));
+ TREE_SIDE_EFFECTS (result) = true;
+ TREE_USED (left) = true;
+ TREE_USED (cst) = true;
+ add_stmt (location, result);
+}
+
+/* copy_memcpy copies right into left using __builtin_memcpy.  */
+
+static
+void
+copy_memcpy (location_t location, tree left, tree right)
+{
+ tree result = m2builtins_BuiltinMemCopy (location,
+ m2expr_BuildAddr (location, left, false),
+ m2expr_BuildAddr (location, right, false),
+ m2expr_GetSizeOf (location, left));
+ TREE_SIDE_EFFECTS (result) = true;
+ TREE_USED (left) = true;
+ TREE_USED (right) = true;
+ add_stmt (location, result);
+}
+
+/* CopyByField_Lower copies right to left using memcpy for unions,
+   strncpy for a string cst, field assignment for record constructors
+   and element assignment for array constructors.  For all other
+   types it uses BuildAssignmentStatement.  */
+
+static
+void
+CopyByField_Lower (location_t location,
+ tree left, tree right)
+{
+ tree left_type = TREE_TYPE (left);
+ enum tree_code right_code = TREE_CODE (right);
+ enum tree_code left_code = TREE_CODE (left_type);
+
+ if (left_code == RECORD_TYPE && right_code == CONSTRUCTOR)
+ copy_record_fields (location, left, right);
+ else if (left_code == ARRAY_TYPE && right_code == CONSTRUCTOR)
+ copy_array (location, left, right);
+ else if (left_code == UNION_TYPE && right_code == CONSTRUCTOR)
+ copy_memcpy (location, left, right);
+ else if (right_code == STRING_CST)
+ copy_strncpy (location, left, right);
+ else
+ m2statement_BuildAssignmentStatement (location, left, right);
+}
+
+/* CopyByField recursively checks each field to ensure GCC
+   type equivalence and if equivalent uses assignment.
+   Otherwise it uses strncpy or memcpy depending upon the type.  */
+
+void
+m2statement_CopyByField (location_t location, tree des, tree expr)
+{
+ if (m2type_IsGccStrictTypeEquivalent (des, expr))
+ m2statement_BuildAssignmentStatement (location, des, expr);
+ else
+ CopyByField_Lower (location, des, expr);
+}
+
/* BuildAssignmentTree builds the assignment of, des, and, expr.
It returns, des. */
diff --git a/gcc/m2/gm2-gcc/m2statement.def b/gcc/m2/gm2-gcc/m2statement.def
index 074b76860ba..ffaf69784ee 100644
--- a/gcc/m2/gm2-gcc/m2statement.def
+++ b/gcc/m2/gm2-gcc/m2statement.def
@@ -314,4 +314,16 @@ PROCEDURE SetEndLocation (location: location_t) ;
PROCEDURE BuildBuiltinCallTree (func: tree) : tree ;
+(*
+   CopyByField - copy expr to des.  If des is a record, union or an array
+                 then check the fields for GCC type equivalence and if
+                 necessary call __builtin_strncpy or __builtin_memcpy.
+ This can occur if an expr contains a constant string
+ which is to be assigned into a field declared as
+ an ARRAY [0..n] OF CHAR.
+*)
+
+PROCEDURE CopyByField (location: location_t; des, expr: tree) ;
+
+
END m2statement.
diff --git a/gcc/m2/gm2-gcc/m2statement.h b/gcc/m2/gm2-gcc/m2statement.h
index db2daf37b6f..0076b32dc8e 100644
--- a/gcc/m2/gm2-gcc/m2statement.h
+++ b/gcc/m2/gm2-gcc/m2statement.h
@@ -108,6 +108,7 @@ EXTERN tree m2statement_BuildBuiltinCallTree (tree func);
EXTERN tree m2statement_BuildTryFinally (location_t location, tree call,
tree cleanups);
EXTERN tree m2statement_BuildCleanUp (tree param);
+EXTERN void m2statement_CopyByField (location_t location, tree des, tree expr);
#undef EXTERN
#endif /* m2statement_h. */
diff --git a/gcc/m2/gm2-gcc/m2type.cc b/gcc/m2/gm2-gcc/m2type.cc
index 9f7a433e980..348e0bfdb01 100644
--- a/gcc/m2/gm2-gcc/m2type.cc
+++ b/gcc/m2/gm2-gcc/m2type.cc
@@ -3101,10 +3101,68 @@ m2type_gm2_signed_or_unsigned_type (int unsignedp, tree type)
/* IsAddress returns true if the type is an ADDRESS. */
-int
+bool
m2type_IsAddress (tree type)
{
return type == ptr_type_node;
}
+/* check_record_fields returns true if all the fields in left and right
+   are GCC equivalent.  */
+
+static
+bool
+check_record_fields (tree left, tree right)
+{
+ unsigned int i;
+ tree right_value;
+ vec<constructor_elt, va_gc> *values = CONSTRUCTOR_ELTS (right);
+ FOR_EACH_CONSTRUCTOR_VALUE (values, i, right_value)
+ {
+ tree left_field = TREE_TYPE (m2treelib_get_field_no (left, NULL_TREE, false, i));
+ if (! m2type_IsGccStrictTypeEquivalent (left_field, right_value))
+ return false;
+ }
+ return true;
+}
+
+/* check_array_types returns true if no element of the constructor
+   right is a STRING_CST.  */
+
+static
+bool
+check_array_types (tree right)
+{
+ unsigned int i;
+ tree value;
+ vec<constructor_elt, va_gc> *values = CONSTRUCTOR_ELTS (right);
+ FOR_EACH_CONSTRUCTOR_VALUE (values, i, value)
+ {
+ enum tree_code right_code = TREE_CODE (value);
+ if (right_code == STRING_CST)
+ return false;
+ }
+ return true;
+}
+
+bool
+m2type_IsGccStrictTypeEquivalent (tree left, tree right)
+{
+ enum tree_code right_code = TREE_CODE (right);
+ enum tree_code left_code = TREE_CODE (left);
+ if (left_code == VAR_DECL)
+ return m2type_IsGccStrictTypeEquivalent (TREE_TYPE (left), right);
+ if (right_code == VAR_DECL)
+ return m2type_IsGccStrictTypeEquivalent (left, TREE_TYPE (right));
+ if (left_code == RECORD_TYPE && right_code == CONSTRUCTOR)
+ return check_record_fields (left, right);
+ if (left_code == UNION_TYPE && right_code == CONSTRUCTOR)
+ return false;
+ if (left_code == ARRAY_TYPE && right_code == CONSTRUCTOR)
+ return check_array_types (right);
+ if (right_code == STRING_CST)
+ return false;
+ return true;
+}
+
#include "gt-m2-m2type.h"
diff --git a/gcc/m2/gm2-gcc/m2type.def b/gcc/m2/gm2-gcc/m2type.def
index 797335e0070..f74888e315e 100644
--- a/gcc/m2/gm2-gcc/m2type.def
+++ b/gcc/m2/gm2-gcc/m2type.def
@@ -996,4 +996,12 @@ PROCEDURE IsAddress (type: tree) : BOOLEAN ;
PROCEDURE SameRealType (a, b: tree) : BOOLEAN ;
+(*
+   IsGccStrictTypeEquivalent - return true if left and right, and
+                               all their contents, have the same type.
+*)
+
+PROCEDURE IsGccStrictTypeEquivalent (left, right: tree) : BOOLEAN ;
+
+
END m2type.
diff --git a/gcc/m2/gm2-gcc/m2type.h b/gcc/m2/gm2-gcc/m2type.h
index 04370d63e2b..663af3ce7eb 100644
--- a/gcc/m2/gm2-gcc/m2type.h
+++ b/gcc/m2/gm2-gcc/m2type.h
@@ -210,10 +210,10 @@ EXTERN tree m2type_gm2_type_for_size (unsigned int bits, int unsignedp);
EXTERN tree m2type_BuildProcTypeParameterDeclaration (location_t location,
tree type,
bool isreference);
-EXTERN int m2type_IsAddress (tree type);
+EXTERN bool m2type_IsAddress (tree type);
EXTERN tree m2type_GetCardinalAddressType (void);
EXTERN bool m2type_SameRealType (tree a, tree b);
-
+EXTERN bool m2type_IsGccStrictTypeEquivalent (tree left, tree right);
#undef EXTERN
#endif /* m2type_h */
===================================================================
@@ -54,6 +54,9 @@ extern void alpha_expand_unaligned_load
HOST_WIDE_INT, int);
extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT,
HOST_WIDE_INT);
+extern void alpha_expand_unaligned_store_safe_partial (rtx, rtx, HOST_WIDE_INT,
+ HOST_WIDE_INT,
+ HOST_WIDE_INT);
extern int alpha_expand_block_move (rtx []);
extern int alpha_expand_block_clear (rtx []);
extern rtx alpha_expand_zap_mask (HOST_WIDE_INT);
===================================================================
@@ -2481,7 +2481,11 @@ alpha_expand_movmisalign (machine_mode m
{
if (!reg_or_0_operand (operands[1], mode))
operands[1] = force_reg (mode, operands[1]);
- alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_unaligned_store_safe_partial (operands[0], operands[1],
+ 8, 0, BITS_PER_UNIT);
+ else
+ alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
}
else
gcc_unreachable ();
@@ -3673,6 +3677,310 @@ alpha_expand_unaligned_store (rtx dst, r
emit_move_insn (meml, dstl);
}
+/* Store data SRC of size SIZE using unaligned methods to the location
+   referred to by base DST plus offset OFS, of alignment ALIGN.  This is
+ a multi-thread and async-signal safe implementation for all sizes from
+ 8 down to 1.
+   For BWX targets it is straightforward: we just write data piecemeal,
+ For BWX targets it is straightforward, we just write data piecemeal,
+ taking any advantage of the alignment known and observing that we
+ shouldn't have been called for alignments of 32 or above in the first
+ place (though adding support for that would be easy).
+
+   For non-BWX targets we need to load data from memory, mask it so as
+   to keep any part outside the area written, insert the data to be stored,
+ and write the result back atomically. For sizes that are not a power
+ of 2 there are no byte mask or insert machine instructions available
+ so the mask required has to be built by hand, however ZAP and ZAPNOT
+ instructions can then be used to apply the mask. Since LL/SC loops
+ are used, the high and low parts have to be disentangled from each
+ other and handled sequentially except for size 1 where there is only
+ the low part to be written. */
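+
+/* For illustration: a store of SIZE 3 at a byte index of 6 within the
+   quadword touches bytes 6 and 7 of the low quadword and byte 0 of the
+   high one.  The byte mask built is 0x7; the mask applied with ZAP and
+   ZAPNOT for the low part is 0x7 << 6, of which only bits 6 and 7
+   survive in the QImode subreg, while for the high part it is
+   0x7 >> (8 - 6) == 0x1.  The data is shifted correspondingly, left by
+   6 * 8 bits for the low part and right by (8 - 6) * 8 bits for the
+   high part, before being merged under LL/SC.  */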
+
+void
+alpha_expand_unaligned_store_safe_partial (rtx dst, rtx src,
+ HOST_WIDE_INT size,
+ HOST_WIDE_INT ofs,
+ HOST_WIDE_INT align)
+{
+ if (TARGET_BWX)
+ {
+ machine_mode mode = align >= 2 * BITS_PER_UNIT ? HImode : QImode;
+ HOST_WIDE_INT step = mode == HImode ? 2 : 1;
+
+ while (1)
+ {
+ rtx dstl = src == const0_rtx ? const0_rtx : gen_lowpart (mode, src);
+ rtx meml = adjust_address (dst, mode, ofs);
+ emit_move_insn (meml, dstl);
+
+ ofs += step;
+ size -= step;
+ if (size == 0)
+ return;
+
+ if (size < step)
+ {
+ mode = QImode;
+ step = 1;
+ }
+
+ if (src != const0_rtx)
+ src = expand_simple_binop (DImode, LSHIFTRT, src,
+ GEN_INT (step * BITS_PER_UNIT),
+ NULL, 1, OPTAB_WIDEN);
+ }
+ }
+
+ rtx dsta = XEXP (dst, 0);
+ if (GET_CODE (dsta) == LO_SUM)
+ dsta = force_reg (Pmode, dsta);
+
+ rtx addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs));
+
+ rtx byte_mask = NULL_RTX;
+ switch (size)
+ {
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ /* If size is not a power of 2 we need to build the byte mask from
+ size by hand. This is SIZE consecutive bits starting from bit 0. */
+ byte_mask = force_reg (DImode, GEN_INT (~(HOST_WIDE_INT_M1U << size)));
+
+      /* Unlike with the machine INSxx and MSKxx operations, no implicit
+	 mask is applied to ADDR by the corresponding operations made by
+	 hand, so extract the byte index now.  */
+ emit_insn (gen_rtx_SET (addr,
+ gen_rtx_AND (DImode, addr, GEN_INT (~-8))));
+ }
+
+ /* Must handle high before low for degenerate case of aligned. */
+ if (size != 1)
+ {
+ rtx addrh = gen_reg_rtx (DImode);
+ rtx aligned_addrh = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (addrh,
+ plus_constant (DImode, dsta, ofs + size - 1)));
+ emit_insn (gen_rtx_SET (aligned_addrh,
+ gen_rtx_AND (DImode, addrh, GEN_INT (-8))));
+
+ /* AND addresses cannot be in any alias set, since they may implicitly
+ alias surrounding code. Ideally we'd have some alias set that
+ covered all types except those with alignment 8 or higher. */
+ rtx memh = change_address (dst, DImode, aligned_addrh);
+ set_mem_alias_set (memh, 0);
+
+ rtx insh = gen_reg_rtx (DImode);
+ rtx maskh = NULL_RTX;
+ switch (size)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ if (src != CONST0_RTX (GET_MODE (src)))
+ emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
+ GEN_INT (size * 8), addr));
+ break;
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ {
+ /* For the high part we shift the byte mask right by 8 minus
+ the byte index in addr, so we need an extra calculation. */
+ rtx shamt = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (shamt,
+ gen_rtx_MINUS (DImode,
+ force_reg (DImode,
+ GEN_INT (8)),
+ addr)));
+
+ maskh = gen_reg_rtx (DImode);
+ rtx shift = gen_rtx_LSHIFTRT (DImode, byte_mask, shamt);
+ emit_insn (gen_rtx_SET (maskh, shift));
+
+ /* Insert any bytes required by hand, by doing a byte-wise
+ shift on SRC right by the same number and then zap the
+ bytes outside the byte mask. */
+ if (src != CONST0_RTX (GET_MODE (src)))
+ {
+ rtx byte_loc = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (byte_loc,
+ gen_rtx_ASHIFT (DImode,
+ shamt, GEN_INT (3))));
+ rtx bytes = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (bytes,
+ gen_rtx_LSHIFTRT (DImode,
+ gen_lowpart (DImode,
+ src),
+ byte_loc)));
+
+ rtx zapmask = gen_rtx_NOT (QImode,
+ gen_rtx_SUBREG (QImode, maskh, 0));
+ rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask),
+ UNSPEC_ZAP);
+ emit_insn (gen_rtx_SET (insh,
+ gen_rtx_AND (DImode, zap, bytes)));
+ }
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx labelh = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (labelh, 0));
+
+ rtx dsth = gen_reg_rtx (DImode);
+ emit_insn (gen_load_locked (DImode, dsth, memh));
+
+ switch (size)
+ {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr));
+ break;
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ {
+ rtx zapmask = gen_rtx_SUBREG (QImode, maskh, 0);
+ rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask),
+ UNSPEC_ZAP);
+ emit_insn (gen_rtx_SET (dsth, gen_rtx_AND (DImode, zap, dsth)));
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (src != CONST0_RTX (GET_MODE (src)))
+ dsth = expand_simple_binop (DImode, IOR, insh, dsth, dsth, 0,
+ OPTAB_WIDEN);
+
+ emit_insn (gen_store_conditional (DImode, dsth, memh, dsth));
+
+ alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dsth, const0_rtx), labelh);
+ }
+
+ /* Now handle low. */
+ rtx addrl = gen_reg_rtx (DImode);
+ rtx aligned_addrl = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (addrl, plus_constant (DImode, dsta, ofs)));
+ emit_insn (gen_rtx_SET (aligned_addrl,
+ gen_rtx_AND (DImode, addrl, GEN_INT (-8))));
+
+ /* AND addresses cannot be in any alias set, since they may implicitly
+ alias surrounding code. Ideally we'd have some alias set that
+ covered all types except those with alignment 8 or higher. */
+ rtx meml = change_address (dst, DImode, aligned_addrl);
+ set_mem_alias_set (meml, 0);
+
+ rtx insl = gen_reg_rtx (DImode);
+ rtx maskl;
+ switch (size)
+ {
+ case 1:
+ if (src != CONST0_RTX (GET_MODE (src)))
+ emit_insn (gen_insbl (insl, gen_lowpart (QImode, src), addr));
+ break;
+ case 2:
+ if (src != CONST0_RTX (GET_MODE (src)))
+ emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr));
+ break;
+ case 4:
+ if (src != CONST0_RTX (GET_MODE (src)))
+ emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr));
+ break;
+ case 8:
+ if (src != CONST0_RTX (GET_MODE (src)))
+ emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr));
+ break;
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ /* For the low part we shift the byte mask left by the byte index,
+ which is already in ADDR. */
+ maskl = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (maskl,
+ gen_rtx_ASHIFT (DImode, byte_mask, addr)));
+
+ /* Insert any bytes required by hand, by doing a byte-wise shift
+ on SRC left by the same number and then zap the bytes outside
+ the byte mask. */
+ if (src != CONST0_RTX (GET_MODE (src)))
+ {
+ rtx byte_loc = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (byte_loc,
+ gen_rtx_ASHIFT (DImode,
+ force_reg (DImode, addr),
+ GEN_INT (3))));
+ rtx bytes = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (bytes,
+ gen_rtx_ASHIFT (DImode,
+ gen_lowpart (DImode, src),
+ byte_loc)));
+
+ rtx zapmask = gen_rtx_NOT (QImode,
+ gen_rtx_SUBREG (QImode, maskl, 0));
+ rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask),
+ UNSPEC_ZAP);
+ emit_insn (gen_rtx_SET (insl, gen_rtx_AND (DImode, zap, bytes)));
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx labell = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (labell, 0));
+
+ rtx dstl = gen_reg_rtx (DImode);
+ emit_insn (gen_load_locked (DImode, dstl, meml));
+
+ switch (size)
+ {
+ case 1:
+ emit_insn (gen_mskbl (dstl, dstl, addr));
+ break;
+ case 2:
+ emit_insn (gen_mskwl (dstl, dstl, addr));
+ break;
+ case 4:
+ emit_insn (gen_mskll (dstl, dstl, addr));
+ break;
+ case 8:
+ emit_insn (gen_mskql (dstl, dstl, addr));
+ break;
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ {
+ rtx zapmask = gen_rtx_SUBREG (QImode, maskl, 0);
+ rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), UNSPEC_ZAP);
+ emit_insn (gen_rtx_SET (dstl, gen_rtx_AND (DImode, zap, dstl)));
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (src != CONST0_RTX (GET_MODE (src)))
+ dstl = expand_simple_binop (DImode, IOR, insl, dstl, dstl, 0, OPTAB_WIDEN);
+
+ emit_insn (gen_store_conditional (DImode, dstl, meml, dstl));
+
+ alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dstl, const0_rtx), labell);
+}
+
/* The block move code tries to maximize speed by separating loads and
stores at the expense of register pressure: we load all of the data
before we store it back out. There are two secondary effects worth
@@ -3838,6 +4146,117 @@ alpha_expand_unaligned_store_words (rtx
emit_move_insn (st_addr_1, st_tmp_1);
}
+/* Store an integral number of consecutive unaligned quadwords. DATA_REGS
+ may be NULL to store zeros. This is a multi-thread and async-signal
+ safe implementation. */
+
+static void
+alpha_expand_unaligned_store_words_safe_partial (rtx *data_regs, rtx dmem,
+ HOST_WIDE_INT words,
+ HOST_WIDE_INT ofs,
+ HOST_WIDE_INT align)
+{
+ rtx const im8 = GEN_INT (-8);
+ rtx ins_tmps[MAX_MOVE_WORDS];
+ HOST_WIDE_INT i;
+
+ /* Generate all the tmp registers we need. */
+ for (i = 0; i < words; i++)
+ ins_tmps[i] = data_regs != NULL ? gen_reg_rtx (DImode) : const0_rtx;
+
+ if (ofs != 0)
+ dmem = adjust_address (dmem, GET_MODE (dmem), ofs);
+
+ /* For BWX store the ends before we start fiddling with data registers
+ to fill the middle. Also if we have no more than two quadwords,
+ then obviously we're done. */
+ if (TARGET_BWX)
+ {
+ rtx datan = data_regs ? data_regs[words - 1] : const0_rtx;
+ rtx data0 = data_regs ? data_regs[0] : const0_rtx;
+ HOST_WIDE_INT e = (words - 1) * 8;
+
+ alpha_expand_unaligned_store_safe_partial (dmem, data0, 8, 0, align);
+ alpha_expand_unaligned_store_safe_partial (dmem, datan, 8, e, align);
+ if (words <= 2)
+ return;
+ }
+
+ rtx dmema = XEXP (dmem, 0);
+ if (GET_CODE (dmema) == LO_SUM)
+ dmema = force_reg (Pmode, dmema);
+
+ /* Shift the input data into place. */
+ rtx dreg = copy_addr_to_reg (dmema);
+ if (data_regs != NULL)
+ {
+ for (i = words - 1; i >= 0; i--)
+ {
+ emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg));
+ emit_insn (gen_insql (data_regs[i], data_regs[i], dreg));
+ }
+ for (i = words - 1; i > 0; i--)
+ ins_tmps[i - 1] = expand_simple_binop (DImode, IOR, data_regs[i],
+ ins_tmps[i - 1],
+ ins_tmps[i - 1],
+ 1, OPTAB_DIRECT);
+ }
+
+ if (!TARGET_BWX)
+ {
+ rtx temp = gen_reg_rtx (DImode);
+ rtx mem = gen_rtx_MEM (DImode,
+ expand_simple_binop (Pmode, AND, dreg, im8,
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ emit_insn (gen_load_locked (DImode, temp, mem));
+ emit_insn (gen_mskql (temp, temp, dreg));
+ if (data_regs != NULL)
+ temp = expand_simple_binop (DImode, IOR, temp, data_regs[0],
+ temp, 1, OPTAB_DIRECT);
+ emit_insn (gen_store_conditional (DImode, temp, mem, temp));
+
+ alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label);
+ }
+
+ for (i = words - 1; i > 0; --i)
+ {
+ rtx temp = change_address (dmem, Pmode,
+ gen_rtx_AND (Pmode,
+ plus_constant (Pmode,
+ dmema, i * 8),
+ im8));
+ set_mem_alias_set (temp, 0);
+ emit_move_insn (temp, ins_tmps[i - 1]);
+ }
+
+ if (!TARGET_BWX)
+ {
+ rtx temp = gen_reg_rtx (DImode);
+ rtx addr = expand_simple_binop (Pmode, PLUS, dreg,
+ GEN_INT (words * 8 - 1),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ rtx mem = gen_rtx_MEM (DImode,
+ expand_simple_binop (Pmode, AND, addr, im8,
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ emit_insn (gen_load_locked (DImode, temp, mem));
+ emit_insn (gen_mskqh (temp, temp, dreg));
+ if (data_regs != NULL)
+ temp = expand_simple_binop (DImode, IOR, temp, ins_tmps[words - 1],
+ temp, 1, OPTAB_DIRECT);
+ emit_insn (gen_store_conditional (DImode, temp, mem, temp));
+
+ alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label);
+ }
+}
+
/* Get the base alignment and offset of EXPR in A and O respectively.
Check for any pseudo register pointer alignment and for any tree
node information and return the largest alignment determined and
@@ -4147,26 +4566,74 @@ alpha_expand_block_move (rtx operands[])
if (GET_MODE (data_regs[i + words]) != DImode)
break;
- if (words == 1)
- alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
+ if (TARGET_SAFE_PARTIAL)
+ {
+ if (words == 1)
+ alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i],
+ 8, ofs, dst_align);
+ else
+ alpha_expand_unaligned_store_words_safe_partial (data_regs + i,
+ orig_dst, words,
+ ofs, dst_align);
+ }
else
- alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
- words, ofs);
-
+ {
+ if (words == 1)
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs);
+ else
+ alpha_expand_unaligned_store_words (data_regs + i, orig_dst,
+ words, ofs);
+ }
i += words;
ofs += words * 8;
}
- /* Due to the above, this won't be aligned. */
+  /* If we are in the partial memory access safety mode with a non-BWX
+     target, then coalesce data loaded in different widths so as to
+     minimize the number of safe partial stores, as they are expensive.  */
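+  /* For example a trailing SImode, HImode and QImode value, seven
+     bytes in total, are merged with shifts and IOR into a single
+     DImode register and written with one safe partial store of size 7
+     rather than three separate ones.  */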
+ if (!TARGET_BWX && TARGET_SAFE_PARTIAL)
+ {
+ HOST_WIDE_INT size = 0;
+ unsigned int n;
+
+ for (n = i; i < nregs; i++)
+ {
+ if (i != n)
+ {
+ /* Don't widen SImode data where obtained by extraction. */
+ rtx data = data_regs[n];
+ if (GET_MODE (data) == SImode && src_align < 32)
+ data = gen_rtx_SUBREG (DImode, data, 0);
+ rtx field = expand_simple_binop (DImode, ASHIFT, data_regs[i],
+ GEN_INT (size * BITS_PER_UNIT),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ data_regs[n] = expand_simple_binop (DImode, IOR, data, field,
+ data, 1, OPTAB_WIDEN);
+ }
+ size += GET_MODE_SIZE (GET_MODE (data_regs[i]));
+ gcc_assert (size < 8);
+ }
+ if (size > 0)
+ alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[n],
+ size, ofs, dst_align);
+ ofs += size;
+ }
+
+  /* We've done aligned stores above; this won't be aligned.  */
while (i < nregs && GET_MODE (data_regs[i]) == SImode)
{
- alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
+ gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i],
+ 4, ofs, dst_align);
+ else
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs);
ofs += 4;
i++;
gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode);
}
- if (dst_align >= 16)
+ if (TARGET_BWX && dst_align >= 16)
while (i < nregs && GET_MODE (data_regs[i]) == HImode)
{
emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]);
@@ -4176,7 +4643,12 @@ alpha_expand_block_move (rtx operands[])
else
while (i < nregs && GET_MODE (data_regs[i]) == HImode)
{
- alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
+ gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i],
+ 2, ofs, dst_align);
+ else
+ alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs);
i++;
ofs += 2;
}
@@ -4185,6 +4657,7 @@ alpha_expand_block_move (rtx operands[])
while (i < nregs)
{
gcc_assert (GET_MODE (data_regs[i]) == QImode);
+ gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL);
emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]);
i++;
ofs += 1;
@@ -4193,6 +4666,27 @@ alpha_expand_block_move (rtx operands[])
return 1;
}
+/* Expand a multi-thread and async-signal safe partial clear of a longword
+   or a quadword quantity indicated by MODE at aligned memory location MEM
+   according to MASK, whose set bits denote the bit positions preserved
+   and whose clear bits denote those zeroed.  */
+
+static void
+alpha_expand_clear_safe_partial_nobwx (rtx mem, machine_mode mode,
+ HOST_WIDE_INT mask)
+{
+ rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
+ emit_label (XEXP (label, 0));
+
+ rtx temp = gen_reg_rtx (mode);
+ rtx status = mode == DImode ? temp : gen_rtx_SUBREG (DImode, temp, 0);
+
+ emit_insn (gen_load_locked (mode, temp, mem));
+ emit_insn (gen_rtx_SET (temp, gen_rtx_AND (mode, temp, GEN_INT (mask))));
+ emit_insn (gen_store_conditional (mode, status, mem, temp));
+
+ alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, status, const0_rtx), label);
+}
+
int
alpha_expand_block_clear (rtx operands[])
{
@@ -4237,8 +4731,9 @@ alpha_expand_block_clear (rtx operands[]
{
/* Given that alignofs is bounded by align, the only time BWX could
generate three stores is for a 7 byte fill. Prefer two individual
- stores over a load/mask/store sequence. */
- if ((!TARGET_BWX || alignofs == 7)
+ stores over a load/mask/store sequence. In the partial safety
+ mode always do individual stores regardless of their count. */
+ if ((!TARGET_BWX || (!TARGET_SAFE_PARTIAL && alignofs == 7))
&& align >= 32
&& !(alignofs == 4 && bytes >= 4))
{
@@ -4264,10 +4759,15 @@ alpha_expand_block_clear (rtx operands[]
}
alignofs = 0;
- tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
- NULL_RTX, 1, OPTAB_WIDEN);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_clear_safe_partial_nobwx (mem, mode, mask);
+ else
+ {
+ tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
+ NULL_RTX, 1, OPTAB_WIDEN);
- emit_move_insn (mem, tmp);
+ emit_move_insn (mem, tmp);
+ }
}
if (TARGET_BWX && (alignofs & 1) && bytes >= 1)
@@ -4372,7 +4872,11 @@ alpha_expand_block_clear (rtx operands[]
{
words = bytes / 8;
- alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_unaligned_store_words_safe_partial (NULL, orig_dst,
+ words, ofs, align);
+ else
+ alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs);
bytes -= words * 8;
ofs += words * 8;
@@ -4389,7 +4893,7 @@ alpha_expand_block_clear (rtx operands[]
/* If we have appropriate alignment (and it wouldn't take too many
instructions otherwise), mask out the bytes we need. */
- if ((TARGET_BWX ? words > 2 : bytes > 0)
+ if ((TARGET_BWX ? !TARGET_SAFE_PARTIAL && words > 2 : bytes > 0)
&& (align >= 64 || (align >= 32 && bytes < 4)))
{
machine_mode mode = (align >= 64 ? DImode : SImode);
@@ -4401,18 +4905,46 @@ alpha_expand_block_clear (rtx operands[]
mask = HOST_WIDE_INT_M1U << (bytes * 8);
- tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
- NULL_RTX, 1, OPTAB_WIDEN);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_clear_safe_partial_nobwx (mem, mode, mask);
+ else
+ {
+ tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask),
+ NULL_RTX, 1, OPTAB_WIDEN);
- emit_move_insn (mem, tmp);
+ emit_move_insn (mem, tmp);
+ }
return 1;
}
- if (!TARGET_BWX && bytes >= 4)
+ if (bytes >= 4)
{
- alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
- bytes -= 4;
- ofs += 4;
+ if (align >= 32)
+ do
+ {
+ emit_move_insn (adjust_address (orig_dst, SImode, ofs),
+ const0_rtx);
+ bytes -= 4;
+ ofs += 4;
+ }
+ while (bytes >= 4);
+ else if (!TARGET_BWX)
+ {
+ gcc_assert (bytes < 8);
+ if (TARGET_SAFE_PARTIAL)
+ {
+ alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx,
+ bytes, ofs, align);
+ ofs += bytes;
+ bytes = 0;
+ }
+ else
+ {
+ alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs);
+ bytes -= 4;
+ ofs += 4;
+ }
+ }
}
if (bytes >= 2)
@@ -4428,18 +4960,38 @@ alpha_expand_block_clear (rtx operands[]
}
else if (! TARGET_BWX)
{
- alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
- bytes -= 2;
- ofs += 2;
+ gcc_assert (bytes < 4);
+ if (TARGET_SAFE_PARTIAL)
+ {
+ alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx,
+ bytes, ofs, align);
+ ofs += bytes;
+ bytes = 0;
+ }
+ else
+ {
+ alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs);
+ bytes -= 2;
+ ofs += 2;
+ }
}
}
while (bytes > 0)
- {
- emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
- bytes -= 1;
- ofs += 1;
- }
+ if (TARGET_BWX || !TARGET_SAFE_PARTIAL)
+ {
+ emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx);
+ bytes -= 1;
+ ofs += 1;
+ }
+ else
+ {
+ gcc_assert (bytes < 2);
+ alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx,
+ bytes, ofs, align);
+ ofs += bytes;
+ bytes = 0;
+ }
return 1;
}
===================================================================
@@ -4781,9 +4781,15 @@
&& INTVAL (operands[1]) != 64))
FAIL;
- alpha_expand_unaligned_store (operands[0], operands[3],
- INTVAL (operands[1]) / 8,
- INTVAL (operands[2]) / 8);
+ if (TARGET_SAFE_PARTIAL)
+ alpha_expand_unaligned_store_safe_partial (operands[0], operands[3],
+ INTVAL (operands[1]) / 8,
+ INTVAL (operands[2]) / 8,
+ BITS_PER_UNIT);
+ else
+ alpha_expand_unaligned_store (operands[0], operands[3],
+ INTVAL (operands[1]) / 8,
+ INTVAL (operands[2]) / 8);
DONE;
})
===================================================================
@@ -73,6 +73,10 @@ msafe-bwa
Target Mask(SAFE_BWA)
Emit multi-thread and async-signal safe code for byte and word memory accesses.
+msafe-partial
+Target Mask(SAFE_PARTIAL)
+Emit multi-thread and async-signal safe code for partial memory accesses.
+
mexplicit-relocs
Target Mask(EXPLICIT_RELOCS)
Emit code using explicit relocation directives.
===================================================================
@@ -38,6 +38,9 @@ UrlSuffix(gcc/DEC-Alpha-Options.html#ind
msafe-bwa
UrlSuffix(gcc/DEC-Alpha-Options.html#index-msafe-bwa)
+msafe-partial
+UrlSuffix(gcc/DEC-Alpha-Options.html#index-msafe-partial)
+
mexplicit-relocs
UrlSuffix(gcc/DEC-Alpha-Options.html#index-mexplicit-relocs)
===================================================================
@@ -976,7 +976,7 @@ Objective-C and Objective-C++ Dialects}.
-mtrap-precision=@var{mode} -mbuild-constants
-mcpu=@var{cpu-type} -mtune=@var{cpu-type}
-mbwx -mmax -mfix -mcix
--msafe-bwa
+-msafe-bwa -msafe-partial
-mfloat-vax -mfloat-ieee
-mexplicit-relocs -msmall-data -mlarge-data
-msmall-text -mlarge-text
@@ -25700,6 +25700,16 @@ Indicate whether in the absence of the o
GCC should generate multi-thread and async-signal safe code for byte
and aligned word memory accesses.
+@opindex msafe-partial
+@opindex mno-safe-partial
+@item -msafe-partial
+@itemx -mno-safe-partial
+Indicate whether GCC should generate multi-thread and async-signal
+safe code for partial memory accesses, including piecemeal accesses
+to unaligned data as well as block accesses to leading and trailing
+parts of aggregate types or other objects in memory that do not
+respectively start and end on an aligned 64-bit data boundary.
+
@opindex mfloat-vax
@opindex mfloat-ieee
@item -mfloat-vax
===================================================================
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memclr-a2-o1-c9-ptr.c"
+
+/* Expect assembly such as:
+
+ stb $31,1($16)
+ stw $31,2($16)
+ stw $31,4($16)
+ stw $31,6($16)
+ stw $31,8($16)
+
+ that is with a byte store at offset 1, followed by word stores at
+ offsets 2, 4, 6, and 8. */
+
+/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31,1\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstw\\s\\\$31,2\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstw\\s\\\$31,4\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstw\\s\\\$31,6\\\(\\\$16\\\)\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstw\\s\\\$31,8\\\(\\\$16\\\)\\s" 1 } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mbwx" } */
+/* { dg-options "-mbwx -mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
typedef unsigned int __attribute__ ((mode (QI))) int08_t;
===================================================================
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-msafe-partial -mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-di-unaligned-dst.c"
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 16 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 6 } } */
+/* { dg-final { scan-assembler-not "\\sldq_l\\s" } } */
+/* { dg-final { scan-assembler-not "\\sldq_u\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstq\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstq_c\\s" } } */
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-msafe-partial -mno-bwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-di-unaligned-dst.c"
+
+/* { dg-final { scan-assembler-times "\\sldq\\s" 7 } } */
+/* { dg-final { scan-assembler-times "\\sldq_l\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\sstq_c\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 6 } } */
+/* { dg-final { scan-assembler-not "\\sldq_u\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstq\\s" } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "" } */
+/* { dg-options "-mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
unsigned long unaligned_src_di[9] = { [0 ... 8] = 0xfefdfcfbfaf9f8f7 };
===================================================================
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-msafe-partial -mbwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-si-unaligned-dst.c"
+
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sstb\\s" 20 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 6 } } */
+/* { dg-final { scan-assembler-not "\\sldq_l\\s" } } */
+/* { dg-final { scan-assembler-not "\\sldq_u\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstl\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstq_c\\s" } } */
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-msafe-partial -mno-bwx" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "memcpy-si-unaligned-dst.c"
+
+/* { dg-final { scan-assembler-times "\\sldl\\s" 15 } } */
+/* { dg-final { scan-assembler-times "\\sldq_l\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstq_c\\s" 4 } } */
+/* { dg-final { scan-assembler-times "\\sstq_u\\s" 6 } } */
+/* { dg-final { scan-assembler-not "\\sldq_u\\s" } } */
+/* { dg-final { scan-assembler-not "\\sstl\\s" } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "" } */
+/* { dg-options "-mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
unsigned int unaligned_src_si[17] = { [0 ... 16] = 0xfefdfcfb };
===================================================================
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stlx0.c"
+
+/* Expect assembly such as:
+
+ stb $31,0($16)
+ stb $31,1($16)
+ stb $31,2($16)
+ stb $31,3($16)
+
+ without any LDQ_U or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sstb\\s" 4 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mno-bwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stlx0.c"
+
+/* Expect assembly such as:
+
+ lda $2,3($16)
+ bic $2,7,$2
+$L2:
+ ldq_l $1,0($2)
+ msklh $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L2
+ bic $16,7,$2
+$L3:
+ ldq_l $1,0($2)
+ mskll $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L3
+
+ without any INSLH, INSLL, BIS, LDQ_U, or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sldq_l\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\smsklh\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\smskll\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq_c\\s" 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:bis|inslh|insll|ldq_u|stq_u)\\s" } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "" } */
+/* { dg-options "-mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
typedef struct { int v __attribute__ ((packed)); } intx;
===================================================================
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stqx0.c"
+
+/* Expect assembly such as:
+
+ stb $31,0($16)
+ stb $31,1($16)
+ stb $31,2($16)
+ stb $31,3($16)
+ stb $31,4($16)
+ stb $31,5($16)
+ stb $31,6($16)
+ stb $31,7($16)
+
+ without any LDQ_U or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sstb\\s" 8 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mno-bwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stqx0.c"
+
+/* Expect assembly such as:
+
+ lda $2,7($16)
+ bic $2,7,$2
+$L2:
+ ldq_l $1,0($2)
+ mskqh $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L2
+ bic $16,7,$2
+$L3:
+ ldq_l $1,0($2)
+ mskql $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L3
+
+ without any INSLH, INSLL, BIS, LDQ_U, or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sldq_l\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\smskqh\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\smskql\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq_c\\s" 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:bis|insqh|insql|ldq_u|stq_u)\\s" } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "" } */
+/* { dg-options "-mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
typedef struct { long v __attribute__ ((packed)); } longx;
===================================================================
@@ -1,19 +1,15 @@
/* { dg-do compile } */
-/* { dg-options "-mbwx" } */
+/* { dg-options "-mbwx -mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
-typedef struct { short v __attribute__ ((packed)); } shortx;
-
-void
-stwx0 (shortx *p)
-{
- p->v = 0;
-}
+#include "stwx0.c"
/* Expect assembly such as:
stb $31,0($16)
stb $31,1($16)
- */
+
+ without any LDQ_U or STQ_U instructions. */
/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31," 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
===================================================================
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-mbwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stwx0.c"
+
+/* Expect assembly such as:
+
+ stb $31,0($16)
+ stb $31,1($16)
+
+ without any LDQ_U or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sstb\\s\\\$31," 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:ldq_u|stq_u)\\s" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-mno-bwx -msafe-partial" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+#include "stwx0.c"
+
+/* Expect assembly such as:
+
+ lda $2,1($16)
+ bic $2,7,$2
+$L2:
+ ldq_l $1,0($2)
+ mskwh $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L2
+ bic $16,7,$2
+$L3:
+ ldq_l $1,0($2)
+ mskwl $1,$16,$1
+ stq_c $1,0($2)
+ beq $1,$L3
+
+ without any INSWH, INSWL, BIS, LDQ_U, or STQ_U instructions. */
+
+/* { dg-final { scan-assembler-times "\\sldq_l\\s" 2 } } */
+/* { dg-final { scan-assembler-times "\\smskwh\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\smskwl\\s" 1 } } */
+/* { dg-final { scan-assembler-times "\\sstq_c\\s" 2 } } */
+/* { dg-final { scan-assembler-not "\\s(?:bis|inswh|inswl|ldq_u|stq_u)\\s" } } */
===================================================================
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mno-bwx" } */
+/* { dg-options "-mno-bwx -mno-safe-partial" } */
/* { dg-skip-if "" { *-*-* } { "-O0" } } */
typedef struct { short v __attribute__ ((packed)); } shortx;