[2.24] Add VZEROUPPER to memset-vec-unaligned-erms.S [BZ #21081]

Message ID 20170224175317.GA15039@intel.com
State New, archived
Headers

Commit Message

Lu, Hongjiu Feb. 24, 2017, 5:53 p.m. UTC
  I am checking this into the 2.24 branch.

H.J.
---
Since memset-vec-unaligned-erms.S has VDUP_TO_VEC0_AND_SET_RETURN at
function entry, memset optimized for AVX2 and AVX512 will always use
a ymm/zmm register. VZEROUPPER should be placed before ret in

L(stosb):
        movq    %rdx, %rcx
        movzbl  %sil, %eax
        movq    %rdi, %rdx
        rep stosb
        movq    %rdx, %rax
        ret

since it can be reached from

L(stosb_more_2x_vec):
        cmpq    $REP_STOSB_THRESHOLD, %rdx
        ja      L(stosb)

	[BZ #21081]
	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
	(L(stosb)): Add VZEROUPPER before ret.

(cherry picked from commit 02b78ff749f0c88771713368dbb2a09b1979814f)
---
 ChangeLog                                            | 6 ++++++
 sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S | 2 ++
 2 files changed, 8 insertions(+)
  

Patch

diff --git a/ChangeLog b/ChangeLog
index a9b7540..1b7d40a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@ 
+2017-01-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+	[BZ #21081]
+	* sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+	(L(stosb)): Add VZEROUPPER before ret.
+
 2016-11-28  H.J. Lu  <hongjiu.lu@intel.com>
 
 	[BZ #20750]
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
index 28e71fd..acf448c 100644
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
@@ -110,6 +110,8 @@  ENTRY (__memset_erms)
 ENTRY (MEMSET_SYMBOL (__memset, erms))
 # endif
 L(stosb):
+	/* Issue vzeroupper before rep stosb.  */
+	VZEROUPPER
 	movq	%rdx, %rcx
 	movzbl	%sil, %eax
 	movq	%rdi, %rdx