powerpc: Optimized strncat for POWER7/PPC64

Message ID 549AAECD.2050408@linux.vnet.ibm.com
State Committed
Delegated to: Adhemerval Zanella Netto
Headers

Commit Message

Adhemerval Zanella Netto Dec. 24, 2014, 12:17 p.m. UTC
  Hi

Commit 3eb38795dbbbd816 (Simplify strncat) implements a new
strncat algorithm that uses strlen, strnlen, and memcpy.  This is
faster than the current POWER7 implementation, especially for unaligned
strings (where the POWER7 code uses byte-by-byte operations).

This patch removes the assembly implementation and uses a multiarch
specialization based on the default algorithm, calling the optimized POWER7
symbols.  Benchtest results are in the attachments (run on a POWER8 machine).

Tested on powerpc64 and powerpc64le.

--

	* sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c: New file.
	* sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S: Remove file.
	* sysdeps/powerpc/powerpc64/power7/strncat.S: Likewise.

--
stupid_strncat	__strncat_power7	__strncat_ppc
Length    2/   2, alignment  0/ 2, N    2:	3.9375	11.5469	9.20312
Length    4/   4, alignment  0/ 0, N    2:	4.59375	11.5625	9.03125
Length    4/   4, alignment  4/ 0, N    2:	4.45312	11.9375	9.04688
Length    8/   8, alignment  0/ 0, N    2:	6.34375	11.4844	9.20312
Length    8/   8, alignment  0/ 0, N    2:	6.29688	11.4844	8.875
Length   16/  16, alignment  0/ 0, N    2:	17.1562	11.4375	9.78125
Length   16/  16, alignment  7/ 2, N    2:	16.9844	11.3438	9.35938
Length   16/   4, alignment  0/ 0, N    2:	4.53125	10.8438	9
Length   16/   4, alignment  7/ 2, N    2:	4.39062	10.5781	8.84375
Length   32/  32, alignment  0/ 0, N    2:	28.6562	11.6875	10.0312
Length   32/  32, alignment  6/ 4, N    2:	28.625	11.3125	10.1094
Length   32/   8, alignment  0/ 0, N    2:	6.20312	11.8281	8.85938
Length   32/   8, alignment  6/ 4, N    2:	6	11.1406	8.875
Length   64/  64, alignment  0/ 0, N    2:	50.8281	9.4375	10.9375
Length   64/  64, alignment  5/ 6, N    2:	50.9219	9.95312	10.7344
Length   64/  16, alignment  0/ 0, N    2:	16.8438	11.7188	9.67188
Length   64/  16, alignment  5/ 6, N    2:	16.7344	13.1406	9.51562
Length  128/ 128, alignment  0/ 0, N    2:	96.2656	9.17188	12.9375
Length  128/ 128, alignment  4/ 0, N    2:	95.25	8.46875	12.2188
Length  128/  32, alignment  0/ 0, N    2:	28.5625	10.9531	10.0781
Length  128/  32, alignment  4/ 0, N    2:	28.5469	11.0625	9.875
Length  256/ 256, alignment  0/ 0, N    2:	185.828	12.7656	15.6406
Length  256/ 256, alignment  3/ 2, N    2:	184.469	11.8438	15.3594
Length  256/  64, alignment  0/ 0, N    2:	50.875	9.28125	10.9062
Length  256/  64, alignment  3/ 2, N    2:	51.1094	8.71875	10.7031
Length  512/ 512, alignment  0/ 0, N    2:	363.922	22.6094	26.4688
Length  512/ 512, alignment  2/ 4, N    2:	362.391	22.5312	26.3594
Length  512/ 128, alignment  0/ 0, N    2:	95.875	8.67188	12.4844
Length  512/ 128, alignment  2/ 4, N    2:	95.7344	8.5	12.2812
Length 1024/1024, alignment  0/ 0, N    2:	721.781	35.6406	39.875
Length 1024/1024, alignment  1/ 6, N    2:	721.438	35.4219	39.4688
Length 1024/ 256, alignment  0/ 0, N    2:	185.469	12.0469	15.5469
Length 1024/ 256, alignment  1/ 6, N    2:	184.047	11.6406	15.3125
Length   16/   1, alignment  1/ 2, N    2:	3.64062	11.6719	8.82812
Length   16/   1, alignment  2/ 1, N    2:	3.29688	11.7344	8.82812
Length   16/  10, alignment  1/ 1, N    2:	7.15625	11.7812	8.98438
Length   32/   1, alignment  2/ 4, N    2:	3.20312	12.3125	8.82812
Length   32/   1, alignment  4/ 2, N    2:	3.17188	11.3594	8.8125
Length   32/  10, alignment  2/ 2, N    2:	6.85938	86.6562	8.84375
Length   64/   1, alignment  3/ 6, N    2:	3.17188	10.1719	8.8125
Length   64/   1, alignment  6/ 3, N    2:	3.17188	11.5938	8.76562
Length   64/  10, alignment  3/ 3, N    2:	6.9375	11.3906	8.84375
Length  128/   1, alignment  4/ 0, N    2:	3.10938	11.7188	8.85938
Length  128/   1, alignment  0/ 4, N    2:	3.0625	11.5938	8.79688
Length  128/  10, alignment  4/ 4, N    2:	6.92188	10.7344	8.875
Length  256/   1, alignment  5/ 2, N    2:	3.09375	11.2969	8.875
Length  256/   1, alignment  2/ 5, N    2:	3.03125	10.8125	8.78125
Length  256/  10, alignment  5/ 5, N    2:	6.90625	9.57812	8.8125
Length  512/   1, alignment  6/ 4, N    2:	3.14062	11.6562	8.78125
Length  512/   1, alignment  4/ 6, N    2:	3.04688	10	8.85938
Length  512/  10, alignment  6/ 6, N    2:	6.89062	11.4375	9.625
Length 1024/   1, alignment  7/ 6, N    2:	3.07812	10.2344	8.6875
Length 1024/   1, alignment  6/ 7, N    2:	3	11.375	8.875
Length 1024/  10, alignment  7/ 7, N    2:	6.98438	10.9844	9.48438
Length    2/   2, alignment  0/ 2, N    8:	4.29688	14.2344	9.25
Length    4/   4, alignment  0/ 0, N    8:	5.96875	20.0781	9.10938
Length    4/   4, alignment  4/ 0, N    8:	5.48438	10.0625	9.25
Length    8/   8, alignment  0/ 0, N    8:	9.45312	7.85938	8.45312
Length    8/   8, alignment  0/ 0, N    8:	9	7.84375	8.42188
Length   16/  16, alignment  0/ 0, N    8:	19.7031	8.71875	8.98438
Length   16/  16, alignment  7/ 2, N    8:	19.7812	9.64062	8.76562
Length   16/   4, alignment  0/ 0, N    8:	7.28125	8.14062	8.34375
Length   16/   4, alignment  7/ 2, N    8:	7.09375	10.2344	7.98438
Length   32/  32, alignment  0/ 0, N    8:	31.25	9.15625	9.45312
Length   32/  32, alignment  6/ 4, N    8:	30.7344	8.4375	9.1875
Length   32/   8, alignment  0/ 0, N    8:	8.95312	7.85938	8.28125
Length   32/   8, alignment  6/ 4, N    8:	10.625	10.1406	8.34375
Length   64/  64, alignment  0/ 0, N    8:	53.625	9.875	10.0781
Length   64/  64, alignment  5/ 6, N    8:	53.3125	8.59375	9.70312
Length   64/  16, alignment  0/ 0, N    8:	19.4219	8.75	9.0625
Length   64/  16, alignment  5/ 6, N    8:	19.6719	8.5625	8.6875
Length  128/ 128, alignment  0/ 0, N    8:	98.1562	11.5312	12.0781
Length  128/ 128, alignment  4/ 0, N    8:	98.4531	10.4688	11.4531
Length  128/  32, alignment  0/ 0, N    8:	30.9375	8.90625	9.1875
Length  128/  32, alignment  4/ 0, N    8:	30.9531	8.125	9.04688
Length  256/ 256, alignment  0/ 0, N    8:	187.969	14.8281	14.7969
Length  256/ 256, alignment  3/ 2, N    8:	187.844	13.25	14.3594
Length  256/  64, alignment  0/ 0, N    8:	53.6719	9.65625	10
Length  256/  64, alignment  3/ 2, N    8:	53.5625	8.21875	9.6875
Length  512/ 512, alignment  0/ 0, N    8:	365.609	25.4375	25.6875
Length  512/ 512, alignment  2/ 4, N    8:	366.609	24.0781	25.4219
Length  512/ 128, alignment  0/ 0, N    8:	98.2188	11.4375	11.7031
Length  512/ 128, alignment  2/ 4, N    8:	98.6719	10.0156	11.4375
Length 1024/1024, alignment  0/ 0, N    8:	723.891	38.5625	38.8438
Length 1024/1024, alignment  1/ 6, N    8:	725.547	37.0312	38.5781
Length 1024/ 256, alignment  0/ 0, N    8:	187.859	14.4688	14.8438
Length 1024/ 256, alignment  1/ 6, N    8:	187.719	13.0781	14.3438
Length   16/   1, alignment  1/ 2, N    8:	6.03125	11.4062	7.8125
Length   16/   1, alignment  2/ 1, N    8:	5.64062	11.25	7.95312
Length   16/  10, alignment  1/ 1, N    8:	10	10.2969	8.10938
Length   32/   1, alignment  2/ 4, N    8:	5.82812	11.0312	7.79688
Length   32/   1, alignment  4/ 2, N    8:	5.65625	11.2344	8.01562
Length   32/  10, alignment  2/ 2, N    8:	9.85938	10.4375	7.95312
Length   64/   1, alignment  3/ 6, N    8:	5.75	9.3125	7.9375
Length   64/   1, alignment  6/ 3, N    8:	5.5625	10.9844	7.92188
Length   64/  10, alignment  3/ 3, N    8:	9.875	9.375	8.04688
Length  128/   1, alignment  4/ 0, N    8:	5.8125	11.5156	7.90625
Length  128/   1, alignment  0/ 4, N    8:	5.51562	7.85938	8.0625
Length  128/  10, alignment  4/ 4, N    8:	9.92188	9.5625	8.07812
Length  256/   1, alignment  5/ 2, N    8:	5.67188	11.1406	7.76562
Length  256/   1, alignment  2/ 5, N    8:	5.5	9.75	7.85938
Length  256/  10, alignment  5/ 5, N    8:	9.8125	8.48438	8.03125
Length  512/   1, alignment  6/ 4, N    8:	5.73438	10.2188	7.9375
Length  512/   1, alignment  4/ 6, N    8:	5.51562	9.32812	7.82812
Length  512/  10, alignment  6/ 6, N    8:	9.92188	9.5625	8.64062
Length 1024/   1, alignment  7/ 6, N    8:	5.65625	9.125	7.75
Length 1024/   1, alignment  6/ 7, N    8:	5.5	10.4531	7.9375
Length 1024/  10, alignment  7/ 7, N    8:	9.85938	11.7969	8.54688
Length    2/   2, alignment  0/ 2, N   32:	4	14.0625	9.15625
Length    4/   4, alignment  0/ 0, N   32:	5.46875	20.1094	9.0625
Length    4/   4, alignment  4/ 0, N   32:	5.32812	10.2188	9.21875
Length    8/   8, alignment  0/ 0, N   32:	9.5625	11.4531	8.625
Length    8/   8, alignment  0/ 0, N   32:	9.25	11.5156	8.46875
Length   16/  16, alignment  0/ 0, N   32:	23.4375	15.5156	11.2812
Length   16/  16, alignment  7/ 2, N   32:	23.0938	9.28125	10.6562
Length   16/   4, alignment  0/ 0, N   32:	11.2969	14.8594	10.7969
Length   16/   4, alignment  7/ 2, N   32:	10.8594	8.75	9.84375
Length   32/  32, alignment  0/ 0, N   32:	44.6406	19.2656	12.6562
Length   32/  32, alignment  6/ 4, N   32:	44.3906	15.3125	13.2344
Length   32/   8, alignment  0/ 0, N   32:	22.6719	18.3438	11.8906
Length   32/   8, alignment  6/ 4, N   32:	22.3125	14.3906	12.4688
Length   64/  64, alignment  0/ 0, N   32:	67.1094	19.6719	13.25
Length   64/  64, alignment  5/ 6, N   32:	67.3594	15.7031	13.125
Length   64/  16, alignment  0/ 0, N   32:	33.3438	19.125	12.25
Length   64/  16, alignment  5/ 6, N   32:	33.0625	15.1094	12.0156
Length  128/ 128, alignment  0/ 0, N   32:	111.156	21.7344	19.875
Length  128/ 128, alignment  4/ 0, N   32:	112.5	17.6562	13.7656
Length  128/  32, alignment  0/ 0, N   32:	44.2969	19.4375	12.7344
Length  128/  32, alignment  4/ 0, N   32:	44.6406	14.7188	11.4531
Length  256/ 256, alignment  0/ 0, N   32:	200.656	24.9531	17.9844
Length  256/ 256, alignment  3/ 2, N   32:	201.438	20.4375	18
Length  256/  64, alignment  0/ 0, N   32:	66.8125	19.9062	13.2031
Length  256/  64, alignment  3/ 2, N   32:	67.0781	15.4219	13.0312
Length  512/ 512, alignment  0/ 0, N   32:	379.641	35.7031	28.5469
Length  512/ 512, alignment  2/ 4, N   32:	379.656	30.9219	28.7812
Length  512/ 128, alignment  0/ 0, N   32:	111.516	21.5625	18.25
Length  512/ 128, alignment  2/ 4, N   32:	111.297	17.7344	14.6719
Length 1024/1024, alignment  0/ 0, N   32:	737.406	48.9375	41.9062
Length 1024/1024, alignment  1/ 6, N   32:	738.172	45.2812	41.625
Length 1024/ 256, alignment  0/ 0, N   32:	200.25	24.6875	17.9531
Length 1024/ 256, alignment  1/ 6, N   32:	200.516	20.5469	17.5156
Length   16/   1, alignment  1/ 2, N   32:	9.98438	8.5625	10.1875
Length   16/   1, alignment  2/ 1, N   32:	9.51562	8.45312	9.70312
Length   16/  10, alignment  1/ 1, N   32:	13.4688	9	10.1094
Length   32/   1, alignment  2/ 4, N   32:	19.5938	15.3125	12.2344
Length   32/   1, alignment  4/ 2, N   32:	19.4219	14.7031	11.7344
Length   32/  10, alignment  2/ 2, N   32:	23.5156	14.2344	12.1562
Length   64/   1, alignment  3/ 6, N   32:	19.4688	15.0625	11.5625
Length   64/   1, alignment  6/ 3, N   32:	19.4531	14.8594	11.4219
Length   64/  10, alignment  3/ 3, N   32:	23.6094	14.2656	11.5938
Length  128/   1, alignment  4/ 0, N   32:	19.5156	14.7969	11.4688
Length  128/   1, alignment  0/ 4, N   32:	19.4375	18.2344	11.0312
Length  128/  10, alignment  4/ 4, N   32:	23.6094	14.0781	11.3125
Length  256/   1, alignment  5/ 2, N   32:	19.3594	14.8125	11.4062
Length  256/   1, alignment  2/ 5, N   32:	19.3438	14.8281	11.1094
Length  256/  10, alignment  5/ 5, N   32:	23.5781	14.3438	11.5781
Length  512/   1, alignment  6/ 4, N   32:	19.4844	14.6406	11.25
Length  512/   1, alignment  4/ 6, N   32:	19.2031	14.6406	11.2344
Length  512/  10, alignment  6/ 6, N   32:	23.625	14.9219	10.875
Length 1024/   1, alignment  7/ 6, N   32:	19.6406	14.6719	11.5
Length 1024/   1, alignment  6/ 7, N   32:	19.4062	14.0156	11.75
Length 1024/  10, alignment  7/ 7, N   32:	23.5625	15.0312	12.2812
Length    2/   2, alignment  0/ 2, N  128:	4.04688	14	8.875
Length    4/   4, alignment  0/ 0, N  128:	5.57812	20.1562	8.70312
Length    4/   4, alignment  4/ 0, N  128:	5.5625	10.3438	8.79688
Length    8/   8, alignment  0/ 0, N  128:	9.54688	11.4844	8.3125
Length    8/   8, alignment  0/ 0, N  128:	9.15625	11.4531	8.21875
Length   16/  16, alignment  0/ 0, N  128:	23.4688	15.5469	10.8281
Length   16/  16, alignment  7/ 2, N  128:	23.1562	8.75	10.4688
Length   16/   4, alignment  0/ 0, N  128:	11.0781	14.7188	10.1406
Length   16/   4, alignment  7/ 2, N  128:	10.7812	8.65625	9.5625
Length   32/  32, alignment  0/ 0, N  128:	44.4531	22.7656	12.5625
Length   32/  32, alignment  6/ 4, N  128:	43.9531	15.5625	12.2812
Length   32/   8, alignment  0/ 0, N  128:	22.125	21.8125	11.8125
Length   32/   8, alignment  6/ 4, N  128:	21.9844	15.0469	11.4844
Length   64/  64, alignment  0/ 0, N  128:	80.1406	36.7812	15.0625
Length   64/  64, alignment  5/ 6, N  128:	81.0312	22.8281	14.2031
Length   64/  16, alignment  0/ 0, N  128:	46.5156	35.9844	13.3906
Length   64/  16, alignment  5/ 6, N  128:	46.5625	23.6562	12.9062
Length  128/ 128, alignment  0/ 0, N  128:	150.859	64.1094	18.1406
Length  128/ 128, alignment  4/ 0, N  128:	150.234	37.0938	17.2812
Length  128/  32, alignment  0/ 0, N  128:	83.1719	61.5625	15.5156
Length  128/  32, alignment  4/ 0, N  128:	83.25	33.5938	15.2031
Length  256/ 256, alignment  0/ 0, N  128:	239.625	67.2656	20.9219
Length  256/ 256, alignment  3/ 2, N  128:	239.578	40.5781	21.1094
Length  256/  64, alignment  0/ 0, N  128:	106.125	62.3438	15.9531
Length  256/  64, alignment  3/ 2, N  128:	106.078	34.0781	16.0625
Length  512/ 512, alignment  0/ 0, N  128:	418.906	78.125	31.3281
Length  512/ 512, alignment  2/ 4, N  128:	419.016	51.8438	31.5156
Length  512/ 128, alignment  0/ 0, N  128:	151.859	63.8281	17.6562
Length  512/ 128, alignment  2/ 4, N  128:	150.703	37.3125	17.4531
Length 1024/1024, alignment  0/ 0, N  128:	773.594	91.4844	44.3594
Length 1024/1024, alignment  1/ 6, N  128:	776.109	64.375	44.4688
Length 1024/ 256, alignment  0/ 0, N  128:	238.75	66.9219	20.6406
Length 1024/ 256, alignment  1/ 6, N  128:	239.656	40.8906	20.1562
Length   16/   1, alignment  1/ 2, N  128:	9.84375	8.70312	9.8125
Length   16/   1, alignment  2/ 1, N  128:	9.54688	8.48438	9.5
Length   16/  10, alignment  1/ 1, N  128:	13.4219	8.65625	9.82812
Length   32/   1, alignment  2/ 4, N  128:	19.1719	15.8906	11.6094
Length   32/   1, alignment  4/ 2, N  128:	19.0938	15.5	11.6562
Length   32/  10, alignment  2/ 2, N  128:	23.4531	15.2969	11.7031
Length   64/   1, alignment  3/ 6, N  128:	32.7344	22.1406	13.1406
Length   64/   1, alignment  6/ 3, N  128:	32.8281	22.0312	12.5938
Length   64/  10, alignment  3/ 3, N  128:	37.2969	21.4688	12.5312
Length  128/   1, alignment  4/ 0, N  128:	57.9688	33.4688	15.9531
Length  128/   1, alignment  0/ 4, N  128:	57.875	60.3281	14.625
Length  128/  10, alignment  4/ 4, N  128:	62.6406	33.2188	15.3281
Length  256/   1, alignment  5/ 2, N  128:	57.8594	35.6719	14.875
Length  256/   1, alignment  2/ 5, N  128:	58.0156	36.0938	14.4531
Length  256/  10, alignment  5/ 5, N  128:	62.7188	35.2188	14.7344
Length  512/   1, alignment  6/ 4, N  128:	58.1719	35.5938	14.6875
Length  512/   1, alignment  4/ 6, N  128:	57.7969	35.7656	14.8594
Length  512/  10, alignment  6/ 6, N  128:	62.9219	35.6719	14.1875
Length 1024/   1, alignment  7/ 6, N  128:	57.8281	35.6094	13.4219
Length 1024/   1, alignment  6/ 7, N  128:	57.9219	35.4062	14.8906
Length 1024/  10, alignment  7/ 7, N  128:	62.8281	35.7031	15.8438
Length    2/   2, alignment  0/ 2, N  512:	3.89062	13.9688	8.6875
Length    4/   4, alignment  0/ 0, N  512:	5.54688	20.0625	8.59375
Length    4/   4, alignment  4/ 0, N  512:	5.35938	10.7188	8.82812
Length    8/   8, alignment  0/ 0, N  512:	9.51562	11.4844	8.28125
Length    8/   8, alignment  0/ 0, N  512:	9.1875	11.5156	8.04688
Length   16/  16, alignment  0/ 0, N  512:	23.6719	15.5625	10.6094
Length   16/  16, alignment  7/ 2, N  512:	23.125	8.71875	10.125
Length   16/   4, alignment  0/ 0, N  512:	11.1719	14.7031	10.2344
Length   16/   4, alignment  7/ 2, N  512:	10.7344	8.57812	9.51562
Length   32/  32, alignment  0/ 0, N  512:	44.6562	22.4688	12.4219
Length   32/  32, alignment  6/ 4, N  512:	44.4375	15.7344	12.1875
Length   32/   8, alignment  0/ 0, N  512:	22.4844	21.5938	11.5469
Length   32/   8, alignment  6/ 4, N  512:	21.7969	15.0469	11.3125
Length   64/  64, alignment  0/ 0, N  512:	80.7812	36.6406	14.1719
Length   64/  64, alignment  5/ 6, N  512:	80.0469	22.4688	13.8438
Length   64/  16, alignment  0/ 0, N  512:	46.6562	35.9844	13.1875
Length   64/  16, alignment  5/ 6, N  512:	46.6562	23.3281	12.7344
Length  128/ 128, alignment  0/ 0, N  512:	150.766	65.5781	18.7031
Length  128/ 128, alignment  4/ 0, N  512:	150.578	36.6406	17.8125
Length  128/  32, alignment  0/ 0, N  512:	83.375	63.3281	16.2031
Length  128/  32, alignment  4/ 0, N  512:	83.25	34.3281	15.2812
Length  256/ 256, alignment  0/ 0, N  512:	290.781	122.859	25.8906
Length  256/ 256, alignment  3/ 2, N  512:	290.688	64.8594	27.6094
Length  256/  64, alignment  0/ 0, N  512:	157.375	117.734	20.4531
Length  256/  64, alignment  3/ 2, N  512:	157.109	60.125	22.6875
Length  512/ 512, alignment  0/ 0, N  512:	572.219	239.688	49.1562
Length  512/ 512, alignment  2/ 4, N  512:	570.75	127.188	56.0625
Length  512/ 128, alignment  0/ 0, N  512:	302.906	225.688	35.2344
Length  512/ 128, alignment  2/ 4, N  512:	303.344	110.562	42.4219
Length 1024/1024, alignment  0/ 0, N  512:	928.359	252.5	61.8594
Length 1024/1024, alignment  1/ 6, N  512:	926.859	140.453	68.7969
Length 1024/ 256, alignment  0/ 0, N  512:	392.641	228.984	38.6094
Length 1024/ 256, alignment  1/ 6, N  512:	392.859	115.938	44.9688
Length   16/   1, alignment  1/ 2, N  512:	9.45312	8.57812	9.625
Length   16/   1, alignment  2/ 1, N  512:	9.125	8.53125	9.39062
Length   16/  10, alignment  1/ 1, N  512:	13.0781	8.65625	9.51562
Length   32/   1, alignment  2/ 4, N  512:	19.2812	15.5156	11.5469
Length   32/   1, alignment  4/ 2, N  512:	19	15.2188	11.4062
Length   32/  10, alignment  2/ 2, N  512:	23.3594	15.0781	11.6094
Length   64/   1, alignment  3/ 6, N  512:	31.75	22.2031	12.7812
Length   64/   1, alignment  6/ 3, N  512:	31.75	21.9062	12.1094
Length   64/  10, alignment  3/ 3, N  512:	36.6406	21.25	12.5625
Length  128/   1, alignment  4/ 0, N  512:	57.1719	34.4531	15.9062
Length  128/   1, alignment  0/ 4, N  512:	57.2812	61.8438	15.1094
Length  128/  10, alignment  4/ 4, N  512:	62.4375	34.1094	15.4219
Length  256/   1, alignment  5/ 2, N  512:	107.828	59.6875	21.6562
Length  256/   1, alignment  2/ 5, N  512:	107.938	60.0156	21.1562
Length  256/  10, alignment  5/ 5, N  512:	114.203	59.3281	21.4531
Length  512/   1, alignment  6/ 4, N  512:	211.094	110.703	38.3438
Length  512/   1, alignment  4/ 6, N  512:	211.469	111.922	38.5
Length  512/  10, alignment  6/ 6, N  512:	218.219	111.062	37.8594
Length 1024/   1, alignment  7/ 6, N  512:	211.141	111.031	31.5156
Length 1024/   1, alignment  6/ 7, N  512:	211.281	111.781	39.1875
Length 1024/  10, alignment  7/ 7, N  512:	218.594	111.594	38.9531
Length    2/   2, alignment  0/ 2, N 2048:	3.85938	13.8125	8.5
Length    4/   4, alignment  0/ 0, N 2048:	5.65625	20.125	8.60938
Length    4/   4, alignment  4/ 0, N 2048:	5.40625	10.6719	8.82812
Length    8/   8, alignment  0/ 0, N 2048:	9.09375	11.3906	8.09375
Length    8/   8, alignment  0/ 0, N 2048:	8.98438	11.5156	8.10938
Length   16/  16, alignment  0/ 0, N 2048:	23.4531	15.4219	10.5625
Length   16/  16, alignment  7/ 2, N 2048:	23.0469	8.9375	10
Length   16/   4, alignment  0/ 0, N 2048:	10.7031	14.7656	10.2344
Length   16/   4, alignment  7/ 2, N 2048:	10.2656	8.67188	9.5
Length   32/  32, alignment  0/ 0, N 2048:	44.2344	22.4531	12.2344
Length   32/  32, alignment  6/ 4, N 2048:	44.0469	15.7344	12.0469
Length   32/   8, alignment  0/ 0, N 2048:	22.2969	21.6719	11.5
Length   32/   8, alignment  6/ 4, N 2048:	21.8594	15.3438	11.2969
Length   64/  64, alignment  0/ 0, N 2048:	79.75	36.7812	14.375
Length   64/  64, alignment  5/ 6, N 2048:	79.2344	22.4375	13.5
Length   64/  16, alignment  0/ 0, N 2048:	45.4531	35.8594	13.3906
Length   64/  16, alignment  5/ 6, N 2048:	45.3594	23.375	12.9219
Length  128/ 128, alignment  0/ 0, N 2048:	150.125	65.6406	18.4688
Length  128/ 128, alignment  4/ 0, N 2048:	149.641	36.8125	17.2656
Length  128/  32, alignment  0/ 0, N 2048:	82	63.3438	16.2031
Length  128/  32, alignment  4/ 0, N 2048:	82.0469	34.2812	15.1719
Length  256/ 256, alignment  0/ 0, N 2048:	290.609	122.922	25.8594
Length  256/ 256, alignment  3/ 2, N 2048:	289.547	64.9375	27.25
Length  256/  64, alignment  0/ 0, N 2048:	155.734	117.781	20.4219
Length  256/  64, alignment  3/ 2, N 2048:	156.109	60.0938	22.7031
Length  512/ 512, alignment  0/ 0, N 2048:	571.188	241.547	49.7812
Length  512/ 512, alignment  2/ 4, N 2048:	570.828	125.438	56.8594
Length  512/ 128, alignment  0/ 0, N 2048:	302.328	227.375	36.0156
Length  512/ 128, alignment  2/ 4, N 2048:	302.562	111.438	43.0312
Length 1024/1024, alignment  0/ 0, N 2048:	1132.56	470.812	85.4375
Length 1024/1024, alignment  1/ 6, N 2048:	1134	239.438	93
Length 1024/ 256, alignment  0/ 0, N 2048:	595.422	446.594	61.0625
Length 1024/ 256, alignment  1/ 6, N 2048:	596.141	215.562	69.8438
Length   16/   1, alignment  1/ 2, N 2048:	9.53125	8.60938	9.5
Length   16/   1, alignment  2/ 1, N 2048:	9.125	8.59375	9.26562
Length   16/  10, alignment  1/ 1, N 2048:	13.0312	8.625	9.60938
Length   32/   1, alignment  2/ 4, N 2048:	19.1875	15.3906	11.375
Length   32/   1, alignment  4/ 2, N 2048:	18.9844	15.2969	11.2812
Length   32/  10, alignment  2/ 2, N 2048:	23.4531	14.9844	11.4844
Length   64/   1, alignment  3/ 6, N 2048:	32.9375	22.1875	12.7812
Length   64/   1, alignment  6/ 3, N 2048:	32.7031	21.9219	12.0781
Length   64/  10, alignment  3/ 3, N 2048:	37.125	21.4531	12.5
Length  128/   1, alignment  4/ 0, N 2048:	58.2656	34.2656	15.8906
Length  128/   1, alignment  0/ 4, N 2048:	58.0469	61.9375	14.8594
Length  128/  10, alignment  4/ 4, N 2048:	63.2812	33.8281	15.1875
Length  256/   1, alignment  5/ 2, N 2048:	109.297	59.6406	21.4688
Length  256/   1, alignment  2/ 5, N 2048:	109.125	60.2031	20.8438
Length  256/  10, alignment  5/ 5, N 2048:	114.656	59.4219	21.375
Length  512/   1, alignment  6/ 4, N 2048:	210.656	109.297	39.5469
Length  512/   1, alignment  4/ 6, N 2048:	211.672	111.109	40.2656
Length  512/  10, alignment  6/ 6, N 2048:	219.219	109.547	38.6406
Length 1024/   1, alignment  7/ 6, N 2048:	414.719	210.953	53.0469
Length 1024/   1, alignment  6/ 7, N 2048:	416.641	213.516	63.3594
Length 1024/  10, alignment  7/ 7, N 2048:	427.016	210.953	63.875
stupid_strncat	__strncat_power7	__strncat_ppc
Length    2/   2, alignment  0/ 2, N    2:	4.01562	7.98438	9.01562
Length    4/   4, alignment  0/ 0, N    2:	4.54688	7.96875	9.17188
Length    4/   4, alignment  4/ 0, N    2:	4.32812	7.90625	8.95312
Length    8/   8, alignment  0/ 0, N    2:	6.42188	7.92188	9.0625
Length    8/   8, alignment  0/ 0, N    2:	6.20312	7.78125	9.03125
Length   16/  16, alignment  0/ 0, N    2:	16.9688	8.4375	9.65625
Length   16/  16, alignment  7/ 2, N    2:	17.1719	8.10938	9.35938
Length   16/   4, alignment  0/ 0, N    2:	4.57812	7.42188	8.90625
Length   16/   4, alignment  7/ 2, N    2:	4.39062	7.46875	8.6875
Length   32/  32, alignment  0/ 0, N    2:	28.5938	9.23438	10.2656
Length   32/  32, alignment  6/ 4, N    2:	28.625	9	10.125
Length   32/   8, alignment  0/ 0, N    2:	6.23438	7.60938	8.9375
Length   32/   8, alignment  6/ 4, N    2:	6.0625	7.60938	8.90625
Length   64/  64, alignment  0/ 0, N    2:	50.9688	10.1562	11.0781
Length   64/  64, alignment  5/ 6, N    2:	50.3281	10.0156	10.7188
Length   64/  16, alignment  0/ 0, N    2:	53.75	8.54688	9.59375
Length   64/  16, alignment  5/ 6, N    2:	17	8.46875	9.60938
Length  128/ 128, alignment  0/ 0, N    2:	95.3438	11.8438	12.5938
Length  128/ 128, alignment  4/ 0, N    2:	95.5312	11.4375	12.2344
Length  128/  32, alignment  0/ 0, N    2:	28.5312	9	10.2344
Length  128/  32, alignment  4/ 0, N    2:	28.7188	8.84375	9.92188
Length  256/ 256, alignment  0/ 0, N    2:	185.281	15.25	15.5938
Length  256/ 256, alignment  3/ 2, N    2:	185.688	14.5781	15.2812
Length  256/  64, alignment  0/ 0, N    2:	51.1406	10.0469	10.9844
Length  256/  64, alignment  3/ 2, N    2:	51.0781	10	10.6875
Length  512/ 512, alignment  0/ 0, N    2:	363.812	25.4062	26.5
Length  512/ 512, alignment  2/ 4, N    2:	362.859	25.2031	26.4375
Length  512/ 128, alignment  0/ 0, N    2:	95.5781	11.5156	12.4688
Length  512/ 128, alignment  2/ 4, N    2:	95.9844	11.4062	12.2812
Length 1024/1024, alignment  0/ 0, N    2:	721.547	38.4531	39.5938
Length 1024/1024, alignment  1/ 6, N    2:	722.141	38.2656	39.6094
Length 1024/ 256, alignment  0/ 0, N    2:	184.797	14.6875	15.5938
Length 1024/ 256, alignment  1/ 6, N    2:	184.828	14.5469	15.3594
Length   16/   1, alignment  1/ 2, N    2:	3.67188	7.57812	8.9375
Length   16/   1, alignment  2/ 1, N    2:	3.21875	7.29688	8.85938
Length   16/  10, alignment  1/ 1, N    2:	7.03125	7.8125	8.92188
Length   32/   1, alignment  2/ 4, N    2:	3.21875	7.34375	8.84375
Length   32/   1, alignment  4/ 2, N    2:	3.07812	7.46875	8.875
Length   32/  10, alignment  2/ 2, N    2:	7	7.82812	8.98438
Length   64/   1, alignment  3/ 6, N    2:	3.17188	7.51562	8.85938
Length   64/   1, alignment  6/ 3, N    2:	3.10938	7.5625	8.84375
Length   64/  10, alignment  3/ 3, N    2:	6.90625	7.67188	8.96875
Length  128/   1, alignment  4/ 0, N    2:	3.07812	7.375	8.875
Length  128/   1, alignment  0/ 4, N    2:	3.07812	7.5	8.92188
Length  128/  10, alignment  4/ 4, N    2:	6.96875	7.5625	8.98438
Length  256/   1, alignment  5/ 2, N    2:	3.03125	7.54688	8.85938
Length  256/   1, alignment  2/ 5, N    2:	3.0625	7.51562	8.90625
Length  256/  10, alignment  5/ 5, N    2:	6.96875	7.73438	8.89062
Length  512/   1, alignment  6/ 4, N    2:	3.03125	7.53125	8.79688
Length  512/   1, alignment  4/ 6, N    2:	3.03125	7.39062	8.90625
Length  512/  10, alignment  6/ 6, N    2:	6.9375	8.5625	9.57812
Length 1024/   1, alignment  7/ 6, N    2:	3.01562	7.21875	8.57812
Length 1024/   1, alignment  6/ 7, N    2:	3.01562	7.71875	8.9375
Length 1024/  10, alignment  7/ 7, N    2:	6.90625	8.1875	9.35938
Length    2/   2, alignment  0/ 2, N    8:	4.35938	7.98438	9.10938
Length    4/   4, alignment  0/ 0, N    8:	5.9375	7.8125	9.0625
Length    4/   4, alignment  4/ 0, N    8:	5.59375	7.79688	9.46875
Length    8/   8, alignment  0/ 0, N    8:	9.375	7.20312	8.48438
Length    8/   8, alignment  0/ 0, N    8:	8.84375	7	8.25
Length   16/  16, alignment  0/ 0, N    8:	19.375	7.67188	8.73438
Length   16/  16, alignment  7/ 2, N    8:	19.4531	7.875	8.65625
Length   16/   4, alignment  0/ 0, N    8:	7.375	7.03125	8.01562
Length   16/   4, alignment  7/ 2, N    8:	7	7.8125	7.95312
Length   32/  32, alignment  0/ 0, N    8:	31.0938	8.34375	9.54688
Length   32/  32, alignment  6/ 4, N    8:	31.1719	8.07812	9.23438
Length   32/   8, alignment  0/ 0, N    8:	9.25	7	8.17188
Length   32/   8, alignment  6/ 4, N    8:	9.125	6.95312	8.20312
Length   64/  64, alignment  0/ 0, N    8:	53.5469	9.4375	10.1094
Length   64/  64, alignment  5/ 6, N    8:	53.6875	8.84375	9.9375
Length   64/  16, alignment  0/ 0, N    8:	19.3906	7.95312	9
Length   64/  16, alignment  5/ 6, N    8:	19.6562	7.64062	8.85938
Length  128/ 128, alignment  0/ 0, N    8:	98.2344	11.0938	11.625
Length  128/ 128, alignment  4/ 0, N    8:	98.2344	10.4844	11.625
Length  128/  32, alignment  0/ 0, N    8:	31.4062	8.3125	9.26562
Length  128/  32, alignment  4/ 0, N    8:	30.8906	7.92188	9.1875
Length  256/ 256, alignment  0/ 0, N    8:	187.906	14.0938	14.8594
Length  256/ 256, alignment  3/ 2, N    8:	188.484	13.625	14.4375
Length  256/  64, alignment  0/ 0, N    8:	53.2969	9.28125	10.1562
Length  256/  64, alignment  3/ 2, N    8:	53.2656	8.57812	9.6875
Length  512/ 512, alignment  0/ 0, N    8:	366.922	24.7656	25.75
Length  512/ 512, alignment  2/ 4, N    8:	366.016	24.1875	25.5
Length  512/ 128, alignment  0/ 0, N    8:	97.9844	10.8125	11.6562
Length  512/ 128, alignment  2/ 4, N    8:	98.6094	10.3281	11.3594
Length 1024/1024, alignment  0/ 0, N    8:	724.938	37.7344	39.0156
Length 1024/1024, alignment  1/ 6, N    8:	723.75	37.2031	38.4844
Length 1024/ 256, alignment  0/ 0, N    8:	187.406	13.9219	14.7812
Length 1024/ 256, alignment  1/ 6, N    8:	187.453	13.4688	14.4062
Length   16/   1, alignment  1/ 2, N    8:	6.21875	7.64062	8.03125
Length   16/   1, alignment  2/ 1, N    8:	5.53125	8.76562	7.92188
Length   16/  10, alignment  1/ 1, N    8:	10	6.85938	7.95312
Length   32/   1, alignment  2/ 4, N    8:	5.73438	7.70312	7.89062
Length   32/   1, alignment  4/ 2, N    8:	5.59375	7.71875	7.9375
Length   32/  10, alignment  2/ 2, N    8:	9.89062	6.875	8
Length   64/   1, alignment  3/ 6, N    8:	5.75	7.48438	7.82812
Length   64/   1, alignment  6/ 3, N    8:	5.59375	7.14062	7.9375
Length   64/  10, alignment  3/ 3, N    8:	9.875	6.84375	8.07812
Length  128/   1, alignment  4/ 0, N    8:	5.73438	7.96875	7.875
Length  128/   1, alignment  0/ 4, N    8:	5.53125	7	8.01562
Length  128/  10, alignment  4/ 4, N    8:	9.96875	6.95312	7.96875
Length  256/   1, alignment  5/ 2, N    8:	5.70312	7.42188	7.875
Length  256/   1, alignment  2/ 5, N    8:	5.51562	7.15625	7.79688
Length  256/  10, alignment  5/ 5, N    8:	9.78125	6.76562	8.14062
Length  512/   1, alignment  6/ 4, N    8:	5.6875	7.15625	7.95312
Length  512/   1, alignment  4/ 6, N    8:	5.51562	7.35938	7.96875
Length  512/  10, alignment  6/ 6, N    8:	9.9375	7.70312	8.65625
Length 1024/   1, alignment  7/ 6, N    8:	5.67188	7.70312	7.85938
Length 1024/   1, alignment  6/ 7, N    8:	5.46875	6.84375	7.98438
Length 1024/  10, alignment  7/ 7, N    8:	9.9375	7.32812	8.57812
Length    2/   2, alignment  0/ 2, N   32:	3.92188	7.89062	9.14062
Length    4/   4, alignment  0/ 0, N   32:	5.65625	7.65625	9.01562
Length    4/   4, alignment  4/ 0, N   32:	5.375	7.79688	9.20312
Length    8/   8, alignment  0/ 0, N   32:	9.48438	7.45312	8.48438
Length    8/   8, alignment  0/ 0, N   32:	9.14062	7.15625	8.39062
Length   16/  16, alignment  0/ 0, N   32:	23.4844	10.2031	11.1562
Length   16/  16, alignment  7/ 2, N   32:	23.3594	9.875	10.625
Length   16/   4, alignment  0/ 0, N   32:	11.0938	9.54688	10.7969
Length   16/   4, alignment  7/ 2, N   32:	10.8594	8.95312	10.0312
Length   32/  32, alignment  0/ 0, N   32:	44.4219	11.4844	12.6719
Length   32/  32, alignment  6/ 4, N   32:	44.6406	12.0781	13.0625
Length   32/   8, alignment  0/ 0, N   32:	22.6406	10.5781	11.7188
Length   32/   8, alignment  6/ 4, N   32:	22.3281	11.4375	12.2344
Length   64/  64, alignment  0/ 0, N   32:	66.75	12.0469	13.3906
Length   64/  64, alignment  5/ 6, N   32:	67.0938	12.8281	13.0156
Length   64/  16, alignment  0/ 0, N   32:	32.8594	11.0938	11.9844
Length   64/  16, alignment  5/ 6, N   32:	33.1719	11.3281	12.1094
Length  128/ 128, alignment  0/ 0, N   32:	111.359	14.2656	17.5781
Length  128/ 128, alignment  4/ 0, N   32:	110.703	13.1562	13.4844
Length  128/  32, alignment  0/ 0, N   32:	44.5469	11.5156	12.5781
Length  128/  32, alignment  4/ 0, N   32:	44.2188	10.0312	11.2344
Length  256/ 256, alignment  0/ 0, N   32:	201.766	17.2656	17.8906
Length  256/ 256, alignment  3/ 2, N   32:	201.906	17.1562	17.875
Length  256/  64, alignment  0/ 0, N   32:	66.7812	12.2656	13.1562
Length  256/  64, alignment  3/ 2, N   32:	66.6875	12.3594	13.25
Length  512/ 512, alignment  0/ 0, N   32:	380.562	27.7031	28.5781
Length  512/ 512, alignment  2/ 4, N   32:	378.703	27.7031	28.8125
Length  512/ 128, alignment  0/ 0, N   32:	111.375	13.9688	18.5
Length  512/ 128, alignment  2/ 4, N   32:	111.844	13.7812	14.625
Length 1024/1024, alignment  0/ 0, N   32:	736.984	40.6875	41.9844
Length 1024/1024, alignment  1/ 6, N   32:	738.75	40.5625	41.8594
Length 1024/ 256, alignment  0/ 0, N   32:	200.766	17.0625	17.9219
Length 1024/ 256, alignment  1/ 6, N   32:	201.172	16.8125	17.6094
Length   16/   1, alignment  1/ 2, N   32:	10.2031	9.125	9.90625
Length   16/   1, alignment  2/ 1, N   32:	9.53125	8.92188	9.67188
Length   16/  10, alignment  1/ 1, N   32:	13.6094	9.10938	9.875
Length   32/   1, alignment  2/ 4, N   32:	19.6719	10.8281	12.375
Length   32/   1, alignment  4/ 2, N   32:	19.3438	10.8906	11.8125
Length   32/  10, alignment  2/ 2, N   32:	23.4844	11.0625	12.0625
Length   64/   1, alignment  3/ 6, N   32:	19.5938	10.4531	11.7188
Length   64/   1, alignment  6/ 3, N   32:	19.4062	10.3906	11.5625
Length   64/  10, alignment  3/ 3, N   32:	23.7031	11.0469	11.9531
Length  128/   1, alignment  4/ 0, N   32:	19.5781	10.375	11.5625
Length  128/   1, alignment  0/ 4, N   32:	19.3594	10.0469	11.0625
Length  128/  10, alignment  4/ 4, N   32:	23.5312	10.8125	11.6406
Length  256/   1, alignment  5/ 2, N   32:	19.5	10.4219	11.2812
Length  256/   1, alignment  2/ 5, N   32:	19.375	10.4531	11.4062
Length  256/  10, alignment  5/ 5, N   32:	23.4375	10.7344	11.4688
Length  512/   1, alignment  6/ 4, N   32:	19.5156	10.4844	11.4219
Length  512/   1, alignment  4/ 6, N   32:	19.3906	10.1875	11.3125
Length  512/  10, alignment  6/ 6, N   32:	23.5469	9.78125	10.7344
Length 1024/   1, alignment  7/ 6, N   32:	19.5625	10.75	11.375
Length 1024/   1, alignment  6/ 7, N   32:	19.5469	11.2969	11.9375
Length 1024/  10, alignment  7/ 7, N   32:	23.5	11.375	12.0938
Length    2/   2, alignment  0/ 2, N  128:	4.1875	8.42188	8.60938
Length    4/   4, alignment  0/ 0, N  128:	5.98438	7.92188	8.40625
Length    4/   4, alignment  4/ 0, N  128:	5.46875	7.85938	8.78125
Length    8/   8, alignment  0/ 0, N  128:	9.48438	8.28125	7.95312
Length    8/   8, alignment  0/ 0, N  128:	9.17188	7.67188	7.875
Length   16/  16, alignment  0/ 0, N  128:	23.2188	9.6875	10.5938
Length   16/  16, alignment  7/ 2, N  128:	23.1562	9.17188	10.2344
Length   16/   4, alignment  0/ 0, N  128:	11.2812	8.78125	10.2344
Length   16/   4, alignment  7/ 2, N  128:	10.7969	8.76562	9.42188
Length   32/  32, alignment  0/ 0, N  128:	44.375	11.5156	12.625
Length   32/  32, alignment  6/ 4, N  128:	44.2188	11.1719	12.0469
Length   32/   8, alignment  0/ 0, N  128:	22.2031	10.9219	12.0625
Length   32/   8, alignment  6/ 4, N  128:	21.8906	10.4531	11.4375
Length   64/  64, alignment  0/ 0, N  128:	80.7344	13.3125	14.75
Length   64/  64, alignment  5/ 6, N  128:	79.9844	13.5	13.9844
Length   64/  16, alignment  0/ 0, N  128:	46.6094	11.75	12.9219
Length   64/  16, alignment  5/ 6, N  128:	46.6406	12.3594	12.7969
Length  128/ 128, alignment  0/ 0, N  128:	150.109	17.3594	18.0781
Length  128/ 128, alignment  4/ 0, N  128:	150.172	16.3906	17.5625
Length  128/  32, alignment  0/ 0, N  128:	83.3281	14.3906	15.3594
Length  128/  32, alignment  4/ 0, N  128:	83.4531	14.2188	14.875
Length  256/ 256, alignment  0/ 0, N  128:	240.203	19.9531	20.6719
Length  256/ 256, alignment  3/ 2, N  128:	240.328	19.875	20.7344
Length  256/  64, alignment  0/ 0, N  128:	105.922	15.4375	16.2344
Length  256/  64, alignment  3/ 2, N  128:	105.594	15.6406	16.1875
Length  512/ 512, alignment  0/ 0, N  128:	418	30.6562	31.0156
Length  512/ 512, alignment  2/ 4, N  128:	418.844	30.2812	31.3281
Length  512/ 128, alignment  0/ 0, N  128:	150.031	16.8281	17.7812
Length  512/ 128, alignment  2/ 4, N  128:	150.047	16.4688	17.1875
Length 1024/1024, alignment  0/ 0, N  128:	777.078	43.3906	44.2656
Length 1024/1024, alignment  1/ 6, N  128:	777.531	43.4531	44.8281
Length 1024/ 256, alignment  0/ 0, N  128:	240.078	19.7188	20.5781
Length 1024/ 256, alignment  1/ 6, N  128:	240.484	19.3125	20.2344
Length   16/   1, alignment  1/ 2, N  128:	9.84375	9.03125	9.73438
Length   16/   1, alignment  2/ 1, N  128:	9.45312	8.59375	9.625
Length   16/  10, alignment  1/ 1, N  128:	13.5469	9	9.64062
Length   32/   1, alignment  2/ 4, N  128:	19.1094	10.6719	11.7031
Length   32/   1, alignment  4/ 2, N  128:	18.9219	10.5625	11.4062
Length   32/  10, alignment  2/ 2, N  128:	23.3906	10.4688	11.4062
Length   64/   1, alignment  3/ 6, N  128:	33.0312	12.3594	12.75
Length   64/   1, alignment  6/ 3, N  128:	32.7188	11.4844	12.0312
Length   64/  10, alignment  3/ 3, N  128:	37.4062	12.2656	12.5781
Length  128/   1, alignment  4/ 0, N  128:	58.0156	14.8281	15.4219
Length  128/   1, alignment  0/ 4, N  128:	57.9062	14.0938	14.6094
Length  128/  10, alignment  4/ 4, N  128:	62.6406	14.9688	15.2031
Length  256/   1, alignment  5/ 2, N  128:	57.8125	14.25	14.6094
Length  256/   1, alignment  2/ 5, N  128:	57.8594	13.7656	14.4062
Length  256/  10, alignment  5/ 5, N  128:	62.7656	14.1406	14.6562
Length  512/   1, alignment  6/ 4, N  128:	58.1719	13.9844	14.6094
Length  512/   1, alignment  4/ 6, N  128:	57.8438	14.1875	14.8906
Length  512/  10, alignment  6/ 6, N  128:	62.875	13.9688	14.4219
Length 1024/   1, alignment  7/ 6, N  128:	58.0312	12.3594	13.2031
Length 1024/   1, alignment  6/ 7, N  128:	58.0312	14.7344	14.6875
Length 1024/  10, alignment  7/ 7, N  128:	62.4375	15.5	15.6562
Length    2/   2, alignment  0/ 2, N  512:	3.95312	7.90625	8.59375
Length    4/   4, alignment  0/ 0, N  512:	5.65625	7.40625	8.45312
Length    4/   4, alignment  4/ 0, N  512:	5.48438	7.73438	8.73438
Length    8/   8, alignment  0/ 0, N  512:	9.42188	8.3125	8
Length    8/   8, alignment  0/ 0, N  512:	9.15625	7.48438	8.03125
Length   16/  16, alignment  0/ 0, N  512:	23.4062	9.64062	10.5156
Length   16/  16, alignment  7/ 2, N  512:	23.4688	9.07812	10
Length   16/   4, alignment  0/ 0, N  512:	11.1094	8.84375	10.0625
Length   16/   4, alignment  7/ 2, N  512:	10.8438	8.90625	9.54688
Length   32/  32, alignment  0/ 0, N  512:	44.1875	11.4688	12.2031
Length   32/  32, alignment  6/ 4, N  512:	44.4531	11.3281	11.9844
Length   32/   8, alignment  0/ 0, N  512:	22.125	10.7031	11.7344
Length   32/   8, alignment  6/ 4, N  512:	21.8906	10.1875	11.0781
Length   64/  64, alignment  0/ 0, N  512:	80.6719	12.9844	14.2969
Length   64/  64, alignment  5/ 6, N  512:	80.5781	12.7656	13.6562
Length   64/  16, alignment  0/ 0, N  512:	46.4219	12.0156	12.8281
Length   64/  16, alignment  5/ 6, N  512:	46.75	12.0156	12.75
Length  128/ 128, alignment  0/ 0, N  512:	150.109	17.4219	18.4844
Length  128/ 128, alignment  4/ 0, N  512:	151.125	16.5	17.6719
Length  128/  32, alignment  0/ 0, N  512:	83.5781	15.6406	16.0156
Length  128/  32, alignment  4/ 0, N  512:	83.7969	14.3281	15.1562
Length  256/ 256, alignment  0/ 0, N  512:	290.125	25.4062	26.0469
Length  256/ 256, alignment  3/ 2, N  512:	291.234	26.1719	26.7188
Length  256/  64, alignment  0/ 0, N  512:	156.594	20.4062	20.8594
Length  256/  64, alignment  3/ 2, N  512:	157.312	21.6094	22.2344
Length  512/ 512, alignment  0/ 0, N  512:	570.844	47.8594	49.2812
Length  512/ 512, alignment  2/ 4, N  512:	572.234	54.3438	55.6562
Length  512/ 128, alignment  0/ 0, N  512:	303.359	35.2188	35.5156
Length  512/ 128, alignment  2/ 4, N  512:	302.891	41.2344	42.3281
Length 1024/1024, alignment  0/ 0, N  512:	931	61.1719	62.25
Length 1024/1024, alignment  1/ 6, N  512:	929.234	67.125	68.5312
Length 1024/ 256, alignment  0/ 0, N  512:	393.234	38.2812	39.1562
Length 1024/ 256, alignment  1/ 6, N  512:	392.219	44.5781	45.2969
Length   16/   1, alignment  1/ 2, N  512:	9.46875	8.71875	9.53125
Length   16/   1, alignment  2/ 1, N  512:	9.07812	8.48438	9.3125
Length   16/  10, alignment  1/ 1, N  512:	13.1875	8.875	9.6875
Length   32/   1, alignment  2/ 4, N  512:	19.2188	10.2031	11.4688
Length   32/   1, alignment  4/ 2, N  512:	19.0938	10.0781	11.25
Length   32/  10, alignment  2/ 2, N  512:	23.6094	10.4062	11.4375
Length   64/   1, alignment  3/ 6, N  512:	31.6719	12.2188	12.5469
Length   64/   1, alignment  6/ 3, N  512:	31.6719	11.2031	12.2188
Length   64/  10, alignment  3/ 3, N  512:	36.2188	11.8438	12.5938
Length  128/   1, alignment  4/ 0, N  512:	57.1719	15.0781	15.6562
Length  128/   1, alignment  0/ 4, N  512:	57.375	14.4219	15.0625
Length  128/  10, alignment  4/ 4, N  512:	61.9062	14.9844	15.5469
Length  256/   1, alignment  5/ 2, N  512:	108.328	20.3906	20.8594
Length  256/   1, alignment  2/ 5, N  512:	108.219	20.1719	20.7344
Length  256/  10, alignment  5/ 5, N  512:	114.094	20.7969	21.2188
Length  512/   1, alignment  6/ 4, N  512:	210.562	37.3906	38.1875
Length  512/   1, alignment  4/ 6, N  512:	210.969	37.5312	38.2969
Length  512/  10, alignment  6/ 6, N  512:	218.734	37.625	37.75
Length 1024/   1, alignment  7/ 6, N  512:	210.406	30.5312	31.3594
Length 1024/   1, alignment  6/ 7, N  512:	211.438	39.0469	38.4531
Length 1024/  10, alignment  7/ 7, N  512:	219	39.875	38.8281
Length    2/   2, alignment  0/ 2, N 2048:	3.96875	7.79688	8.60938
Length    4/   4, alignment  0/ 0, N 2048:	5.6875	8.21875	8.5
Length    4/   4, alignment  4/ 0, N 2048:	5.45312	7.40625	8.71875
Length    8/   8, alignment  0/ 0, N 2048:	9.1875	7.39062	7.96875
Length    8/   8, alignment  0/ 0, N 2048:	8.92188	8.1875	8.15625
Length   16/  16, alignment  0/ 0, N 2048:	23.5	9.5625	10.4688
Length   16/  16, alignment  7/ 2, N 2048:	23.4219	9.09375	10.1094
Length   16/   4, alignment  0/ 0, N 2048:	10.6875	8.95312	10.0781
Length   16/   4, alignment  7/ 2, N 2048:	10.2969	8.82812	9.48438
Length   32/  32, alignment  0/ 0, N 2048:	44.5469	11.5938	12.1562
Length   32/  32, alignment  6/ 4, N 2048:	44.375	10.9375	11.8906
Length   32/   8, alignment  0/ 0, N 2048:	22.1094	10.7188	11.625
Length   32/   8, alignment  6/ 4, N 2048:	21.8906	10.1719	11.1406
Length   64/  64, alignment  0/ 0, N 2048:	79.7656	12.875	14.4375
Length   64/  64, alignment  5/ 6, N 2048:	79.5	12.7031	13.5469
Length   64/  16, alignment  0/ 0, N 2048:	45.5312	11.7812	13.2188
Length   64/  16, alignment  5/ 6, N 2048:	46.0312	12.3125	12.9531
Length  128/ 128, alignment  0/ 0, N 2048:	149.469	17.375	18.1562
Length  128/ 128, alignment  4/ 0, N 2048:	150.469	16.6094	17.5312
Length  128/  32, alignment  0/ 0, N 2048:	82.2812	15.1406	16.0156
Length  128/  32, alignment  4/ 0, N 2048:	82.3281	14.2188	15.2344
Length  256/ 256, alignment  0/ 0, N 2048:	290.656	25.3125	25.7031
Length  256/ 256, alignment  3/ 2, N 2048:	289.953	26.0781	26.625
Length  256/  64, alignment  0/ 0, N 2048:	156.328	20.1094	21
Length  256/  64, alignment  3/ 2, N 2048:	155.766	21.4062	22.1719
Length  512/ 512, alignment  0/ 0, N 2048:	571.766	48.6406	49.7656
Length  512/ 512, alignment  2/ 4, N 2048:	571.453	55.0625	56
Length  512/ 128, alignment  0/ 0, N 2048:	302.922	35.8281	36.625
Length  512/ 128, alignment  2/ 4, N 2048:	301.609	41.625	42.75
Length 1024/1024, alignment  0/ 0, N 2048:	1132.52	83.9219	85.2812
Length 1024/1024, alignment  1/ 6, N 2048:	1131.28	91.7656	92.875
Length 1024/ 256, alignment  0/ 0, N 2048:	595.938	59.9062	61.0938
Length 1024/ 256, alignment  1/ 6, N 2048:	596.031	69.25	69.7812
Length   16/   1, alignment  1/ 2, N 2048:	9.53125	8.70312	9.53125
Length   16/   1, alignment  2/ 1, N 2048:	9.125	8.4375	9.71875
Length   16/  10, alignment  1/ 1, N 2048:	13.2812	8.71875	9.625
Length   32/   1, alignment  2/ 4, N 2048:	19.1562	10.2812	11.4688
Length   32/   1, alignment  4/ 2, N 2048:	19.1406	10.3281	11.2188
Length   32/  10, alignment  2/ 2, N 2048:	23.3125	10.4688	11.5
Length   64/   1, alignment  3/ 6, N 2048:	32.7188	12.1094	12.4219
Length   64/   1, alignment  6/ 3, N 2048:	32.5625	11.1562	12.1406
Length   64/  10, alignment  3/ 3, N 2048:	37.625	11.9219	12.4688
Length  128/   1, alignment  4/ 0, N 2048:	57.9844	14.8281	15.6562
Length  128/   1, alignment  0/ 4, N 2048:	58.0781	14.3906	15.0938
Length  128/  10, alignment  4/ 4, N 2048:	63	14.9062	15.2812
Length  256/   1, alignment  5/ 2, N 2048:	109.391	20.0781	20.8281
Length  256/   1, alignment  2/ 5, N 2048:	108.938	19.9375	20.75
Length  256/  10, alignment  5/ 5, N 2048:	114.75	20.7656	20.9844
Length  512/   1, alignment  6/ 4, N 2048:	210.656	38.6562	39.0469
Length  512/   1, alignment  4/ 6, N 2048:	211.094	38.2656	39.0312
Length  512/  10, alignment  6/ 6, N 2048:	218.797	38.25	38.2969
Length 1024/   1, alignment  7/ 6, N 2048:	415.297	52.3438	52.9688
Length 1024/   1, alignment  6/ 7, N 2048:	416.781	63.4062	63.1562
Length 1024/  10, alignment  7/ 7, N 2048:	426.078	63.3281	63.4531
  

Comments

Ondrej Bilka Jan. 3, 2015, 10:07 a.m. UTC | #1
On Wed, Dec 24, 2014 at 10:17:17AM -0200, Adhemerval Zanella wrote:
> Hi
> 
> The commit 3eb38795dbbbd816 (Simplify strncat) implements a new 
> strncat algorithm that uses strlen, strnlen, and memcpy.  This is 
> faster than POWER7 current implementation, especially for unaligned 
> strings (where POWER7 code uses byte-byte operations).
> 
> This patch removes the assembly implementation and uses a multiarch
> especialization based on default algorithm calling optimized POWER7
> symbols. Benchtests results in attachments (ran on a POWER8 machine).
> 
> Tested on powerpc64 and powerpc64le.
> 
Should be ok; the assembly implementation is a lot slower in practice, as it's
rarely called at all and so is unlikely to be in cache.
  

Patch

diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S
deleted file mode 100644
index ead4a9a..0000000
--- a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.S
+++ /dev/null
@@ -1,42 +0,0 @@ 
-/* Optimized strncat implementation for POWER7.
-   Copyright (C) 2014 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <sysdep.h>
-
-#undef EALIGN
-#define EALIGN(name, alignt, words)				\
-  .section ".text";						\
-  ENTRY_2(__strncat_power7)					\
-  .align ALIGNARG(alignt);					\
-  EALIGN_W_##words;						\
-  BODY_LABEL(__strncat_power7):					\
-  cfi_startproc;						\
-  LOCALENTRY(__strncat_power7)
-
-#undef END
-#define END(name)						\
-  cfi_endproc;							\
-  TRACEBACK(__strncat_power7)					\
-  END_2(__strncat_power7)
-
-#undef libc_hidden_builtin_def
-#define libc_hidden_builtin_def(name)
-
-#define STRLEN __strlen_power7
-
-#include <sysdeps/powerpc/powerpc64/power7/strncat.S>
diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c
new file mode 100644
index 0000000..fff28bf
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/multiarch/strncat-power7.c
@@ -0,0 +1,31 @@ 
+/* Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <string.h>
+
+#define STRNCAT __strncat_power7
+
+extern __typeof (strncat) __strncat_power7 attribute_hidden;
+extern __typeof (strlen) __strlen_power7 attribute_hidden;
+extern __typeof (strnlen) __strnlen_power7 attribute_hidden;
+extern __typeof (memcpy) __memcpy_power7 attribute_hidden;
+
+#define strlen    __strlen_power7
+#define __strnlen __strnlen_power7
+#define memcpy    __memcpy_power7
+
+#include <string/strncat.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/strncat.S b/sysdeps/powerpc/powerpc64/power7/strncat.S
deleted file mode 100644
index f5ea52d..0000000
--- a/sysdeps/powerpc/powerpc64/power7/strncat.S
+++ /dev/null
@@ -1,228 +0,0 @@ 
-/* Optimized strncat implementation for PowerPC64/POWER7.
-
-   Copyright (C) 2014 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, see
-   <http://www.gnu.org/licenses/>.  */
-
-/* The algorithm is as follows for aligned memory access:
-
-   if the address of s2 is 8-byte aligned ((s2 & 0x7UL) == 0),
-       perform aligned doubleword catenation
-   else
-       perform unaligned catenation
-
-   The aligned comparisons are made using the cmpb instruction.  */
-
-/* char* [r3] strncat (const char *s1 [r3],
-                       const char *s2 [r4],
-                       size_t size [r5])  */
-
-#include <sysdep.h>
-
-#ifndef STRNCAT
-# undef strncat
-# define STRNCAT  strncat
-#endif
-
-#ifndef STRLEN
-/* For builds with no IFUNC support, local calls should be made to internal
-   GLIBC symbol (created by libc_hidden_builtin_def).  */
-# ifdef SHARED
-#  define STRLEN   __GI_strlen
-# else
-#  define STRLEN   strlen
-# endif
-#endif
-
-#define	FRAMESIZE	(FRAME_MIN_SIZE+32)
-
-	.machine  power7
-EALIGN(STRNCAT, 4, 0)
-	CALL_MCOUNT 3
-
-	mflr r0				/* Load link register LR to r0.  */
-
-/* We shall use r29, r30 and r31 non volatile register for retention.
-   Save all the callee registers in the GPR save area.  */
-	std r29, -24(r1)		/* Save callers register r29.  */
-	std r30, -16(r1)		/* Save callers register r30.  */
-	std r31, -8(r1)			/* Save callers register r31.  */
-
-	std r0, 16(r1)			/* Store the link register.  */
-	stdu r1, -FRAMESIZE(r1)		/* Create the stack frame.  */
-
-/* Improve performance with CPU pre-fetch.  */
-	dcbt 0, r3			/* Pre-fetch s1 to avoid cache
-					   miss.  */
-	dcbt 0, r4			/* Pre-fetch s2 to avoid cache
-					   miss.  */
-
-	mr. r29, r5			/* Save "n" in r29.  */
-	mr r30, r3			/* Save "s1" in r30 from r3.  */
-	beq cr0,L(done)
-
-	mr r31, r4			/* Save "s2" in r31 from r4.  */
-	bl STRLEN			/* Call optimized strlen on s1; goto
-					   end of s1.  */
-	nop
-	cmpldi cr7, r29, 7		/* If n is <= 7, process
-					    byte-by-byte.  */
-	add r3, r30, r3			/* Point r3 at the NUL ending s1.  */
-	bgt cr7,L(alignment)		/* Process by aligned strings.  */
-
-	cmpldi cr7, r29, 3		/* If n is >= 4, we can
-					   byte-unroll.  */
-	addi r9, r3, -1			/* Make "s1" point before next
-					   character, increment when read.  */
-	bgt cr7, L(bytes_unroll)	/* Process each byte.  */
-
-L(byte_by_byte):
-	lbz r10, 0(r31)
-	addi r8, r9, 1
-	cmpdi cr7, r10, 0		/* Check for NULL in "s2".  */
-	stb r10, 1(r9)
-	beq cr7, L(done)
-	add r9, r9, r29
-	subf r9, r8, r9
-	addi r9, r9, 1
-	mtctr r9
-	b L(branch2)
-	.p2align 4
-L(branch1):
-	lbzu r10, 1(r31)
-	cmpdi cr7, r10, 0
-	stbu r10, 1(r8)
-	beq cr7,L(done)
-L(branch2):
-	mr r9, r8
-	bdnz L(branch1)
-	beq cr7,L(done)
-L(nullTerminate):
-	li r10, 0			/* Load NULL for termination.  */
-	stb r10, 1(r9)			/* Append or terminate s1 with
-					   NULL.  */
-	.p2align 4			/* A small section here.  */
-L(done):				/* We return now.   */
-	addi r1, r1, FRAMESIZE		/* Restore stack pointer.  */
-	mr r3, r30			/* Set the return value: the
-					   original s1 pointer.  */
-	ld r0, 16(r1)			/* Read the saved link register.  */
-	ld r29, -24(r1)			/* Restore save register r29.  */
-	ld r30, -16(r1)			/* Restore save register r30.  */
-	ld r31, -8(r1)			/* Restore save register r31.  */
-	mtlr r0				/* Restore link register.  */
-	blr				/* Branch to link register.  */
-
-	.p2align 4
-L(alignment):
-	rldicl. r9, r31, 0, 61		/* Check if s2 is 8byte aligned  */
-	beq cr0,L(dwordAligned)
-
-	.p2align 4
-/* Unaligned bytes in string, so process byte by byte.
-   POWER7 has performance gains over loop unroll.  */
-L(bytes_unroll):
-	addi r9, r3, -1
-	srdi r10, r29, 2
-	mtctr r10
-	b L(L10)
-	.p2align 4
-L(L44):
-	lbz r10, 1(r31)			/* Load byte.  */
-	cmpdi cr7, r10, 0		/* Compare ; if byte not zero,
-					   continue.  */
-	stb r10, 2(r9)			/* Store byte  */
-	beq cr7, L(done)
-	addi r31, r31, 4
-
-	lbz r10, -2(r31)		/* Perform loop unroll here on byte
-					   load and store.  */
-	cmpdi cr7, r10, 0
-	stb r10, 3(r9)
-	beq cr7, L(done)
-
-	lbz r10, -1(r31)		/* Loop unroll here.  */
-	cmpdi cr7, r10, 0
-	stbu r10, 4(r9)
-	beq cr7, L(done)
-
-	bdz L(leftNbytes)
-
-L(L10):
-	lbz r10, 0(r31)			/* Loop unroll here.  */
-	cmpdi cr7, r10, 0
-	stb r10, 1(r9)
-	bne cr7,L(L44)
-	b L(done)
-	.p2align 4
-/* If s2 is double word aligned, we load and store double word.  */
-L(dwordAligned):
-/* read, write 8 bytes at a time  */
-	srdi r8, r29, 3			/* Compute count for CTR to loop;
-					   count = n/8.  */
-	li r7, 0			/* Load r7 with NULL.  */
-	li r10, 0			/* Load r10 with MASK '0'.  */
-
-	mtctr r8			/* Move count to CTR.  */
-L(loop8):
-	ld r9, 0(r31)			/* Read double word from s2.  */
-	cmpb r6, r9, r10		/* Compare bytes in s2 we read
-					   just now.  */
-	cmpdi r6, 0			/* If cmpb returned NULL,
-					   we continue.  */
-	bne+ L(a8)
-	std r9, 0(r3)			/* Append double word from s2
-					   with s1.  */
-	addi r3, r3, 8			/* Increment s1.  */
-	addi r31, r31, 8		/* Increment s2.  */
-	subi r29, r29, 8		/* Decrement count by 8.  */
-	bdnz L(loop8)			/* Continue until "count" is
-					   non zero.  */
-
-L(a8):
-	cmpdi r29, 0			/* If "n" is already zero, we skip. */
-	beq+ L(align8align)
-
-	mtctr r29			/* Process left over bytes in "n".  */
-L(unaligned0):
-	lbz r9, 0(r31)			/* Read a byte from s2.  */
-	cmpw r9, r7			/* If byte is NULL, we stop here . */
-	beq+ L(align8align)		/* Skip processing further if NULL.  */
-	stb  r9, 0(r3)			/* If not NULL, store byte into s1.  */
-	addi r3, r3, 1			/* Increment s1 by 1.  */
-	addi r31, r31, 1		/* Increment s2 by 1.  */
-	bdnz L(unaligned0)		/* Decrement counter "n" and loop
-					   until non zero.  */
-L(align8align):
-	stb r7, 0(r3)			/* Terminate s1 with NULL.  */
-
-	addi r1, r1, FRAMESIZE		/* Restore stack pointer.  */
-	mr r3, r30			/* Set the return value: the
-					   original s1 pointer.  */
-	ld r0, 16(r1)			/* Read the saved link register.  */
-	ld r29, -24(r1)			/* Restore save register r29.  */
-	ld r30, -16(r1)			/* Restore save register r30.  */
-	ld r31, -8(r1)			/* Restore save register r31.  */
-	mtlr r0				/* Restore link register.  */
-	blr				/* Branch to link register  */
-
-	.p2align 4
-L(leftNbytes):
-	rldicl. r29, r29, 0, 62		/* Check if n>0 and n < 4 bytes.  */
-	bne cr0,L(byte_by_byte)		/* Process bytes one by one. */
-	b L(nullTerminate)		/* Now, finish catenation with
-					   NULL termination.  */
-END(STRNCAT)