1#!/bin/bash
2
3# Script to measure memset and memcpy for different sizes and strategies.
4#
5# Contributed by Jan Hubicka <jh@suse.cz>
6#
7# Copyright (C) 2019 Free Software Foundation, Inc.
8#
9# This file is part of GCC.
10#
11# GCC is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 3, or (at your option)
14# any later version.
15#
16# GCC is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with GCC; see the file COPYING.  If not, write to
23# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
24# Boston, MA 02110-1301, USA.
25
# For every block size and every -mstringop-strategy variant this script
# compiles a small C program that calls __builtin_memcpy or __builtin_memset
# in a loop, times the resulting a.out with /usr/bin/time and prints one
# table row per block size, ending with the fastest variant of that row.
#
# Usage: test_stringop mode size cmdline
# (run without arguments for a description of each argument).
32
# Compile one benchmark variant, run it and print its elapsed time as a
# table cell.  Note that this function shadows the shell's "test" builtin.
#
# Arguments:
#   $1 - compiler command line; expanded unquoted on purpose so that it
#        splits into the compiler and its options
#   $2 - value passed as -DAVG_SIZE
#   $3 - extra compiler options for this variant (also split on whitespace)
#   $4 - label stored next to the time in /tmp/accum (may be empty)
# Globals read:
#   type     - element type of the C buffer
#   STRINGOP - extra -D options (may be empty, so it must stay unquoted)
#   memsize  - value passed as -DMEMORY_COPIES
# Output:
#   " <time>" on stdout without a trailing newline, and the line
#   "<time> <label>" appended to /tmp/accum.
# Returns:
#   1 when the compiler did not leave an executable a.out.  In that case a
#   fixed-width " failed " cell is printed and nothing is appended to
#   /tmp/accum, so a broken variant can never be picked as the fastest one.
test()
{
  local elapsed

  # Remove any stale binary so that its absence afterwards means the
  # compile failed.
  rm -f a.out

  # The C source is fed to the compiler on stdin ("-x c ... -").
  $1 -x c -O3 $3 -DAVG_SIZE="$2" $STRINGOP -DMEMORY_COPIES="$memsize" - <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END

  if [[ ! -x a.out ]]; then
    # Same width as " m:ss.ss" so the remaining columns stay aligned.
    printf ' %-7s' failed
    return 1
  fi

  elapsed=$(/usr/bin/time -f "%E" ./a.out 2>&1)
  printf ' %s' "$elapsed"
  printf '%s\n' "$elapsed${4:+ $4}" >>/tmp/accum
}
56
# Variant of the benchmark runner that always compiles with "clang" (looked
# up through PATH) and discards the compiler's diagnostics.
#
# Arguments:
#   $1 - not used; present so the argument positions match the gcc runner
#   $2 - value passed as -DAVG_SIZE
#   $3 - extra compiler options (split on whitespace; may be empty)
#   $4 - label stored next to the time in /tmp/accum (may be empty)
# Globals read:
#   type     - element type of the C buffer
#   STRINGOP - extra -D options (may be empty, so it must stay unquoted)
#   memsize  - value passed as -DMEMORY_COPIES
# Output:
#   " <time>" on stdout without a trailing newline, and the line
#   "<time> <label>" appended to /tmp/accum.
# Returns:
#   1 when clang did not leave an executable a.out (for example because it
#   is not installed).  In that case a fixed-width " failed " cell is
#   printed and nothing is appended to /tmp/accum.
test2()
{
  local elapsed

  # Remove any stale binary so that its absence afterwards means the
  # compile failed.
  rm -f a.out

  # The C source is fed to the compiler on stdin ("-x c ... -").
  clang -x c -O3 $3 -DAVG_SIZE="$2" $STRINGOP -DMEMORY_COPIES="$memsize" - 2>/dev/null <<END
#define BUFFER_SIZE (16*1024*1024 + AVG_SIZE*2)
/*#define MEMORY_COPIES (1024*1024*64*(long long)10)*/
$type t[BUFFER_SIZE];
int main()
{
  unsigned int i;
  for (i=0;i<((long long)MEMORY_COPIES + AVG_SIZE * 2 - 1)/AVG_SIZE*2;i++)
#ifdef test_memset
    __builtin_memset (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), i, (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#else
    __builtin_memcpy (t+(i*1024*1024+i*1)%(BUFFER_SIZE - AVG_SIZE*2), t+((i+1)*1024*1024*4+i*1)%(BUFFER_SIZE - AVG_SIZE *2), (AVG_SIZE + i) % (AVG_SIZE * 2 + 0));
#endif
  return 0;
}
END

  if [[ ! -x a.out ]]; then
    # Same width as " m:ss.ss" so the remaining columns stay aligned.
    printf ' %-7s' failed
    return 1
  fi

  elapsed=$(/usr/bin/time -f "%E" ./a.out 2>&1)
  printf ' %s' "$elapsed"
  printf '%s\n' "$elapsed${4:+ $4}" >>/tmp/accum
}
80
# Print one result row: the block size followed by one cell per variant.
#
# Arguments:
#   $1 - not read here (the global "mode" is consulted instead)
#   $2 - compiler command line, forwarded to test as its first argument
#   $3 - block size; printed in the first column and forwarded to test
# Globals:
#   mode (read) - the rep_8byte variants only run when it equals 64
#   best (set)  - first line of the sorted /tmp/accum, printed last
testrow()
{
  local entry
  local -a variants

  # Start the row with an empty result log and the block-size column.
  : >/tmp/accum
  printf '%12i ' "$3"

  # Each entry is "label:compiler options", listed in column order.
  variants=(
    "libcall:-mstringop-strategy=libcall"
    "rep1:-mstringop-strategy=rep_byte -malign-stringops"
    "rep1noalign:-mstringop-strategy=rep_byte -mno-align-stringops"
    "rep4:-mstringop-strategy=rep_4byte -malign-stringops"
    "rep4noalign:-mstringop-strategy=rep_4byte -mno-align-stringops"
  )
  if [[ "$mode" == 64 ]]; then
    variants+=(
      "rep8:-mstringop-strategy=rep_8byte -malign-stringops"
      "rep8noalign:-mstringop-strategy=rep_8byte -mno-align-stringops"
    )
  fi
  variants+=(
    "loop:-mstringop-strategy=loop -malign-stringops"
    "loopnoalign:-mstringop-strategy=loop -mno-align-stringops"
    "unrl:-mstringop-strategy=unrolled_loop -malign-stringops"
    "unrlnoalign:-mstringop-strategy=unrolled_loop -mno-align-stringops"
    "sse:-mstringop-strategy=vector_loop -malign-stringops"
    "ssenoalign:-mstringop-strategy=vector_loop -mno-align-stringops -msse2"
    # Disabled clang comparison column: test2 "$2" "$3" ""
    "byte:-mstringop-strategy=byte_loop"
  )

  for entry in "${variants[@]}"; do
    test "$2" "$3" "${entry#*:}" "${entry%%:*}"
  done

  # Pick the winner now: the three runs below also append to /tmp/accum
  # but are not meant to compete for the BEST column.
  best=$(sort </tmp/accum | head -n 1)

  # The instrumented run only produces the profile; its cell is discarded.
  test "$2" "$3" " -fprofile-generate" >/dev/null 2>&1
  test "$2" "$3" " -fprofile-use"
  test "$2" "$3" " -minline-stringops-dynamically"

  echo "    $best"
}
109
# Print the table header and then one result row per block size.
#
# Arguments:
#   $1 - forwarded unchanged to testrow as its first argument
#   $2 - compiler command line, forwarded to testrow
# Globals read:
#   mode - selects the header; the 64 header has two extra rep8 columns
test_all_sizes()
{
  local block
  local -a blocks

  case "$mode" in
    64)
      echo "  block size  libcall rep1    noalg   rep4    noalg   rep8    noalg   loop    noalg   unrl    noalg   sse     noalg   byte    PGO     dynamic    BEST"
      ;;
    *)
      echo "  block size  libcall rep1    noalg   rep4    noalg   loop    noalg   unrl    noalg   sse     noalg   byte    PGO     dynamic    BEST"
      ;;
  esac

  # Alternative size lists kept for reference:
  #   1 2 3 4 6 8 10 12 14 16 24 32 48 64 128 256 512 1024 4096 8192 81920 819200 8192000
  #   8192000 819200 81920 8192 4096 2048 1024 512 256 128 64 48 32 24 16 14 12 10 8 6 5 4 3 2 1
  #   128 256 1024 4096 8192 81920 819200
  blocks=(
    8192000 819200 81920 20480 8192 4096 2048 1024 512 256 128
    64 48 32 24 16 14 12 10 8 6 4 1
  )

  for block in "${blocks[@]}"; do
    testrow "$1" "$2" "$block"
  done
}
126
# Driver.  Command line: mode size cmdline...
#   mode    - 32 or 64; appended to the compiler command as -m<mode>
#   size    - exported as memsize, which the test runner passes to the
#             compiler as -DMEMORY_COPIES
#   cmdline - compiler and options; kept as one string because the test
#             runner word-splits it again when invoking the compiler
# All three are required.  Without a compiler command every test would try
# to execute "-m<mode>" as a program, so incomplete invocations are
# rejected up front instead of running the whole matrix.

# Print the help text on stdout.
usage()
{
  echo "Usage:"
  echo "test_stringop mode size cmdline"
  echo "mode is either 32 or 64"
  echo "size is amount of memory copied in each test.  Should be chosen small enough so runtime is less than minute for each test and sorting works"
  echo "Example: test_stringop 32 640000000 ./xgcc -B ./ -march=pentium3"
}

# No arguments at all is treated as a request for help (status 0, as before).
if (( $# == 0 )); then
  usage
  exit
fi

# Anything else that is incomplete or has a bad mode is a usage error.
if (( $# < 3 )) || [[ "$1" != 32 && "$1" != 64 ]]; then
  usage >&2
  exit 1
fi

mode=$1
shift
export memsize=$1
shift
cmdline=$*

# Two passes (memcpy, then memset).  Each pass runs once with a char buffer
# and once, under the "Aligned" heading, with a long buffer.
for pass in "memcpy:" "memset:-Dtest_memset=1"; do
  echo "${pass%%:*}"
  export STRINGOP="${pass#*:}"
  type=char
  test_all_sizes "$mode" "$cmdline -m$mode"
  echo "Aligned"
  type=long
  test_all_sizes "$mode" "$cmdline -m$mode"
done
159