#! /usr/local/bin/apl --script

  ⍝ tunable parameters for this benchmark program
  ⍝
  DO_PLOT←0             ⍝ do/don't plot the results of start-up cost
  ILRC←1000             ⍝ repeat count for the inner loop of start-up cost
  LEN_PI←1000000        ⍝ vector length for measuring the per-item cost
  PROFILE←4000 2000 50  ⍝ counts of Integer, Real, and Complex numbers
                        ⍝ (INIT_DATA uses them as lengths of its base data)
  CORES←3               ⍝ number of cores used for parallel execution
                        ⍝ (GO limits it to ⎕SYL[25;2])
  TIME_LIMIT←2000       ⍝ time limit per pass (milliseconds)

)COPY 5 FILE_IO

 ⍝ expressions to be benchmarked
∇Z←MON_EXPR
  ⍝⍝
  ⍝⍝ return the monadic expressions to be benchmarked, one nested item per
  ⍝⍝ expression. The columns of every item are:
  ⍝⍝
  ⍝⍝ A:    left argument (always empty here, i.e. a monadic call)
  ⍝⍝ OP:   the primitive being measured
  ⍝⍝ B:    name of the variable (set up by INIT_DATA) used as right argument
  ⍝⍝ N:    suffix of the perfo_N macro that BREAK_EVEN writes
  ⍝⍝ CN:   name written as first argument of that macro
  ⍝⍝ STAT: statistics id given to FIO∆clear_statistics/FIO∆get_statistics
  ⍝⍝
  Z←⍬
  ⍝     A          OP    B          N CN              STAT
  ⍝-------------------------------------------------------
  Z←Z,⊂ ""         "+"   "Mix_IRC"  1 "F12_PLUS"      35
  Z←Z,⊂ ""         "-"   "Mix_IRC"  1 "F12_MINUS"     35
  Z←Z,⊂ ""         "×"   "Mix_IRC"  1 "F12_TIMES"     35
  Z←Z,⊂ ""         "÷"   "Mix1_IRC" 1 "F12_DIVIDE"    35
  Z←Z,⊂ ""         "∼"   "Bool"     1 "F12_WITHOUT"   35
  Z←Z,⊂ ""         "⌈"   "Mix_IR"   1 "F12_RND_UP"    35
  Z←Z,⊂ ""         "⌊"   "Mix_IR"   1 "F12_RND_DN"    35
  Z←Z,⊂ ""         "!"   "Int2"     1 "F12_BINOM"     35
  Z←Z,⊂ ""         "⋆"   "Mix_IRC"  1 "F12_POWER"     35
  Z←Z,⊂ ""         "⍟"   "Mix1_IRC" 1 "F12_LOGA"      35
  Z←Z,⊂ ""         "○"   "Mix_IRC"  1 "F12_CIRCLE"    35
  Z←Z,⊂ ""         "∣"   "Mix_IR"   1 "F12_STILE"     35
  Z←Z,⊂ ""         "?"   "Int2"     1 "F12_ROLL"      35
∇

∇Z←DYA_EXPR
  ⍝⍝
  ⍝⍝ return the dyadic expressions to be benchmarked, one nested item per
  ⍝⍝ expression. The columns of every item are:
  ⍝⍝
  ⍝⍝ A:    left argument (a variable set up by INIT_DATA, or a literal)
  ⍝⍝ OP:   the primitive (or inner/outer product) being measured
  ⍝⍝ B:    name of the variable (set up by INIT_DATA) used as right argument
  ⍝⍝ N:    suffix of the perfo_N macro that BREAK_EVEN writes
  ⍝⍝ CN:   name written as first argument of that macro
  ⍝⍝ STAT: statistics id given to FIO∆clear_statistics/FIO∆get_statistics
  ⍝⍝
  Z←⍬
  ⍝     A          OP    B          N CN              STAT
  ⍝-------------------------------------------------------
  Z←Z,⊂ "Mix_IRC"  "+"   "Mix1_IRC" 2 "F12_PLUS"      36
  Z←Z,⊂ "Mix_IRC"  "-"   "Mix1_IRC" 2 "F12_MINUS"     36
  Z←Z,⊂ "Mix_IRC"  "×"   "Mix1_IRC" 2 "F12_TIMES"     36
  Z←Z,⊂ "Mix1_IRC" "÷"   "Mix1_IRC" 2 "F12_DIVIDE"    36
  Z←Z,⊂ "Bool"     "∧"   "Bool1"    2 "F2_AND"        36
  Z←Z,⊂ "Bool"     "∨"   "Bool1"    2 "F2_OR"         36
  Z←Z,⊂ "Bool"     "⍲"   "Bool1"    2 "F2_NAND"       36
  Z←Z,⊂ "Bool"     "⍱"   "Bool1"    2 "F2_NOR"        36
  Z←Z,⊂ "Mix_IR"   "⌈"   "Mix_IR"   2 "F12_RND_UP"    36
  Z←Z,⊂ "Mix_IR"   "⌊"   "Mix_IR"   2 "F12_RND_DN"    36
  Z←Z,⊂ "Mix_IRC"  "!"   "Mix_IRC"  2 "F12_BINOM"     36
  Z←Z,⊂ "Mix_IRC"  "⋆"   "Mix_IRC"  2 "F12_POWER"     36
  Z←Z,⊂ "Mix1_IRC" "⍟"   "Mix1_IRC" 2 "F12_LOGA"      36
  Z←Z,⊂ "Mix_IR "  "<"   "Mix_IR"   2 "F2_LESS"       36
  Z←Z,⊂ "Mix_IR "  "≤"   "Mix_IR"   2 "F2_LEQ"        36
  Z←Z,⊂ "Mix_IRC"  "="   "Mix_IRC"  2 "F2_EQUAL"      36
  Z←Z,⊂ "Mix_IRC"  "≠"   "Mix_IRC"  2 "F2_UNEQ"       36
  Z←Z,⊂ "Mix_IR"   ">"   "Mix_IR"   2 "F2_GREATER"    36
  Z←Z,⊂ "Mix_IR"   "≥"   "Mix_IR"   2 "F2_MEQ"        36
  Z←Z,⊂ "1"        "○"   "Mix_IRC"  2 "F12_CIRCLE"    36
  Z←Z,⊂ "Mix_IRC"  "∣"   "Mix_IRC"  2 "F12_STILE"     36
  Z←Z,⊂ "1 2 3"    "⋸"   "Int"      2 "F12_FIND"      36
  Z←Z,⊂ "Mat1_IRC" "+.×" "Mat1_IRC" 3 "OPER2_INNER"   38
  Z←Z,⊂ "Vec1_IRC" "∘.×" "Vec1_IRC" 3 "OPER2_OUTER"   39
∇

∇INIT_DATA LEN;N;Ilen;Rlen;Clen
  ⍝⍝
  ⍝⍝ setup the (global) variables used in benchmark expressions, every
  ⍝⍝ vector having LEN items:
  ⍝⍝
  ⍝⍝ Int:   integers ¯2 ... 9
  ⍝⍝ Int1:  nonzero Int
  ⍝⍝ Int2:  positive Int
  ⍝⍝ Bool:  2∣Int,  Bool1: Bool rotated by 1
  ⍝⍝ Real:  Int + 3÷○1 (about ¯1.05 to 9.95); Real1: nonzero, Real2: positive
  ⍝⍝ Comp:  Real + 0J1 × rotated Real; Comp1: nonzero Comp
  ⍝⍝ Mix_IR, Mix_IRC, Mix1_IRC: shuffled mixtures of the above
  ⍝⍝ Mat1_IRC, Vec1_IRC: smaller matrix/vector for inner and outer product
  ⍝⍝
  ⍝⍝ PROFILE gives the number of Integer, Real, and Complex base items.
  ⍝⍝
  (Ilen Rlen Clen)←PROFILE
  Int  ← 10 - ? Ilen ⍴ 12
  Int1 ← Ilen ⍴ (Int≠0)/Int
  Int2 ← Ilen ⍴ (Int>0) / Int
  Bool ← 2 ∣ Int
  Bool1← 1 ⌽ Bool
  Real ← Rlen ⍴ Int + 3 ÷ ○1
  Real1← Rlen ⍴ (Real≠0)/Real
  Real2← Rlen ⍴ (Real>0)/Real
  Comp ← Clen ⍴ Real + 0J1×1⌽Real
  Comp1← Clen ⍴ (Comp≠0)/Comp

  ⍝ concatenate the base data and shuffle it (N?N is a random permutation)
  ⍝
  Mix_IR   ←Int,Real         ◊ Mix_IR   [N?N←⍴Mix_IR  ] ← Mix_IR
  Mix_IRC  ←Int,Real,Comp    ◊ Mix_IRC  [N?N←⍴Mix_IRC ] ← Mix_IRC
  Mix1_IRC ←Int1,Real1,Comp1 ◊ Mix1_IRC [N?N←⍴Mix1_IRC] ← Mix1_IRC

  ⍝ bring everything to the requested length (⍴ repeats or truncates)
  ⍝
  Int      ← LEN ⍴ Int
  Int1     ← LEN ⍴ Int1
  Int2     ← LEN ⍴ Int2
  Bool     ← LEN ⍴ Bool
  Bool1    ← LEN ⍴ Bool1
  Real     ← LEN ⍴ Real
  Real1    ← LEN ⍴ Real1
  Real2    ← LEN ⍴ Real2
  Comp     ← LEN ⍴ Comp
  Comp1    ← LEN ⍴ Comp1
  Mix_IR   ← LEN ⍴ Mix_IR
  Mix_IRC  ← LEN ⍴ Mix_IRC
  Mix1_IRC ← LEN ⍴ Mix1_IRC
  Mat1_IRC ← (2⍴⌈LEN⋆0.35)⍴Mix1_IRC
  Vec1_IRC ← (⌈LEN⋆0.5)⍴Mix1_IRC
∇

'libaplplot' ⎕FX  'PLOT'

∇EXPR PLOT_P DATA;PLOTARG
  ⍝⍝
  ⍝⍝ plot data if enabled by DO_PLOT
  ⍝⍝
  ⍝⍝ EXPR: the benchmarked expression (its TITLE names the plot window)
  ⍝⍝ DATA: the matrix handed to PLOT; column 0 is used as x axis
  ⍝⍝
 →DO_PLOT↓0   ⍝ DO_PLOT=0: →0 (return), DO_PLOT=1: →⍬ (continue)
  PLOTARG←'xcol 0;'
  PLOTARG←PLOTARG,'xlabel "result length";'
  PLOTARG←PLOTARG,'ylabel "CPU cycles";'
  PLOTARG←PLOTARG,'draw l;'
  PLOTARG←PLOTARG,'plwindow ' , TITLE EXPR
  ⊣ PLOTARG PLOT DATA
∇


∇Z←Average[X] B
 ⍝⍝ return the average of B along axis X
 ⍝⍝ (the sum along axis X divided by the length of that axis)
 Z←(+/[X]B) ÷ (⍴B)[X]
∇

∇Z←TITLE EXPR;A;OP;B
  ⍝⍝
  ⍝⍝ return the benchmarked expression as text: 'OP B' if the left
  ⍝⍝ argument A of EXPR is empty (monadic), otherwise 'A OP B'
  ⍝⍝
  (A OP B)←3↑EXPR
  Z←OP, ' ', B
  →(0=⍴A)/0      ⍝ monadic: done
  Z←A,' ',Z
∇

∇Z←TITLE1 EXPR;A;OP;B;Z1
  ⍝⍝
  ⍝⍝ return the generic, double-quoted form of the expression: '"OP B"' if
  ⍝⍝ the left argument A of EXPR is empty (monadic), otherwise '"A OP B"'.
  ⍝⍝ The literal names A and B are used, not the variable names in EXPR.
  ⍝⍝
  (A OP B)←3↑EXPR
  Z←OP, ' B"'    ◊ Z1←'"'
  ⍝ monadic: branch to the next line, which skips the Z1←'"A ' below
  →(0=⍴A)/1+↑⎕LC ◊ Z1←'"A '
  Z←Z1,Z
∇

∇Z←X LSQRL Y;N;XY;XX;Zb;Za;SX;SXX;SY;SXY
 ⍝⍝ return the least square regression line (a line a + b×N with minimal
 ⍝⍝ distance from samples Y(X))
 ⍝⍝
 ⍝⍝ X, Y: vectors of equal length (the sample points)
 ⍝⍝ Z:    the 2-item vector a, b (offset and slope)
 ⍝⍝
 N←⍴X
 XY←X×Y ◊ XX←X×X
 SX←+/X ◊ SY←+/Y ◊ SXY←+/XY ◊ SXX←+/XX
 Zb←( (N×SXY) - SX×SY ) ÷ ((N×SXX) - SX×SX)   ⍝ slope
 Za←(SY - Zb×SX) ÷ N                           ⍝ offset
 Z←Za, Zb
∇

151  ⍝ ----------------------------------------------------
152  ⍝ Run one pass (one length), return average cycles
153154∇Z←ONE_PASS EXPR;OP;STAT;I;ZZ;TH1;TH2;CYCLES;T0;T1
155  OP←⊃EXPR[2]
156  STAT←EXPR[6]
157  TH1← 1 FIO∆set_monadic_threshold OP
158  TH2← 1 FIO∆set_dyadic_threshold  OP
159
160  I←0
161  ZZ←⍬
162  T0←24 60 60 1000⊥¯4↑⎕TS
163L:
164  FIO∆clear_statistics STAT
165  Q←⍎TITLE EXPR
166  CYCLES←(FIO∆get_statistics STAT)[4]
167  ZZ←ZZ,CYCLES
168  T1←24 60 60 1000⊥¯4↑⎕TS
169  →((I≥2) ∧ TIME_LIMIT<T1-T0)⍴DONE   ⍝ don't let it run too long
170  →(ILRC≥I←I+1)/L
171DONE:
172
173  ⍝ restore thresholds
174  ⊣ TH1 FIO∆set_monadic_threshold OP
175  ⊣ TH2 FIO∆set_dyadic_threshold  OP
176
177  ⍝ ignore the first 2 measurements as cache warm-up
178179  ZZ←2↓ZZ
180⍝ Z←(⍴,Q), ⌊ Average[1]ZZ
181  Z←(⍴,Q), ⌊ ⌊⌿ZZ
182183
184  ⍝ ----------------------------------------------------
185  ⍝ figure start-up times for sequential and parallel execution.
186  ⍝ We use small vector sizes for better precision
187188∇Z←FIGURE_A EXPR;LENGTHS;I;LEN;ZS;ZP;SA;SB;PA;PB;H1;H2;P;TXT
189  TXT←78↑'  ===================== ', (TITLE EXPR), '  ', 80⍴ '='
190  '' ◊ TXT ◊ ''
191  Z←0 3⍴0
192  LL←⍴LENGTHS←⌽⍳20 ⍝ outer loop vector lengths
193  'Benchmarking start-up cost for ', (TITLE EXPR), ' ...'
194
195  I←1 ◊ ZS←0 2⍴0
196  ⎕SYL[26;2] ← 0   ⍝ sequential
197LS: INIT_DATA LEN←LENGTHS[I]
198  ZS←ZS⍪ONE_PASS EXPR
199  →(LL≥I←I+1)⍴LS
200
201  I←1 ◊ ZP←0 2⍴0
202  ⎕SYL[26;2] ← CORES   ⍝ parallel
203LP: INIT_DATA LEN←LENGTHS[I]
204  ZP←ZP⍪ONE_PASS EXPR
205  →(LL≥I←I+1)⍴LP
206
207  (SA SB)←⌊ ZS[;1] LSQRL ZS[;2]
208  (PA PB)←⌊ ZP[;1] LSQRL ZP[;2]
209
210  ⍝ print and plot result
211212  P←ZS,ZP[;2]            ⍝ sequential and parallel cycles
213  P←P,(SA+ZS[;1]×SB)     ⍝ sequential least square regression line
214  P←P,(PA+ZP[;1]×PB)     ⍝ parallel least square regression line
215  H1←'Length' '  Sequ Cycles' '  Para Cycles' '  Linear Sequ' 'Linear Para'
216  H2←'======' '  ===========' '  ===========' '  ===========' '==========='
217  H1⍪H2⍪P
218
219  ''
220  'regression line sequential:     ', (¯8↑⍕SA), ' + ', (⍕SB),'×N cycles'
221  'regression line parallel:       ', (¯8↑⍕PA), ' + ', (⍕PB),'×N cycles'
222
223  ⍝ xdomain of aplplot seems not to work for xy plots - create a dummy x=0 line
224  P←(0, SA, PA, SA, PA)⍪P
225
226  EXPR PLOT_P P
227  Z←SA,PA
228229
230  ⍝ ----------------------------------------------------
231  ⍝ figure per-item times for sequential and parallel execution.
232  ⍝ We use one LARGE vector
233234∇Z←SUP_A FIGURE_B EXPR;SOFF;POFF;SCYC;PCYC;LEN
235  (SOFF POFF)←SUP_A
236  'Benchmarking per-item cost for ', (TITLE EXPR), ' ...'
237  SUMMARY←SUMMARY,⊂'-------------- ', (TITLE EXPR), ' -------------- '
238  SUMMARY←SUMMARY,⊂'average sequential startup cost:', (¯8↑⍕⌈SOFF), ' cycles'
239  SUMMARY←SUMMARY,⊂'average parallel startup cost:  ', (¯8↑⍕⌈POFF), ' cycles'
240
241  INIT_DATA LEN_PI
242  ⎕SYL[26;2] ← 0   ⍝ sequential
243  (LEN SCYC)←ONE_PASS EXPR
244  ⎕SYL[26;2] ← CORES   ⍝ parallel
245  (LEN PCYC)←ONE_PASS EXPR
246  Z←⊂TITLE EXPR
247  Z←Z, ⌈ (SCYC - SOFF) ÷ LEN
248  Z←Z, ⌈ (PCYC - POFF) ÷ LEN
249  TS←'per item cost sequential:       ',(¯8↑⍕Z[2]), ' cycles'
250  TP←'per item cost parallel:         ',(¯8↑⍕Z[3]), ' cycles'
251  SUMMARY←SUMMARY,(⊂TS),(⊂TP)
252
253  SUP_A BREAK_EVEN (⊂EXPR),Z
254255
∇SUP BREAK_EVEN PERI;EXPR;OP;ICS;ICP;SUPS;SUPP;T1;T2;T3;BE;OUT
  ⍝⍝
  ⍝⍝ SUP:  the sequential and parallel start-up costs
  ⍝⍝ PERI: EXPR, its title, and the sequential and parallel per-item costs
  ⍝⍝
  ⍝⍝ compute the vector length BE above which parallel execution is
  ⍝⍝ cheaper: ⌈(SUPP - SUPS) ÷ (ICS - ICP). If the parallel per-item cost
  ⍝⍝ is not below the sequential one, then no such length exists and
  ⍝⍝ 8888888888888888888ULL is used instead.
  ⍝⍝
  ⍝⍝ The result is appended to SUMMARY and written to TH_FILE (both are
  ⍝⍝ local variables of GO) as a line: perfo_N(CN, _B or _AB, "OP B", BE)
  ⍝⍝
  (SUPS SUPP)←SUP   ⍝ start-up cost
  (EXPR OP ICS ICP)←PERI ⍝ per-item cost
  T1←'parallel break-even length:     '
  T2←'     not reached' ◊ T3←'8888888888888888888ULL'
  ⍝ break-even not reachable: branch to the next line, which skips the
  ⍝ assignments of T2 and T3 below
  →(ICP ≥ ICS)⍴1+↑⎕LC ◊ T2←¯8↑BE←⍕⌈ (SUPP - SUPS) ÷ ICS - ICP ◊ T3←21↑BE
  SUMMARY←SUMMARY,(⊂T1,T2),⊂''

  OUT←'perfo_',(⍕EXPR[4])
  OUT←OUT, 16↑'(',(⊃EXPR[5]),','
  OUT←OUT, 6↑'_',((-1+0<⍴⊃EXPR[1])↑'AB'),','   ⍝ _B (monadic) or _AB (dyadic)
  OUT←OUT, 10↑(TITLE1 EXPR),','
  OUT←OUT, T3,')',⎕UCS ,10
  ⊣ OUT FIO∆fwrite_utf8 TH_FILE
∇

272  ⍝ ----------------------------------------------------
273
274∇GO;DYA_A;MON_A;SUMMARY;TH_FILE
275  CORES←CORES ⌊ ⎕SYL[25;2]
276  'Running ScalarBenchmark_2 with' CORES 'cores...'
277
278  ⍝ check that the core count can be set
279280  ⎕SYL[26;2] ← CORES
281  →(CORES = ⎕SYL[26;2])⍴CORES_OK
282  '*** CPU core count could not be set!'
283  '*** This is usually a configuration or platform problem.'
284  '***'
285  '***  try "make parallel1" in the top-level directory'
286  '***'
287  '*** the relevant ./configure options (used by make parallel1) are:'
288  '***      PERFORMANCE_COUNTERS_WANTED=yes'
289  '***      CORE_COUNT_WANTED=SYL'
290  '***'
291  '*** NOTE: parallel GNU APL currently requires linux and a recent Intel CPU'
292  '***'
293  →0
294
295CORES_OK:
296  ⎕SYL[26;2] ← 0
297
298  ⍝ figure start-up costs
299300  MON_A←Average[1] ⊃ FIGURE_A ¨ MON_EXPR
301  DYA_A←Average[1] ⊃ FIGURE_A ¨ DYA_EXPR
302
303  ⍝ figure per-item costs. we can do that only after computing MON_A/DYA_A
304305  ''
306  SUMMARY←0⍴''
307  TH_FILE←"w" FIO∆fopen "parallel_thresholds"
308
309  ⊣ "\n" FIO∆fwrite_utf8 TH_FILE
310
311  ⊣ (⊂MON_A) FIGURE_B ¨ MON_EXPR
312
313  ⊣ "\n" FIO∆fwrite_utf8 TH_FILE
314
315  ⊣ (⊂DYA_A) FIGURE_B ¨ DYA_EXPR
316
317  ⊣ "\n" FIO∆fwrite_utf8 TH_FILE
318  ⊣ "#undef perfo_1\n" FIO∆fwrite_utf8 TH_FILE
319"#undef perfo_2\n" FIOfwrite_utf8 TH_FILE
320"#undef perfo_3\n" FIOfwrite_utf8 TH_FILE
321"\n" FIOfwrite_utf8 TH_FILE
322
323FIOfclose TH_FILE
324
325 ''
326 78' ============================  SUMMARY  ',80'='
327 ''
328  ⊣ { ⎕←⍵ }¨SUMMARY
329330
331
332  GO
333
334  ]PSTAT
335  )OFF
336
337