1 /************************************************************************
2 ************************************************************************
3 FAUST compiler
4 Copyright (C) 2003-2018 GRAME, Centre National de Creation Musicale
5 ---------------------------------------------------------------------
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 ************************************************************************
20 ************************************************************************/
21
22 /**********************************************************************
23 - klass.cpp : class C++ to be filled (FAUST project) -
24
25 History :
26 -----------
27 17-10-2001 : initial implementation (yo)
28 18-10-2001 : add getFreshID (yo)
29 02-11-2001 : add sub-classes (yo)
30 06-11-2001 : change classers impression (yo)
31
32 ***********************************************************************/
33
34 #include <stdio.h>
35 #include <iostream>
36 #include <list>
37 #include <map>
38 #include <sstream>
39 #include <string>
40
41 #include "Text.hh"
42 #include "floats.hh"
43 #include "klass.hh"
44 #include "ppsig.hh"
45 #include "recursivness.hh"
46 #include "signals.hh"
47 #include "uitree.hh"
48
49 static int gTaskCount = 0;
50
51 bool Klass::fNeedPowerDef = false;
52
53 /**
54 * Store the loop used to compute a signal
55 */
setLoopProperty(Tree sig,Loop * l)56 void Klass::setLoopProperty(Tree sig, Loop* l)
57 {
58 fLoopProperty.set(sig, l);
59 }
60
61 /**
62 * Returns the loop used to compute a signal
63 */
getLoopProperty(Tree sig,Loop * & l)64 bool Klass::getLoopProperty(Tree sig, Loop*& l)
65 {
66 return fLoopProperty.get(sig, l);
67 }
68
69 /**
70 * Open a non-recursive loop on top of the stack of open loops.
71 * @param size the number of iterations of the loop
72 */
openLoop(const string & size)73 void Klass::openLoop(const string& size)
74 {
75 fTopLoop = new Loop(fTopLoop, size);
76 // cerr << "\nOPEN SHARED LOOP(" << size << ") ----> " << fTopLoop << endl;
77 }
78
79 /**
80 * Open a recursive loop on top of the stack of open loops.
81 * @param recsymbol the recursive symbol defined in this loop
82 * @param size the number of iterations of the loop
83 */
openLoop(Tree recsymbol,const string & size)84 void Klass::openLoop(Tree recsymbol, const string& size)
85 {
86 fTopLoop = new Loop(recsymbol, fTopLoop, size);
87 // cerr << "\nOPEN REC LOOP(" << *recsymbol << ", " << size << ") ----> " << fTopLoop << endl;
88 }
89
listAllLoopProperties(Tree sig,set<Loop * > & L,set<Tree> & visited)90 void Klass::listAllLoopProperties(Tree sig, set<Loop*>& L, set<Tree>& visited)
91 {
92 if (visited.count(sig)==0) {
93 visited.insert(sig);
94 Loop* l;
95 if (getLoopProperty(sig, l)) {
96 L.insert(l);
97 } else {
98 // we go down the expression
99 vector<Tree> subsigs;
100 int n = getSubSignals(sig, subsigs, false);
101 for (int i = 0; i < n; i++) {
102 listAllLoopProperties(subsigs[i], L, visited);
103 }
104 }
105 }
106 }
107
108 /**
109 * Close the top loop and either keep it
110 * or absorb it within its enclosing loop.
111 */
closeLoop(Tree sig)112 void Klass::closeLoop(Tree sig)
113 {
114 faustassert(fTopLoop);
115
116 // fix the missing dependencies
117 set<Loop*> L;
118 set<Tree> V;
119 listAllLoopProperties(sig, L, V);
120 for (Loop* l : L) {
121 fTopLoop->fBackwardLoopDependencies.insert(l);
122 }
123
124 Loop* l = fTopLoop;
125 fTopLoop = l->fEnclosingLoop;
126 faustassert(fTopLoop);
127
128 // l->println(4, cerr);
129 // cerr << endl;
130
131 Tree S = symlist(sig);
132 // cerr << "CLOSE LOOP :" << l << " with symbols " << *S << endl;
133 if (l->isEmpty() || fTopLoop->hasRecDependencyIn(S)) {
134 // cout << " will absorb" << endl;
135 // empty or dependent loop -> absorbed by enclosing one
136 // cerr << "absorbed by : " << fTopLoop << endl;
137 fTopLoop->absorb(l);
138 // delete l; HACK !!!
139 } else {
140 // cout << " will NOT absorb" << endl;
141 // we have an independent loop
142 setLoopProperty(sig, l); // associate the signal
143 fTopLoop->fBackwardLoopDependencies.insert(l);
144 // we need to indicate that all recursive symbols defined
145 // in this loop are defined in this loop
146 for (Tree lsym = l->fRecSymbolSet; !isNil(lsym); lsym = tl(lsym)) {
147 this->setLoopProperty(hd(lsym), l);
148 // cerr << "loop " << l << " defines " << *hd(lsym) << endl;
149 }
150 }
151 // cerr << "\n" << endl;
152 }
153
154 /**
155 * Print a list of elements (e1, e2,...)
156 */
printdecllist(int n,const string & decl,list<string> & content,ostream & fout)157 void printdecllist(int n, const string& decl, list<string>& content, ostream& fout)
158 {
159 if (!content.empty()) {
160 fout << "\\";
161 tab(n, fout);
162 fout << decl;
163 string sep = "(";
164 for (const auto& s : content) {
165 fout << sep << s;
166 sep = ", ";
167 }
168 fout << ')';
169 }
170 }
171
172 /**
173 * Print the required C++ libraries as comments in source code
174 */
printLibrary(ostream & fout)175 void Klass::printLibrary(ostream& fout)
176 {
177 set<string> S;
178 set<string>::iterator f;
179
180 string sep;
181 collectLibrary(S);
182 fout << "/* link with ";
183 for (f = S.begin(), sep = ": "; f != S.end(); f++, sep = ", ") {
184 fout << sep << *f;
185 }
186 fout << " */\n";
187 }
188
189 /**
190 * Print the required include files
191 */
printIncludeFile(ostream & fout)192 void Klass::printIncludeFile(ostream& fout)
193 {
194 if (gGlobal->gOpenMPSwitch) {
195 fout << "#include <omp.h>\n";
196 }
197
198 set<string> S;
199 collectIncludeFile(S);
200 for (const auto& f : S) {
201 string inc = f;
202 // Only print non-empty include (inc is actually quoted)
203 if (inc.size() > 2) {
204 fout << "#include " << f << "\n";
205 }
206 }
207 }
208
209 /**
210 * Print additional functions required by the generated code
211 */
printAdditionalCode(ostream & fout)212 void Klass::printAdditionalCode(ostream& fout)
213 {
214 if (fNeedPowerDef) {
215 // Add faustpower definition to C++ code
216 fout << "#ifndef FAUSTPOWER" << endl;
217 fout << "#define FAUSTPOWER" << endl;
218 fout << "#include <cmath>" << endl;
219
220 fout << "template <int N> inline int faustpower(int x) { return faustpower<N/2>(x) * "
221 "faustpower<N-N/2>(x); } "
222 << endl;
223 fout << "template <> inline int faustpower<0>(int x) { return 1; }" << endl;
224 fout << "template <> inline int faustpower<1>(int x) { return x; }" << endl;
225 fout << "template <> inline int faustpower<2>(int x) { return x*x; }" << endl;
226
227 if (gGlobal->gFloatSize == 1) {
228 fout << "template <int N> inline float faustpower(float x) { return faustpower<N/2>(x) * "
229 "faustpower<N-N/2>(x); } "
230 << endl;
231 fout << "template <> inline float faustpower<0>(float x) { return 1; }" << endl;
232 fout << "template <> inline float faustpower<1>(float x) { return x; }" << endl;
233 fout << "template <> inline float faustpower<2>(float x) { return x*x; }" << endl;
234
235 } else if (gGlobal->gFloatSize == 2) {
236 fout << "template <int N> inline double faustpower(double x) { return faustpower<N/2>(x) * "
237 "faustpower<N-N/2>(x); } "
238 << endl;
239 fout << "template <> inline double faustpower<0>(double x) { return 1; }" << endl;
240 fout << "template <> inline double faustpower<1>(double x) { return x; }" << endl;
241 fout << "template <> inline double faustpower<2>(double x) { return x*x; }" << endl;
242
243 } else if (gGlobal->gFloatSize == 3) {
244 fout << "template <int N> inline long double faustpower(long double x){ return powl(x,N); }" << endl;
245 }
246 fout << "#endif" << endl;
247 }
248 }
249
250 /**
251 * Print metadata declaration
252 */
printMetadata(int n,const MetaDataSet & S,ostream & fout)253 void Klass::printMetadata(int n, const MetaDataSet& S, ostream& fout)
254 {
255 tab(n, fout);
256 fout << "virtual void metadata(Meta* m) { ";
257
258 // We do not want to accumulate metadata from all hierachical levels, so the upper level only is kept
259 for (const auto& i : gGlobal->gMetaDataSet) {
260 if (i.first != tree("author")) {
261 tab(n + 1, fout);
262 fout << "m->declare(\"" << *(i.first) << "\", " << **(i.second.begin()) << ");";
263 } else {
264 // But the "author" meta data is accumulated, the upper level becomes the main author and sub-levels become
265 // "contributor"
266 for (const auto& j : i.second) {
267 if (j == *i.second.begin()) {
268 tab(n + 1, fout);
269 fout << "m->declare(\"" << *(i.first) << "\", " << *j << ");";
270 } else {
271 tab(n + 1, fout);
272 fout << "m->declare(\""
273 << "contributor"
274 << "\", " << *j << ");";
275 }
276 }
277 }
278 }
279
280 tab(n, fout);
281 fout << "}" << endl;
282 }
283
isElement(const set<Loop * > & S,Loop * l)284 inline bool isElement(const set<Loop*>& S, Loop* l)
285 {
286 return S.find(l) != S.end();
287 }
288
289 /**
290 * Print a loop graph deep first
291 */
printLoopDeepFirst(int n,ostream & fout,Loop * l,set<Loop * > & visited)292 void Klass::printLoopDeepFirst(int n, ostream& fout, Loop* l, set<Loop*>& visited)
293 {
294 // avoid printing already printed loops
295 if (isElement(visited, l)) return;
296
297 // remember we have printed this loop
298 visited.insert(l);
299
300 // print the dependencies loops (that need to be computed before this one)
301 for (lset::const_iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end(); p++) {
302 printLoopDeepFirst(n, fout, *p, visited);
303 }
304 // the print the loop itself
305 tab(n, fout);
306 tab(n, fout);
307 fout << "// LOOP " << l << ", ORDER " << l->fOrder << endl;
308 l->println(n + 1, fout);
309 }
310
311 /**
312 * Compute how many time each loop is used in a DAG
313 */
computeUseCount(Loop * l)314 static void computeUseCount(Loop* l)
315 {
316 l->fUseCount++;
317 if (l->fUseCount == 1) {
318 for (lset::iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end(); p++) {
319 computeUseCount(*p);
320 }
321 }
322 }
323
324 /**
325 * Group together sequences of loops
326 */
groupSeqLoops(Loop * l,set<Loop * > & visited)327 static void groupSeqLoops(Loop* l, set<Loop*>& visited)
328 {
329 if (visited.find(l) == visited.end()) {
330 visited.insert(l);
331 int n = (int)l->fBackwardLoopDependencies.size();
332 if (n == 0) {
333 return;
334 } else if (n == 1) {
335 Loop* f = *(l->fBackwardLoopDependencies.begin());
336 if (f->fUseCount == 1) {
337 l->concat(f);
338 groupSeqLoops(l, visited);
339 } else {
340 groupSeqLoops(f, visited);
341 }
342 return;
343 } else if (n > 1) {
344 for (lset::iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end();
345 p++) {
346 groupSeqLoops(*p, visited);
347 }
348 }
349 }
350 }
351
// Task numbers used when building the scheduler task list
#define WORK_STEALING_INDEX 0
#define LAST_TASK_INDEX 1
// Parenthesized so that the macro expands safely inside larger expressions
// (e.g. START_TASK_INDEX * 2)
#define START_TASK_INDEX (LAST_TASK_INDEX + 1)

// buildTasksList() compares the number of ready tasks with this value
#define START_TASK_MAX 2
357
buildTasksList()358 void Klass::buildTasksList()
359 {
360 lgraph G;
361
362 if (gGlobal->gGroupTaskSwitch) {
363 computeUseCount(fTopLoop);
364 set<Loop*> visited;
365 groupSeqLoops(fTopLoop, visited);
366 }
367
368 sortGraph(fTopLoop, G);
369 int index_task = START_TASK_INDEX;
370
371 addDeclCode("TaskGraph fGraph;");
372 addDeclCode("FAUSTFLOAT** input;");
373 addDeclCode("FAUSTFLOAT** output;");
374 addDeclCode("volatile bool fIsFinished;");
375 addDeclCode("int fCount;");
376 addDeclCode("int fIndex;");
377 addDeclCode("DSPThreadPool* fThreadPool;");
378 addDeclCode("int fStaticNumThreads;");
379 addDeclCode("int fDynamicNumThreads;");
380
381 // Compute forward dependencies
382 for (int l = (int)G.size() - 1; l >= 0; l--) {
383 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
384 for (lset::const_iterator p1 = (*p)->fBackwardLoopDependencies.begin();
385 p1 != (*p)->fBackwardLoopDependencies.end(); p1++) {
386 (*p1)->fForwardLoopDependencies.insert((*p));
387 }
388 (*p)->fIndex = index_task;
389 index_task++;
390 }
391 }
392
393 // Compute ready tasks list
394 vector<int> task_num;
395 for (int l = (int)G.size() - 1; l >= 0; l--) {
396 lset::const_iterator next;
397 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
398 if ((*p)->fBackwardLoopDependencies.size() == 0) {
399 task_num.push_back((*p)->fIndex);
400 }
401 }
402 }
403
404 if (task_num.size() < START_TASK_MAX) {
405 // Push ready tasks thread 0, execute one task directly
406
407 addZone3("if (cur_thread == 0) {");
408
409 Loop* keep = NULL;
410 for (int l = (int)G.size() - 1; l >= 0; l--) {
411 lset::const_iterator next;
412 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
413 if ((*p)->fBackwardLoopDependencies.size() == 0) {
414 if (keep == NULL) {
415 keep = *p;
416 } else {
417 addZone3(subst(" taskqueue.PushHead($0);", T((*p)->fIndex)));
418 }
419 }
420 }
421 }
422
423 if (keep != NULL) {
424 addZone3(subst(" tasknum = $0;", T(keep->fIndex)));
425 }
426
427 addZone3("} else {");
428 addZone3(" tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);");
429 addZone3("}");
430
431 } else {
432 // Cut ready tasks list and have each thread (dynamically) use a subpart
433 addZone3(subst("int task_list_size = $0;", T((int)task_num.size())));
434 stringstream buf;
435 buf << "int task_list[" << task_num.size() << "] = {";
436 for (size_t i = 0; i < task_num.size(); i++) {
437 buf << task_num[i];
438 if (i != (task_num.size() - 1)) buf << ",";
439 }
440 buf << "};";
441
442 addZone3(buf.str());
443 addZone3("taskqueue.InitTaskList(task_list_size, task_list, fDynamicNumThreads, cur_thread, tasknum);");
444 }
445
446 // Last stage connected to end task
447 if (G[0].size() > 1) {
448 addZone2c("// Initialize end task, if more than one input");
449 addZone2c(subst("fGraph.InitTask($0,$1);", T(LAST_TASK_INDEX), T((int)G[0].size())));
450 } else {
451 addZone2c("// End task has only one input, so will be directly activated");
452 }
453
454 // Compute init section
455 addZone2c("// Only initialize taks with more than one input");
456 for (int l = (int)G.size() - 1; l >= 0; l--) {
457 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
458 if ((*p)->fBackwardLoopDependencies.size() > 1) { // Only initialize taks with more than 1 input, since
459 // taks with one input are "directly" activated.
460 addZone2c(subst("fGraph.InitTask($0,$1);", T(START_TASK_INDEX + gTaskCount++),
461 T((int)(*p)->fBackwardLoopDependencies.size())));
462 } else {
463 gTaskCount++;
464 }
465 }
466 }
467
468 addInitCode("fStaticNumThreads = get_max_cpu();");
469 addInitCode(
470 "fDynamicNumThreads = getenv(\"OMP_NUM_THREADS\") ? atoi(getenv(\"OMP_NUM_THREADS\")) : fStaticNumThreads;");
471 addInitCode("fThreadPool->StartAll(fStaticNumThreads - 1, false);");
472
473 gTaskCount = 0;
474 }
475
476 /**
477 * Print the loop graph (used for vector code)
478 */
printLoopGraphVector(int n,ostream & fout)479 void Klass::printLoopGraphVector(int n, ostream& fout)
480 {
481 if (gGlobal->gGroupTaskSwitch) {
482 computeUseCount(fTopLoop);
483 set<Loop*> visited;
484 groupSeqLoops(fTopLoop, visited);
485 }
486
487 lgraph G;
488 sortGraph(fTopLoop, G);
489
490 #if 1
491 // EXPERIMENTAL
492 if (gGlobal->gVectorSwitch && gGlobal->gDeepFirstSwitch) {
493 set<Loop*> visited;
494 printLoopDeepFirst(n, fout, fTopLoop, visited);
495 return;
496 }
497 #endif
498
499 // normal mode
500 for (int l = (int)G.size() - 1; l >= 0; l--) {
501 if (gGlobal->gVectorSwitch) {
502 tab(n, fout);
503 fout << "// Section : " << G.size() - l;
504 }
505 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
506 (*p)->println(n, fout);
507 }
508 }
509 }
510
511 /**
512 * Print the loop graph as a serie of parallel loops
513 */
printLoopGraphOpenMP(int n,ostream & fout)514 void Klass::printLoopGraphOpenMP(int n, ostream& fout)
515 {
516 if (gGlobal->gGroupTaskSwitch) {
517 computeUseCount(fTopLoop);
518 set<Loop*> visited;
519 groupSeqLoops(fTopLoop, visited);
520 }
521
522 lgraph G;
523 sortGraph(fTopLoop, G);
524
525 // OpenMP mode : add OpenMP directives
526 for (int l = (int)G.size() - 1; l >= 0; l--) {
527 tab(n, fout);
528 fout << "// Section : " << G.size() - l;
529 printLoopLevelOpenMP(n, (int)G.size() - l, G[l], fout);
530 }
531 }
532
533 /**
534 * Print the loop graph as a serie of parallel loops
535 */
printLoopGraphScheduler(int n,ostream & fout)536 void Klass::printLoopGraphScheduler(int n, ostream& fout)
537 {
538 if (gGlobal->gGroupTaskSwitch) {
539 computeUseCount(fTopLoop);
540 set<Loop*> visited;
541 groupSeqLoops(fTopLoop, visited);
542 }
543
544 lgraph G;
545 sortGraph(fTopLoop, G);
546
547 // OpenMP mode : add OpenMP directives
548 for (int l = (int)G.size() - 1; l > 0; l--) {
549 tab(n, fout);
550 fout << "// Section : " << G.size() - l;
551 printLoopLevelScheduler(n, (int)G.size() - l, G[l], fout);
552 }
553
554 printLastLoopLevelScheduler(n, (int)G.size(), G[0], fout);
555 }
556
557 /**
558 * Print the loop graph in dot format
559 */
printGraphDotFormat(ostream & fout)560 void Klass::printGraphDotFormat(ostream& fout)
561 {
562 lgraph G;
563 sortGraph(fTopLoop, G);
564
565 fout << "strict digraph loopgraph {" << endl;
566 fout << '\t' << "rankdir=LR;" << endl;
567 fout << '\t' << "node[color=blue, fillcolor=lightblue, style=filled, fontsize=9];" << endl;
568
569 int lnum = 0; // used for loop numbers
570 // for each level of the graph
571 for (int l = (int)G.size() - 1; l >= 0; l--) {
572 // for each task in the level
573 for (lset::const_iterator t = G[l].begin(); t != G[l].end(); t++) {
574 // print task label "Lxxx : 0xffffff"
575 fout << '\t' << 'L' << (*t) << "[label=<<font face=\"verdana,bold\">L" << lnum++ << "</font> : " << (*t)
576 << ">];" << endl;
577 // for each source of the task
578 for (lset::const_iterator src = (*t)->fBackwardLoopDependencies.begin();
579 src != (*t)->fBackwardLoopDependencies.end(); src++) {
580 // print the connection Lxxx -> Lyyy;
581 fout << '\t' << 'L' << (*src) << "->" << 'L' << (*t) << ';' << endl;
582 }
583 }
584 }
585 fout << "}" << endl;
586 }
587
588 /**
589 * Print the loop graph (used for internals classes)
590 */
printLoopGraphInternal(int n,ostream & fout)591 void Klass::printLoopGraphInternal(int n, ostream& fout)
592 {
593 lgraph G;
594 sortGraph(fTopLoop, G);
595
596 // normal mode
597 for (int l = (int)G.size() - 1; l >= 0; l--) {
598 if (gGlobal->gVectorSwitch) {
599 tab(n, fout);
600 fout << "// Section : " << G.size() - l;
601 }
602 for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
603 (*p)->printoneln(n, fout);
604 }
605 }
606 }
607
608 /**
609 * Print the loop graph (scalar mode)
610 */
printLoopGraphScalar(int n,ostream & fout)611 void Klass::printLoopGraphScalar(int n, ostream& fout)
612 {
613 fTopLoop->printoneln(n, fout);
614 }
615
616 /**
617 * returns true if all the loops are non recursive
618 */
nonRecursiveLevel(const lset & L)619 static bool nonRecursiveLevel(const lset& L)
620 {
621 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
622 if ((*p)->fIsRecursive) return false;
623 }
624 return true;
625 }
626
627 /**
628 * Print the 'level' of the loop graph as a set of
629 * parallel loops
630 */
printLoopLevelOpenMP(int n,int lnum,const lset & L,ostream & fout)631 void Klass::printLoopLevelOpenMP(int n, int lnum, const lset& L, ostream& fout)
632 {
633 if (nonRecursiveLevel(L) && L.size() == 1) {
634 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
635 if ((*p)->isEmpty() == false) {
636 if (gGlobal->gOpenMPLoop) {
637 (*p)->printParLoopln(n, fout);
638 } else {
639 tab(n, fout);
640 fout << "#pragma omp single ";
641 tab(n, fout);
642 fout << "{ ";
643 (*p)->println(n + 1, fout);
644 tab(n, fout);
645 fout << "} ";
646 }
647 }
648 }
649
650 } else if (L.size() > 1) {
651 tab(n, fout);
652 fout << "#pragma omp sections ";
653 tab(n, fout);
654 fout << "{ ";
655 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
656 tab(n + 1, fout);
657 fout << "#pragma omp section ";
658 tab(n + 1, fout);
659 fout << "{";
660 (*p)->println(n + 2, fout);
661 tab(n + 1, fout);
662 fout << "} ";
663 }
664 tab(n, fout);
665 fout << "} ";
666 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
667 tab(n, fout);
668 fout << "#pragma omp single ";
669 tab(n, fout);
670 fout << "{ ";
671 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
672 (*p)->println(n + 1, fout);
673 }
674 tab(n, fout);
675 fout << "} ";
676 }
677 }
678
679 /**
680 * Print the 'level' of the loop graph as a set of
681 * parallel loops
682 */
printLastLoopLevelScheduler(int n,int lnum,const lset & L,ostream & fout)683 void Klass::printLastLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
684 {
685 if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
686 lset::const_iterator p = L.begin();
687 tab(n, fout);
688 fout << "case " << gTaskCount++ << ": { ";
689 (*p)->println(n + 1, fout);
690 tab(n + 1, fout);
691 fout << "tasknum = LAST_TASK_INDEX;";
692 tab(n + 1, fout);
693 fout << "break;";
694 tab(n, fout);
695 fout << "} ";
696
697 } else if (L.size() > 1) {
698 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
699 tab(n, fout);
700 fout << "case " << gTaskCount++ << ": { ";
701 (*p)->println(n + 1, fout);
702 tab(n + 1, fout);
703 fout << "fGraph.ActivateOneOutputTask(taskqueue, LAST_TASK_INDEX, tasknum);";
704 tab(n + 1, fout);
705 fout << "break;";
706 tab(n, fout);
707 fout << "} ";
708 }
709
710 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
711 lset::const_iterator p = L.begin();
712 tab(n, fout);
713 fout << "case " << gTaskCount++ << ": { ";
714 (*p)->println(n + 1, fout);
715 tab(n + 1, fout);
716 fout << "tasknum = LAST_TASK_INDEX;";
717 tab(n + 1, fout);
718 fout << "break;";
719 tab(n, fout);
720 fout << "} ";
721 }
722 }
723
printOneLoopScheduler(lset::const_iterator p,int n,ostream & fout)724 void Klass::printOneLoopScheduler(lset::const_iterator p, int n, ostream& fout)
725 {
726 tab(n, fout);
727 fout << "case " << gTaskCount++ << ": { ";
728 (*p)->println(n + 1, fout);
729
730 // One output only
731 if ((*p)->fForwardLoopDependencies.size() == 1) {
732 lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
733 if ((*p1)->fBackwardLoopDependencies.size() == 1) {
734 tab(n + 1, fout);
735 fout << subst("tasknum = $0;", T((*p1)->fIndex));
736 } else {
737 tab(n + 1, fout);
738 fout << subst("fGraph.ActivateOneOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
739 }
740
741 } else {
742 Loop* keep = NULL;
743 // Find one output with only one backward dependencies
744 for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
745 p1 != (*p)->fForwardLoopDependencies.end(); p1++) {
746 if ((*p1)->fBackwardLoopDependencies.size() == 1) {
747 keep = *p1;
748 break;
749 }
750 }
751
752 if (keep == NULL) {
753 tab(n + 1, fout);
754 fout << "tasknum = WORK_STEALING_INDEX;";
755 }
756
757 for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
758 p1 != (*p)->fForwardLoopDependencies.end(); p1++) {
759 if ((*p1)->fBackwardLoopDependencies.size() == 1) { // Task is the only input
760 if (*p1 != keep) {
761 tab(n + 1, fout);
762 fout << subst("taskqueue.PushHead($0);", T((*p1)->fIndex));
763 }
764 } else {
765 if (keep == NULL) {
766 tab(n + 1, fout);
767 fout << subst("fGraph.ActivateOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
768 } else {
769 tab(n + 1, fout);
770 fout << subst("fGraph.ActivateOutputTask(taskqueue, $0);", T((*p1)->fIndex));
771 }
772 }
773 }
774
775 if (keep != NULL) {
776 tab(n + 1, fout);
777 fout << subst("tasknum = $0;", T(keep->fIndex)); // Last one
778 } else {
779 tab(n + 1, fout);
780 fout << "fGraph.GetReadyTask(taskqueue, tasknum);"; // Last one
781 }
782 }
783
784 tab(n + 1, fout);
785 fout << "break;";
786 tab(n, fout);
787 fout << "} ";
788 }
789
790 /**
791 * Print the 'level' of the loop graph as a set of
792 * parallel loops
793 */
794
printLoopLevelScheduler(int n,int lnum,const lset & L,ostream & fout)795 void Klass::printLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
796 {
797 if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
798 printOneLoopScheduler(L.begin(), n, fout);
799 } else if (L.size() > 1) {
800 for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
801 printOneLoopScheduler(p, n, fout);
802 }
803 } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
804 printOneLoopScheduler(L.begin(), n, fout);
805 }
806 }
807
808 /**
809 * Print a full C++ class corresponding to a Faust dsp
810 */
println(int n,ostream & fout)811 void Klass::println(int n, ostream& fout)
812 {
813 list<Klass*>::iterator k;
814
815 tab(n, fout);
816 fout << "#ifndef FAUSTCLASS " << endl;
817 fout << "#define FAUSTCLASS " << fKlassName << endl;
818 fout << "#endif" << endl;
819
820 if (gGlobal->gSchedulerSwitch) {
821 tab(n, fout);
822 fout << "class " << fKlassName << " : public " << fSuperKlassName << ", public Runnable {";
823 } else {
824 tab(n, fout);
825 fout << "class " << fKlassName << " : public " << fSuperKlassName << " {";
826 }
827
828 if (gGlobal->gUIMacroSwitch) {
829 tab(n, fout);
830 fout << " public:";
831 } else {
832 tab(n, fout);
833 fout << " private:";
834 }
835
836 for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->println(n + 1, fout);
837
838 printlines(n + 1, fDeclCode, fout);
839
840 tab(n + 1, fout);
841 fout << "int fSampleRate;\n";
842
843 tab(n, fout);
844 fout << " public:";
845
846 if (gGlobal->gMemoryManager) {
847 tab(n + 1, fout);
848 fout << "static dsp_memory_manager* fManager;" << endl;
849 }
850
851 printMetadata(n + 1, gGlobal->gMetaDataSet, fout);
852
853 if (gGlobal->gSchedulerSwitch) {
854 tab(n + 1, fout);
855 fout << fKlassName << "() { "
856 << "fThreadPool = DSPThreadPool::Init(); }";
857
858 tab(n + 1, fout);
859 fout << "virtual ~" << fKlassName << "() { "
860 << "DSPThreadPool::Destroy(); }";
861 }
862
863 tab(n + 1, fout);
864 fout << "virtual int getNumInputs() { "
865 << "return " << fNumInputs << "; }";
866
867 tab(n + 1, fout);
868 fout << "virtual int getNumOutputs() { "
869 << "return " << fNumOutputs << "; }";
870
871 tab(n + 1, fout);
872 fout << "static void classInit(int sample_rate) {";
873 printlines(n + 2, fStaticInitCode, fout);
874 tab(n + 1, fout);
875 fout << "}";
876
877 if (gGlobal->gMemoryManager) {
878 tab(n + 1, fout);
879 fout << "static void classDestroy() {";
880 printlines(n + 2, fStaticDestroyCode, fout);
881 tab(n + 1, fout);
882 fout << "}";
883 }
884
885 tab(n + 1, fout);
886 fout << "virtual void instanceConstants(int sample_rate) {";
887 tab(n + 2, fout);
888 fout << "fSampleRate = sample_rate;";
889 printlines(n + 2, fInitCode, fout);
890 tab(n + 1, fout);
891 fout << "}";
892
893 tab(n + 1, fout);
894 fout << "virtual void instanceResetUserInterface() {";
895 printlines(n + 2, fInitUICode, fout);
896 tab(n + 1, fout);
897 fout << "}";
898
899 tab(n + 1, fout);
900 fout << "virtual void instanceClear() {";
901 printlines(n + 2, fClearCode, fout);
902 tab(n + 1, fout);
903 fout << "}";
904
905 if (gGlobal->gMemoryManager) {
906 tab(n + 1, fout);
907 fout << "virtual void init(int sample_rate) {}";
908 } else {
909 tab(n + 1, fout);
910 fout << "virtual void init(int sample_rate) {";
911 tab(n + 2, fout);
912 fout << "classInit(sample_rate);";
913 tab(n + 2, fout);
914 fout << "instanceInit(sample_rate);";
915 tab(n + 1, fout);
916 fout << "}";
917 }
918
919 tab(n + 1, fout);
920 fout << "virtual void instanceInit(int sample_rate) {";
921 tab(n + 2, fout);
922 fout << "instanceConstants(sample_rate);";
923 tab(n + 2, fout);
924 fout << "instanceResetUserInterface();";
925 tab(n + 2, fout);
926 fout << "instanceClear();";
927 tab(n + 1, fout);
928 fout << "}";
929
930 tab(n + 1, fout);
931 fout << "virtual " << fKlassName << "* clone() {";
932 tab(n + 2, fout);
933 fout << "return new " << fKlassName << "();";
934 tab(n + 1, fout);
935 fout << "}";
936
937 tab(n + 1, fout);
938 fout << "virtual int getSampleRate() {";
939 tab(n + 2, fout);
940 fout << "return fSampleRate;";
941 tab(n + 1, fout);
942 fout << "}";
943
944 tab(n + 1, fout);
945 fout << "virtual void buildUserInterface(UI* ui_interface) {";
946 printlines(n + 2, fUICode, fout);
947 tab(n + 1, fout);
948 fout << "}";
949
950 printComputeMethod(n, fout);
951
952 tab(n, fout);
953 fout << "};\n" << endl;
954
955 printlines(n, fStaticFields, fout);
956
957 if (gGlobal->gMemoryManager) {
958 tab(n, fout);
959 fout << "dsp_memory_manager* " << fKlassName << "::fManager = 0;" << endl;
960 }
961
962 // generate user interface macros if needed
963 if (gGlobal->gUIMacroSwitch) {
964 tab(n, fout);
965 fout << "#ifdef FAUST_UIMACROS";
966 tab(n + 1, fout);
967 tab(n + 1, fout);
968 for (const auto& it : gGlobal->gMetaDataSet) {
969 if (it.first == tree("filename")) {
970 fout << "#define FAUST_FILE_NAME " << **(it.second.begin());
971 break;
972 }
973 }
974 tab(n + 1, fout);
975 fout << "#define FAUST_CLASS_NAME " << "\"" << fKlassName << "\"";
976 tab(n + 1, fout);
977 fout << "#define FAUST_INPUTS " << fNumInputs;
978 tab(n + 1, fout);
979 fout << "#define FAUST_OUTPUTS " << fNumOutputs;
980 tab(n + 1, fout);
981 fout << "#define FAUST_ACTIVES " << fNumActives;
982 tab(n + 1, fout);
983 fout << "#define FAUST_PASSIVES " << fNumPassives;
984 printlines(n + 1, fUIMacro, fout);
985 tab(n, fout);
986 fout << "#endif" << endl;
987 }
988
989 fout << endl;
990 }
991
992 /**
993 * Print Compute() method according to the various switch
994 */
printComputeMethod(int n,ostream & fout)995 void Klass::printComputeMethod(int n, ostream& fout)
996 {
997 if (gGlobal->gSchedulerSwitch) {
998 printComputeMethodScheduler(n, fout);
999 } else if (gGlobal->gOpenMPSwitch) {
1000 printComputeMethodOpenMP(n, fout);
1001 } else if (gGlobal->gVectorSwitch) {
1002 switch (gGlobal->gVectorLoopVariant) {
1003 case 0:
1004 printComputeMethodVectorFaster(n, fout);
1005 break;
1006 case 1:
1007 printComputeMethodVectorSimple(n, fout);
1008 break;
1009 default: {
1010 stringstream error;
1011 error << "ERROR : unknown loop variant" << gGlobal->gVectorLoopVariant << endl;
1012 throw faustexception(error.str());
1013 }
1014 }
1015 } else {
1016 printComputeMethodScalar(n, fout);
1017 }
1018 }
1019
printComputeMethodScalar(int n,ostream & fout)1020 void Klass::printComputeMethodScalar(int n, ostream& fout)
1021 {
1022 tab(n + 1, fout);
1023 fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1024 tab(n + 2, fout);
1025 fout << "//zone1";
1026 printlines(n + 2, fZone1Code, fout);
1027 tab(n + 2, fout);
1028 fout << "//zone2";
1029 printlines(n + 2, fZone2Code, fout);
1030 tab(n + 2, fout);
1031 fout << "//zone2b";
1032 printlines(n + 2, fZone2bCode, fout);
1033 tab(n + 2, fout);
1034 fout << "//zone3";
1035 printlines(n + 2, fZone3Code, fout);
1036 tab(n + 2, fout);
1037 fout << "//LoopGraphScalar";
1038 printLoopGraphScalar(n + 2, fout);
1039 printlines(n + 2, fZone4Code, fout);
1040 tab(n + 1, fout);
1041 fout << "}";
1042 }
1043
1044 /**
1045 * Uses loops of constant gVecSize boundary in order to provide the
1046 * C compiler with more optimisation opportunities. Improves performances
1047 * in general, but not always
1048 */
printComputeMethodVectorFaster(int n,ostream & fout)1049 void Klass::printComputeMethodVectorFaster(int n, ostream& fout)
1050 {
1051 // in vector mode we need to split loops in smaller pieces not larger
1052 // than gVecSize
1053 tab(n + 1, fout);
1054 fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1055 printlines(n + 2, fZone1Code, fout);
1056 printlines(n + 2, fZone2Code, fout);
1057 printlines(n + 2, fZone2bCode, fout);
1058
1059 tab(n + 2, fout);
1060 fout << "int index;";
1061 tab(n + 2, fout);
1062 fout << "int fullcount = count;";
1063
1064 tab(n + 2, fout);
1065 fout << "for (index = 0; index <= fullcount - " << gGlobal->gVecSize << "; index += " << gGlobal->gVecSize << ") {";
1066 tab(n + 3, fout);
1067 fout << "// compute by blocks of " << gGlobal->gVecSize << " samples";
1068 tab(n + 3, fout);
1069 fout << "const int count = " << gGlobal->gVecSize << ";";
1070 printlines(n + 3, fZone3Code, fout);
1071 printLoopGraphVector(n + 3, fout);
1072 tab(n + 2, fout);
1073 fout << "}";
1074
1075 tab(n + 2, fout);
1076 fout << "if (index < fullcount) {";
1077 tab(n + 3, fout);
1078 fout << "// compute the remaining samples if any";
1079 tab(n + 3, fout);
1080 fout << "int count = fullcount-index;";
1081 printlines(n + 3, fZone3Code, fout);
1082 printLoopGraphVector(n + 3, fout);
1083 tab(n + 2, fout);
1084 fout << "}";
1085
1086 printlines(n + 2, fZone4Code, fout);
1087 tab(n + 1, fout);
1088 fout << "}";
1089 }
1090
1091 /**
1092 * Simple loop layout, generally less efficient than printComputeMethodVectorFaster
1093 */
printComputeMethodVectorSimple(int n,ostream & fout)1094 void Klass::printComputeMethodVectorSimple(int n, ostream& fout)
1095 {
1096 // in vector mode we need to split loops in smaller pieces not larger
1097 // than gVecSize
1098 tab(n + 1, fout);
1099 fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1100 printlines(n + 2, fZone1Code, fout);
1101 printlines(n + 2, fZone2Code, fout);
1102 printlines(n + 2, fZone2bCode, fout);
1103
1104 tab(n + 2, fout);
1105 fout << "int fullcount = count;";
1106 tab(n + 2, fout);
1107 fout << "for (int index = 0; index < fullcount; index += " << gGlobal->gVecSize << ") {";
1108 tab(n + 3, fout);
1109 fout << "int count = min(" << gGlobal->gVecSize << ", fullcount-index);";
1110 printlines(n + 3, fZone3Code, fout);
1111 printLoopGraphVector(n + 3, fout);
1112 tab(n + 2, fout);
1113 fout << "}";
1114
1115 printlines(n + 2, fZone4Code, fout);
1116 tab(n + 1, fout);
1117 fout << "}";
1118 }
1119
1120 /*
1121 void Klass::printComputeMethodVectorFix0 (int n, ostream& fout)
1122 {
1123 // in vector mode we need to split loops in smaller pieces not larger
1124 // than gVecSize
1125 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
1126 printlines(n+2, fZone1Code, fout);
1127 printlines(n+2, fZone2Code, fout);
1128 printlines(n+2, fZone2bCode, fout);
1129 tab(n+2,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
1130 tab(n+3,fout); fout << "if (fullcount >= index + " << gVecSize << ") {";
1131 tab(n+4,fout); fout << "// compute by blocks of " << gVecSize << " samples";
1132 tab(n+4,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
1133 printlines(n+4, fZone3Code, fout);
1134 printLoopGraph (n+4,fout);
1135 tab(n+3,fout); fout << "} else if (fullcount > index) {";
1136 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
1137 tab(n+4,fout); fout << "// compute the remaining samples";
1138 tab(n+4,fout); fout << "int count = fullcount-index;" ;
1139 printlines(n+4, fZone3Code, fout);
1140 printLoopGraph (n+4,fout);
1141 tab(n+3,fout); fout << "}";
1142 tab(n+2,fout); fout << "}";
1143 tab(n+1,fout); fout << "}";
1144 }
1145
1146 void Klass::printComputeMethodVectorFix1 (int n, ostream& fout)
1147 {
1148 // in vector mode we need to split loops in smaller pieces not larger
1149 // than gVecSize
1150 tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
1151 printlines(n+2, fZone1Code, fout);
1152 printlines(n+2, fZone2Code, fout);
1153 printlines(n+2, fZone2bCode, fout);
1154
1155 tab(n+2,fout); fout << "int \tblock;";
1156 tab(n+2,fout); fout << "for (block = 0; block < fullcount/" << gVecSize << "; block++) {";
1157 tab(n+3,fout); fout << "// compute by blocks of " << gVecSize << " samples";
1158 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
1159 tab(n+3,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
1160 printlines(n+3, fZone3Code, fout);
1161 printLoopGraph (n+3,fout);
1162 tab(n+2,fout); fout << "}";
1163
1164 tab(n+2,fout); fout << "if (fullcount%" << gVecSize << " != 0) {";
1165 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
1166 tab(n+3,fout); fout << "// compute the remaining samples";
1167 tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
1168 tab(n+3,fout); fout << "int count = fullcount%" << gVecSize << ";" ;
1169 printlines(n+3, fZone3Code, fout);
1170 printLoopGraph (n+3,fout);
1171 tab(n+2,fout); fout << "}";
1172 tab(n+1,fout); fout << "}";
1173 }*/
1174
printComputeMethodOpenMP(int n,ostream & fout)1175 void Klass::printComputeMethodOpenMP(int n, ostream& fout)
1176 {
1177 // in openMP mode we need to split loops in smaller pieces not larger
1178 // than gVecSize and add OpenMP pragmas
1179 tab(n + 1, fout);
1180 fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1181 printlines(n + 2, fZone1Code, fout);
1182 printlines(n + 2, fZone2Code, fout);
1183 tab(n + 2, fout);
1184 fout << "int fullcount = count;";
1185 tab(n + 2, fout);
1186 fout << "#pragma omp parallel";
1187 printdecllist(n + 3, "firstprivate", fFirstPrivateDecl, fout);
1188
1189 tab(n + 2, fout);
1190 fout << "{";
1191 if (!fZone2bCode.empty()) {
1192 tab(n + 3, fout);
1193 fout << "#pragma omp single";
1194 tab(n + 3, fout);
1195 fout << "{";
1196 printlines(n + 4, fZone2bCode, fout);
1197 tab(n + 3, fout);
1198 fout << "}";
1199 }
1200
1201 tab(n + 3, fout);
1202 fout << "for (int index = 0; index < fullcount; index += " << gGlobal->gVecSize << ") {";
1203 tab(n + 4, fout);
1204 fout << "int count = min (" << gGlobal->gVecSize << ", fullcount-index);";
1205
1206 printlines(n + 4, fZone3Code, fout);
1207 printLoopGraphOpenMP(n + 4, fout);
1208
1209 tab(n + 3, fout);
1210 fout << "}";
1211
1212 tab(n + 2, fout);
1213 fout << "}";
1214
1215 printlines(n + 2, fZone4Code, fout);
1216 tab(n + 1, fout);
1217 fout << "}";
1218 }
1219
1220 /*
1221 void Klass::printComputeMethodScheduler (int n, ostream& fout)
1222 {
1223 tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
1224 printlines(n+2, fZone1Code, fout);
1225 printlines(n+2, fZone2Code, fout);
1226
1227 // Init input and output
1228 tab(n+2,fout); fout << "// Init input and output";
1229 printlines(n+2, fZone3aCode, fout);
1230 printlines(n+2, fZone3bCode, fout);
1231
1232 tab(n+2,fout); fout << "// Init graph state";
1233 tab(n+2,fout); fout << "initState(fTasksList);";
1234 tab(n+2,fout); fout << "bool is_finished = false;";
1235 tab(n+2,fout); fout << "unsigned int index_in = 0;";
1236 tab(n+2,fout); fout << "unsigned int index_out = 0;";
1237 tab(n+2,fout); fout << "int count = min ("<< gVecSize << ", fullcount);";
1238
1239 tab(n+2,fout); fout << "InitSchedulingMap();";
1240 tab(n+2,fout); fout << "#pragma omp parallel";
1241 printdecllist(n+3, "firstprivate", fFirstPrivateDecl, fout);
1242
1243 tab(n+2,fout); fout << "{";
1244 tab(n+3,fout); fout << "while (!is_finished) {";
1245 tab(n+4,fout); fout << "Task* task = searchTaskToAcquire(fTasksList);";
1246 tab(n+4,fout); fout << "if (task != NULL) {";
1247 tab(n+5,fout); fout << "bool last_cycle_for_thread = false;";
1248 tab(n+5,fout); fout << "do {";
1249 tab(n+6,fout); fout << "AddTaskToScheduling(task);";
1250 tab(n+6,fout); fout << "switch (task->fNum) {";
1251
1252 // DSP tasks
1253 printLoopGraph (n+7,fout);
1254
1255 // Input task
1256 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1257 printlines(n+8, fZone6Code, fout);
1258 tab(n+8, fout); fout << "index_in += count;";
1259 tab(n+8, fout); fout << "last_cycle_for_thread = (index_in > fullcount);";
1260 tab(n+8, fout); fout << "break;";
1261 tab(n+7, fout); fout << "} ";
1262
1263 // Output task
1264 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1265 printlines(n+8, fZone7Code, fout);
1266 tab(n+8, fout); fout << "index_out += count;";
1267 tab(n+8, fout); fout << "last_cycle_for_thread = (index_out > fullcount);";
1268 tab(n+8, fout); fout << "break;";
1269 tab(n+7, fout); fout << "} ";
1270
1271 // End task
1272 tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1273 tab(n+8, fout); fout << "is_finished = ((index_in >= fullcount) && (index_out >=
1274 fullcount));"; tab(n+8, fout); fout << "break;"; tab(n+7, fout); fout << "} ";
1275
1276 tab(n+6,fout); fout << "}";
1277 tab(n+6,fout); fout << "if (last_cycle_for_thread) break;";
1278
1279 tab(n+5,fout); fout << "} while ((task = task->concludeAndTryToAcquireNext()) != NULL);";
1280 tab(n+4,fout); fout << "}";
1281 tab(n+3,fout); fout << "}";
1282 tab(n+2,fout); fout << "}";
1283 tab(n+2,fout); fout << "PrintSchedulingMap();";
1284 tab(n+1,fout); fout << "}";
1285 }
1286 */
1287
printComputeMethodScheduler(int n,ostream & fout)1288 void Klass::printComputeMethodScheduler(int n, ostream& fout)
1289 {
1290 tab(n + 1, fout);
1291 fout << "void display() {";
1292 tab(n + 2, fout);
1293 fout << "fGraph.Display();";
1294 tab(n + 1, fout);
1295 fout << "}";
1296
1297 tab(n + 1, fout);
1298 fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1299
1300 tab(n + 2, fout);
1301 fout << "GetRealTime();";
1302
1303 tab(n + 2, fout);
1304 fout << "this->input = input;";
1305 tab(n + 2, fout);
1306 fout << "this->output = output;";
1307
1308 tab(n + 2, fout);
1309 fout << "StartMeasure();";
1310
1311 tab(n + 2, fout);
1312 fout << "int fullcount = count;";
1313
1314 tab(n + 2, fout);
1315 fout << "for (fIndex = 0; fIndex < fullcount; fIndex += " << gGlobal->gVecSize << ") {";
1316
1317 tab(n + 3, fout);
1318 fout << "fCount = min (" << gGlobal->gVecSize << ", fullcount-fIndex);";
1319 tab(n + 3, fout);
1320 fout << "TaskQueue::Init();";
1321 printlines(n + 3, fZone2cCode, fout);
1322
1323 tab(n + 3, fout);
1324 fout << "fIsFinished = false;";
1325 tab(n + 3, fout);
1326 fout << "fThreadPool->SignalAll(fDynamicNumThreads - 1, this);";
1327 tab(n + 3, fout);
1328 fout << "computeThread(0);";
1329 tab(n + 3, fout);
1330 fout << "while (!fThreadPool->IsFinished()) {}";
1331
1332 tab(n + 2, fout);
1333 fout << "}";
1334
1335 tab(n + 2, fout);
1336 fout << "StopMeasure(fStaticNumThreads, fDynamicNumThreads);";
1337
1338 tab(n + 1, fout);
1339 fout << "}";
1340
1341 tab(n + 1, fout);
1342 fout << "void computeThread(int cur_thread) {";
1343
1344 tab(n + 2, fout);
1345 fout << "int count = fCount;";
1346
1347 printlines(n + 2, fZone1Code, fout);
1348 printlines(n + 2, fZone2Code, fout);
1349
1350 tab(n + 2, fout);
1351 fout << "// Init graph state";
1352
1353 tab(n + 2, fout);
1354 fout << "{";
1355 tab(n + 3, fout);
1356 fout << "TaskQueue taskqueue(cur_thread);";
1357 tab(n + 3, fout);
1358 fout << "int tasknum = -1;";
1359
1360 // Init input and output
1361 tab(n + 3, fout);
1362 fout << "// Init input and output";
1363 printlines(n + 3, fZone3Code, fout);
1364
1365 tab(n + 3, fout);
1366 fout << "while (!fIsFinished) {";
1367 tab(n + 4, fout);
1368 fout << "switch (tasknum) {";
1369
1370 // Work stealing task
1371 tab(n + 5, fout);
1372 fout << "case WORK_STEALING_INDEX: { ";
1373 tab(n + 6, fout);
1374 fout << "tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);";
1375 tab(n + 6, fout);
1376 fout << "break;";
1377 tab(n + 5, fout);
1378 fout << "} ";
1379
1380 // End task
1381 tab(n + 5, fout);
1382 fout << "case LAST_TASK_INDEX: { ";
1383 tab(n + 6, fout);
1384 fout << "fIsFinished = true;";
1385 tab(n + 6, fout);
1386 fout << "break;";
1387 tab(n + 5, fout);
1388 fout << "} ";
1389
1390 gTaskCount = START_TASK_INDEX;
1391
1392 // DSP tasks
1393 printLoopGraphScheduler(n + 5, fout);
1394
1395 tab(n + 4, fout);
1396 fout << "}";
1397 tab(n + 3, fout);
1398 fout << "}";
1399 tab(n + 2, fout);
1400 fout << "}";
1401 tab(n + 1, fout);
1402 fout << "}";
1403 }
1404
1405 /**
1406 * Print an auxillary C++ class corresponding to an integer init signal
1407 */
println(int n,ostream & fout)1408 void SigIntGenKlass::println(int n, ostream& fout)
1409 {
1410 tab(n, fout);
1411 fout << "class " << fKlassName << " {";
1412
1413 tab(n, fout);
1414 fout << " private:";
1415 tab(n + 1, fout);
1416 fout << "int fSampleRate;";
1417
1418 for (const auto& k : fSubClassList) k->println(n + 1, fout);
1419
1420 printlines(n + 1, fDeclCode, fout);
1421
1422 tab(n, fout);
1423 fout << " public:";
1424
1425 tab(n + 1, fout);
1426 fout << "int getNumInputs() { "
1427 << "return " << fNumInputs << "; }";
1428 tab(n + 1, fout);
1429 fout << "int getNumOutputs() { "
1430 << "return " << fNumOutputs << "; }";
1431
1432 tab(n + 1, fout);
1433 fout << "void init(int sample_rate) {";
1434 tab(n + 2, fout);
1435 fout << "fSampleRate = sample_rate;";
1436 printlines(n + 2, fInitCode, fout);
1437 printlines(n + 2, fClearCode, fout);
1438 tab(n + 1, fout);
1439 fout << "}";
1440
1441 tab(n + 1, fout);
1442 fout << "void fill(int count, int output[]) {";
1443 printlines(n + 2, fZone1Code, fout);
1444 printlines(n + 2, fZone2Code, fout);
1445 printlines(n + 2, fZone2bCode, fout);
1446 printlines(n + 2, fZone3Code, fout);
1447 printLoopGraphInternal(n + 2, fout);
1448 tab(n + 1, fout);
1449 fout << "}";
1450
1451 tab(n, fout);
1452 fout << "};\n" << endl;
1453 }
1454
1455 /**
1456 * Print an auxillary C++ class corresponding to an float init signal
1457 */
println(int n,ostream & fout)1458 void SigFloatGenKlass::println(int n, ostream& fout)
1459 {
1460 tab(n, fout);
1461 fout << "class " << fKlassName << " {";
1462
1463 tab(n, fout);
1464 fout << " private:";
1465 tab(n + 1, fout);
1466 fout << "int fSampleRate;";
1467
1468 for (const auto& k : fSubClassList) k->println(n + 1, fout);
1469
1470 printlines(n + 1, fDeclCode, fout);
1471
1472 tab(n, fout);
1473 fout << " public:";
1474
1475 tab(n + 1, fout);
1476 fout << "int getNumInputs() { "
1477 << "return " << fNumInputs << "; }";
1478 tab(n + 1, fout);
1479 fout << "int getNumOutputs() { "
1480 << "return " << fNumOutputs << "; }";
1481
1482 tab(n + 1, fout);
1483 fout << "void init(int sample_rate) {";
1484 tab(n + 2, fout);
1485 fout << "fSampleRate = sample_rate;";
1486 printlines(n + 2, fInitCode, fout);
1487 printlines(n + 2, fClearCode, fout);
1488 tab(n + 1, fout);
1489 fout << "}";
1490
1491 tab(n + 1, fout);
1492 fout << subst("void fill(int count, $0 output[]) {", ifloat());
1493 printlines(n + 2, fZone1Code, fout);
1494 printlines(n + 2, fZone2Code, fout);
1495 printlines(n + 2, fZone2bCode, fout);
1496 printlines(n + 2, fZone3Code, fout);
1497 printLoopGraphInternal(n + 2, fout);
1498 tab(n + 1, fout);
1499 fout << "}";
1500
1501 tab(n, fout);
1502 fout << "};\n" << endl;
1503 }
1504
/**
 * Add every element of src to dst (set union accumulated into dst).
 * src is left unchanged.
 */
static void merge(set<string>& dst, set<string>& src)
{
    dst.insert(src.begin(), src.end());
}
1509
collectIncludeFile(set<string> & S)1510 void Klass::collectIncludeFile(set<string>& S)
1511 {
1512 for (const auto& k : fSubClassList) k->collectIncludeFile(S);
1513 merge(S, fIncludeFileSet);
1514 }
1515
collectLibrary(set<string> & S)1516 void Klass::collectLibrary(set<string>& S)
1517 {
1518 for (const auto& k : fSubClassList) k->collectLibrary(S);
1519 merge(S, fLibrarySet);
1520 }
1521