1 /************************************************************************
2  ************************************************************************
3     FAUST compiler
4     Copyright (C) 2003-2018 GRAME, Centre National de Creation Musicale
5     ---------------------------------------------------------------------
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19  ************************************************************************
20  ************************************************************************/
21 
22 /**********************************************************************
23         - klass.cpp : class C++ to be filled (FAUST project) -
24 
25         History :
26         -----------
27         17-10-2001 : initial implementation  (yo)
28         18-10-2001 : add getFreshID (yo)
29         02-11-2001 : add sub-classes (yo)
30         06-11-2001 : change classers impression (yo)
31 
32 ***********************************************************************/
33 
34 #include <stdio.h>
35 #include <iostream>
36 #include <list>
37 #include <map>
38 #include <sstream>
39 #include <string>
40 
41 #include "Text.hh"
42 #include "floats.hh"
43 #include "klass.hh"
44 #include "ppsig.hh"
45 #include "recursivness.hh"
46 #include "signals.hh"
47 #include "uitree.hh"
48 
// Running task counter shared by the scheduler code generation in this file:
// Klass::buildTasksList() adds it to START_TASK_INDEX when emitting fGraph.InitTask
// calls (and resets it to 0 when done), and the scheduler printing functions use it
// to number the generated `case` labels.
static int gTaskCount = 0;

// When true, Klass::printAdditionalCode() emits the faustpower template definitions.
bool Klass::fNeedPowerDef = false;
52 
53 /**
54  * Store the loop used to compute a signal
55  */
setLoopProperty(Tree sig,Loop * l)56 void Klass::setLoopProperty(Tree sig, Loop* l)
57 {
58     fLoopProperty.set(sig, l);
59 }
60 
61 /**
62  * Returns the loop used to compute a signal
63  */
getLoopProperty(Tree sig,Loop * & l)64 bool Klass::getLoopProperty(Tree sig, Loop*& l)
65 {
66     return fLoopProperty.get(sig, l);
67 }
68 
69 /**
70  * Open a non-recursive loop on top of the stack of open loops.
71  * @param size the number of iterations of the loop
72  */
openLoop(const string & size)73 void Klass::openLoop(const string& size)
74 {
75     fTopLoop = new Loop(fTopLoop, size);
76     // cerr << "\nOPEN SHARED LOOP(" << size << ") ----> " << fTopLoop << endl;
77 }
78 
79 /**
80  * Open a recursive loop on top of the stack of open loops.
81  * @param recsymbol the recursive symbol defined in this loop
82  * @param size the number of iterations of the loop
83  */
openLoop(Tree recsymbol,const string & size)84 void Klass::openLoop(Tree recsymbol, const string& size)
85 {
86     fTopLoop = new Loop(recsymbol, fTopLoop, size);
87     // cerr << "\nOPEN REC LOOP(" << *recsymbol << ", " << size << ") ----> " << fTopLoop << endl;
88 }
89 
listAllLoopProperties(Tree sig,set<Loop * > & L,set<Tree> & visited)90 void Klass::listAllLoopProperties(Tree sig, set<Loop*>& L, set<Tree>& visited)
91 {
92     if (visited.count(sig)==0) {
93         visited.insert(sig);
94         Loop* l;
95         if (getLoopProperty(sig, l)) {
96             L.insert(l);
97         } else {
98             // we go down the expression
99             vector<Tree> subsigs;
100             int          n = getSubSignals(sig, subsigs, false);
101             for (int i = 0; i < n; i++) {
102                 listAllLoopProperties(subsigs[i], L, visited);
103             }
104         }
105     }
106 }
107 
108 /**
109  * Close the top loop and either keep it
110  * or absorb it within its enclosing loop.
111  */
closeLoop(Tree sig)112 void Klass::closeLoop(Tree sig)
113 {
114     faustassert(fTopLoop);
115 
116     // fix the missing dependencies
117     set<Loop*> L;
118     set<Tree> V;
119     listAllLoopProperties(sig, L, V);
120     for (Loop* l : L) {
121         fTopLoop->fBackwardLoopDependencies.insert(l);
122     }
123 
124     Loop* l  = fTopLoop;
125     fTopLoop = l->fEnclosingLoop;
126     faustassert(fTopLoop);
127 
128     // l->println(4, cerr);
129     // cerr << endl;
130 
131     Tree S = symlist(sig);
132     // cerr << "CLOSE LOOP :" << l << " with symbols " << *S << endl;
133     if (l->isEmpty() || fTopLoop->hasRecDependencyIn(S)) {
134         // cout << " will absorb" << endl;
135         // empty or dependent loop -> absorbed by enclosing one
136         // cerr << "absorbed by : " << fTopLoop << endl;
137         fTopLoop->absorb(l);
138         // delete l; HACK !!!
139     } else {
140         // cout << " will NOT absorb" << endl;
141         // we have an independent loop
142         setLoopProperty(sig, l);  // associate the signal
143         fTopLoop->fBackwardLoopDependencies.insert(l);
144         // we need to indicate that all recursive symbols defined
145         // in this loop are defined in this loop
146         for (Tree lsym = l->fRecSymbolSet; !isNil(lsym); lsym = tl(lsym)) {
147             this->setLoopProperty(hd(lsym), l);
148             // cerr << "loop " << l << " defines " << *hd(lsym) << endl;
149         }
150     }
151     // cerr << "\n" << endl;
152 }
153 
154 /**
155  * Print a list of elements (e1, e2,...)
156  */
printdecllist(int n,const string & decl,list<string> & content,ostream & fout)157 void printdecllist(int n, const string& decl, list<string>& content, ostream& fout)
158 {
159     if (!content.empty()) {
160         fout << "\\";
161         tab(n, fout);
162         fout << decl;
163         string sep = "(";
164         for (const auto& s : content) {
165             fout << sep << s;
166             sep = ", ";
167         }
168         fout << ')';
169     }
170 }
171 
172 /**
173  * Print the required C++ libraries as comments in source code
174  */
printLibrary(ostream & fout)175 void Klass::printLibrary(ostream& fout)
176 {
177     set<string>           S;
178     set<string>::iterator f;
179 
180     string sep;
181     collectLibrary(S);
182     fout << "/* link with ";
183     for (f = S.begin(), sep = ": "; f != S.end(); f++, sep = ", ") {
184         fout << sep << *f;
185     }
186     fout << " */\n";
187 }
188 
189 /**
190  * Print the required include files
191  */
printIncludeFile(ostream & fout)192 void Klass::printIncludeFile(ostream& fout)
193 {
194     if (gGlobal->gOpenMPSwitch) {
195         fout << "#include <omp.h>\n";
196     }
197 
198     set<string> S;
199     collectIncludeFile(S);
200     for (const auto& f : S) {
201         string inc = f;
202         // Only print non-empty include (inc is actually quoted)
203         if (inc.size() > 2) {
204             fout << "#include " << f << "\n";
205         }
206     }
207 }
208 
209 /**
210  * Print additional functions required by the generated code
211  */
printAdditionalCode(ostream & fout)212 void Klass::printAdditionalCode(ostream& fout)
213 {
214     if (fNeedPowerDef) {
215         // Add faustpower definition to C++ code
216         fout << "#ifndef FAUSTPOWER" << endl;
217         fout << "#define FAUSTPOWER" << endl;
218         fout << "#include <cmath>" << endl;
219 
220         fout << "template <int N> inline int faustpower(int x)              { return faustpower<N/2>(x) * "
221                 "faustpower<N-N/2>(x); } "
222              << endl;
223         fout << "template <> 	 inline int faustpower<0>(int x)            { return 1; }" << endl;
224         fout << "template <> 	 inline int faustpower<1>(int x)            { return x; }" << endl;
225         fout << "template <> 	 inline int faustpower<2>(int x)            { return x*x; }" << endl;
226 
227         if (gGlobal->gFloatSize == 1) {
228             fout << "template <int N> inline float faustpower(float x)            { return faustpower<N/2>(x) * "
229                     "faustpower<N-N/2>(x); } "
230                  << endl;
231             fout << "template <> 	 inline float faustpower<0>(float x)          { return 1; }" << endl;
232             fout << "template <> 	 inline float faustpower<1>(float x)          { return x; }" << endl;
233             fout << "template <> 	 inline float faustpower<2>(float x)          { return x*x; }" << endl;
234 
235         } else if (gGlobal->gFloatSize == 2) {
236             fout << "template <int N> inline double faustpower(double x)          { return faustpower<N/2>(x) * "
237                     "faustpower<N-N/2>(x); } "
238                  << endl;
239             fout << "template <> 	 inline double faustpower<0>(double x)        { return 1; }" << endl;
240             fout << "template <> 	 inline double faustpower<1>(double x)        { return x; }" << endl;
241             fout << "template <> 	 inline double faustpower<2>(double x)        { return x*x; }" << endl;
242 
243         } else if (gGlobal->gFloatSize == 3) {
244             fout << "template <int N> inline long double faustpower(long double x){ return powl(x,N); }" << endl;
245         }
246         fout << "#endif" << endl;
247     }
248 }
249 
250 /**
251  * Print metadata declaration
252  */
printMetadata(int n,const MetaDataSet & S,ostream & fout)253 void Klass::printMetadata(int n, const MetaDataSet& S, ostream& fout)
254 {
255     tab(n, fout);
256     fout << "virtual void metadata(Meta* m) { ";
257 
258     // We do not want to accumulate metadata from all hierachical levels, so the upper level only is kept
259     for (const auto& i : gGlobal->gMetaDataSet) {
260         if (i.first != tree("author")) {
261             tab(n + 1, fout);
262             fout << "m->declare(\"" << *(i.first) << "\", " << **(i.second.begin()) << ");";
263         } else {
264             // But the "author" meta data is accumulated, the upper level becomes the main author and sub-levels become
265             // "contributor"
266             for (const auto& j : i.second) {
267                 if (j == *i.second.begin()) {
268                     tab(n + 1, fout);
269                     fout << "m->declare(\"" << *(i.first) << "\", " << *j << ");";
270                 } else {
271                     tab(n + 1, fout);
272                     fout << "m->declare(\""
273                          << "contributor"
274                          << "\", " << *j << ");";
275                 }
276             }
277         }
278     }
279 
280     tab(n, fout);
281     fout << "}" << endl;
282 }
283 
isElement(const set<Loop * > & S,Loop * l)284 inline bool isElement(const set<Loop*>& S, Loop* l)
285 {
286     return S.find(l) != S.end();
287 }
288 
289 /**
290  * Print a loop graph deep first
291  */
printLoopDeepFirst(int n,ostream & fout,Loop * l,set<Loop * > & visited)292 void Klass::printLoopDeepFirst(int n, ostream& fout, Loop* l, set<Loop*>& visited)
293 {
294     // avoid printing already printed loops
295     if (isElement(visited, l)) return;
296 
297     // remember we have printed this loop
298     visited.insert(l);
299 
300     // print the dependencies loops (that need to be computed before this one)
301     for (lset::const_iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end(); p++) {
302         printLoopDeepFirst(n, fout, *p, visited);
303     }
304     // the print the loop itself
305     tab(n, fout);
306     tab(n, fout);
307     fout << "// LOOP " << l << ", ORDER " << l->fOrder << endl;
308     l->println(n + 1, fout);
309 }
310 
311 /**
312  * Compute how many time each loop is used in a DAG
313  */
computeUseCount(Loop * l)314 static void computeUseCount(Loop* l)
315 {
316     l->fUseCount++;
317     if (l->fUseCount == 1) {
318         for (lset::iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end(); p++) {
319             computeUseCount(*p);
320         }
321     }
322 }
323 
324 /**
325  * Group together sequences of loops
326  */
groupSeqLoops(Loop * l,set<Loop * > & visited)327 static void groupSeqLoops(Loop* l, set<Loop*>& visited)
328 {
329     if (visited.find(l) == visited.end()) {
330         visited.insert(l);
331         int n = (int)l->fBackwardLoopDependencies.size();
332         if (n == 0) {
333             return;
334         } else if (n == 1) {
335             Loop* f = *(l->fBackwardLoopDependencies.begin());
336             if (f->fUseCount == 1) {
337                 l->concat(f);
338                 groupSeqLoops(l, visited);
339             } else {
340                 groupSeqLoops(f, visited);
341             }
342             return;
343         } else if (n > 1) {
344             for (lset::iterator p = l->fBackwardLoopDependencies.begin(); p != l->fBackwardLoopDependencies.end();
345                  p++) {
346                 groupSeqLoops(*p, visited);
347             }
348         }
349     }
350 }
351 
// Indexes of the tasks in the generated scheduler code:
// the two first ones are reserved, the DSP tasks are numbered from START_TASK_INDEX
#define WORK_STEALING_INDEX 0
#define LAST_TASK_INDEX 1
// Parenthesized so that the macro stays a single value inside a larger expression
#define START_TASK_INDEX (LAST_TASK_INDEX + 1)

// Ready tasks lists shorter than this are handled by thread 0 only (see Klass::buildTasksList)
#define START_TASK_MAX 2
357 
buildTasksList()358 void Klass::buildTasksList()
359 {
360     lgraph G;
361 
362     if (gGlobal->gGroupTaskSwitch) {
363         computeUseCount(fTopLoop);
364         set<Loop*> visited;
365         groupSeqLoops(fTopLoop, visited);
366     }
367 
368     sortGraph(fTopLoop, G);
369     int index_task = START_TASK_INDEX;
370 
371     addDeclCode("TaskGraph fGraph;");
372     addDeclCode("FAUSTFLOAT** input;");
373     addDeclCode("FAUSTFLOAT** output;");
374     addDeclCode("volatile bool fIsFinished;");
375     addDeclCode("int fCount;");
376     addDeclCode("int fIndex;");
377     addDeclCode("DSPThreadPool* fThreadPool;");
378     addDeclCode("int fStaticNumThreads;");
379     addDeclCode("int fDynamicNumThreads;");
380 
381     // Compute forward dependencies
382     for (int l = (int)G.size() - 1; l >= 0; l--) {
383         for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
384             for (lset::const_iterator p1 = (*p)->fBackwardLoopDependencies.begin();
385                  p1 != (*p)->fBackwardLoopDependencies.end(); p1++) {
386                 (*p1)->fForwardLoopDependencies.insert((*p));
387             }
388             (*p)->fIndex = index_task;
389             index_task++;
390         }
391     }
392 
393     // Compute ready tasks list
394     vector<int> task_num;
395     for (int l = (int)G.size() - 1; l >= 0; l--) {
396         lset::const_iterator next;
397         for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
398             if ((*p)->fBackwardLoopDependencies.size() == 0) {
399                 task_num.push_back((*p)->fIndex);
400             }
401         }
402     }
403 
404     if (task_num.size() < START_TASK_MAX) {
405         // Push ready tasks thread 0, execute one task directly
406 
407         addZone3("if (cur_thread == 0) {");
408 
409         Loop* keep = NULL;
410         for (int l = (int)G.size() - 1; l >= 0; l--) {
411             lset::const_iterator next;
412             for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
413                 if ((*p)->fBackwardLoopDependencies.size() == 0) {
414                     if (keep == NULL) {
415                         keep = *p;
416                     } else {
417                         addZone3(subst("    taskqueue.PushHead($0);", T((*p)->fIndex)));
418                     }
419                 }
420             }
421         }
422 
423         if (keep != NULL) {
424             addZone3(subst("    tasknum = $0;", T(keep->fIndex)));
425         }
426 
427         addZone3("} else {");
428         addZone3("    tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);");
429         addZone3("}");
430 
431     } else {
432         // Cut ready tasks list and have each thread (dynamically) use a subpart
433         addZone3(subst("int task_list_size = $0;", T((int)task_num.size())));
434         stringstream buf;
435         buf << "int task_list[" << task_num.size() << "] = {";
436         for (size_t i = 0; i < task_num.size(); i++) {
437             buf << task_num[i];
438             if (i != (task_num.size() - 1)) buf << ",";
439         }
440         buf << "};";
441 
442         addZone3(buf.str());
443         addZone3("taskqueue.InitTaskList(task_list_size, task_list, fDynamicNumThreads, cur_thread, tasknum);");
444     }
445 
446     // Last stage connected to end task
447     if (G[0].size() > 1) {
448         addZone2c("// Initialize end task, if more than one input");
449         addZone2c(subst("fGraph.InitTask($0,$1);", T(LAST_TASK_INDEX), T((int)G[0].size())));
450     } else {
451         addZone2c("// End task has only one input, so will be directly activated");
452     }
453 
454     // Compute init section
455     addZone2c("// Only initialize taks with more than one input");
456     for (int l = (int)G.size() - 1; l >= 0; l--) {
457         for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
458             if ((*p)->fBackwardLoopDependencies.size() > 1) {  // Only initialize taks with more than 1 input, since
459                                                                // taks with one input are "directly" activated.
460                 addZone2c(subst("fGraph.InitTask($0,$1);", T(START_TASK_INDEX + gTaskCount++),
461                                 T((int)(*p)->fBackwardLoopDependencies.size())));
462             } else {
463                 gTaskCount++;
464             }
465         }
466     }
467 
468     addInitCode("fStaticNumThreads = get_max_cpu();");
469     addInitCode(
470         "fDynamicNumThreads = getenv(\"OMP_NUM_THREADS\") ? atoi(getenv(\"OMP_NUM_THREADS\")) : fStaticNumThreads;");
471     addInitCode("fThreadPool->StartAll(fStaticNumThreads - 1, false);");
472 
473     gTaskCount = 0;
474 }
475 
476 /**
477  * Print the loop graph (used for vector code)
478  */
printLoopGraphVector(int n,ostream & fout)479 void Klass::printLoopGraphVector(int n, ostream& fout)
480 {
481     if (gGlobal->gGroupTaskSwitch) {
482         computeUseCount(fTopLoop);
483         set<Loop*> visited;
484         groupSeqLoops(fTopLoop, visited);
485     }
486 
487     lgraph G;
488     sortGraph(fTopLoop, G);
489 
490 #if 1
491     // EXPERIMENTAL
492     if (gGlobal->gVectorSwitch && gGlobal->gDeepFirstSwitch) {
493         set<Loop*> visited;
494         printLoopDeepFirst(n, fout, fTopLoop, visited);
495         return;
496     }
497 #endif
498 
499     // normal mode
500     for (int l = (int)G.size() - 1; l >= 0; l--) {
501         if (gGlobal->gVectorSwitch) {
502             tab(n, fout);
503             fout << "// Section : " << G.size() - l;
504         }
505         for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
506             (*p)->println(n, fout);
507         }
508     }
509 }
510 
511 /**
512  * Print the loop graph as a serie of parallel loops
513  */
printLoopGraphOpenMP(int n,ostream & fout)514 void Klass::printLoopGraphOpenMP(int n, ostream& fout)
515 {
516     if (gGlobal->gGroupTaskSwitch) {
517         computeUseCount(fTopLoop);
518         set<Loop*> visited;
519         groupSeqLoops(fTopLoop, visited);
520     }
521 
522     lgraph G;
523     sortGraph(fTopLoop, G);
524 
525     // OpenMP mode : add OpenMP directives
526     for (int l = (int)G.size() - 1; l >= 0; l--) {
527         tab(n, fout);
528         fout << "// Section : " << G.size() - l;
529         printLoopLevelOpenMP(n, (int)G.size() - l, G[l], fout);
530     }
531 }
532 
533 /**
534  * Print the loop graph as a serie of parallel loops
535  */
printLoopGraphScheduler(int n,ostream & fout)536 void Klass::printLoopGraphScheduler(int n, ostream& fout)
537 {
538     if (gGlobal->gGroupTaskSwitch) {
539         computeUseCount(fTopLoop);
540         set<Loop*> visited;
541         groupSeqLoops(fTopLoop, visited);
542     }
543 
544     lgraph G;
545     sortGraph(fTopLoop, G);
546 
547     // OpenMP mode : add OpenMP directives
548     for (int l = (int)G.size() - 1; l > 0; l--) {
549         tab(n, fout);
550         fout << "// Section : " << G.size() - l;
551         printLoopLevelScheduler(n, (int)G.size() - l, G[l], fout);
552     }
553 
554     printLastLoopLevelScheduler(n, (int)G.size(), G[0], fout);
555 }
556 
557 /**
558  * Print the loop graph in dot format
559  */
printGraphDotFormat(ostream & fout)560 void Klass::printGraphDotFormat(ostream& fout)
561 {
562     lgraph G;
563     sortGraph(fTopLoop, G);
564 
565     fout << "strict digraph loopgraph {" << endl;
566     fout << '\t' << "rankdir=LR;" << endl;
567     fout << '\t' << "node[color=blue, fillcolor=lightblue, style=filled, fontsize=9];" << endl;
568 
569     int lnum = 0;  // used for loop numbers
570     // for each level of the graph
571     for (int l = (int)G.size() - 1; l >= 0; l--) {
572         // for each task in the level
573         for (lset::const_iterator t = G[l].begin(); t != G[l].end(); t++) {
574             // print task label "Lxxx : 0xffffff"
575             fout << '\t' << 'L' << (*t) << "[label=<<font face=\"verdana,bold\">L" << lnum++ << "</font> : " << (*t)
576                  << ">];" << endl;
577             // for each source of the task
578             for (lset::const_iterator src = (*t)->fBackwardLoopDependencies.begin();
579                  src != (*t)->fBackwardLoopDependencies.end(); src++) {
580                 // print the connection Lxxx -> Lyyy;
581                 fout << '\t' << 'L' << (*src) << "->" << 'L' << (*t) << ';' << endl;
582             }
583         }
584     }
585     fout << "}" << endl;
586 }
587 
588 /**
589  * Print the loop graph (used for internals classes)
590  */
printLoopGraphInternal(int n,ostream & fout)591 void Klass::printLoopGraphInternal(int n, ostream& fout)
592 {
593     lgraph G;
594     sortGraph(fTopLoop, G);
595 
596     // normal mode
597     for (int l = (int)G.size() - 1; l >= 0; l--) {
598         if (gGlobal->gVectorSwitch) {
599             tab(n, fout);
600             fout << "// Section : " << G.size() - l;
601         }
602         for (lset::const_iterator p = G[l].begin(); p != G[l].end(); p++) {
603             (*p)->printoneln(n, fout);
604         }
605     }
606 }
607 
608 /**
609  * Print the loop graph (scalar mode)
610  */
printLoopGraphScalar(int n,ostream & fout)611 void Klass::printLoopGraphScalar(int n, ostream& fout)
612 {
613     fTopLoop->printoneln(n, fout);
614 }
615 
616 /**
617  * returns true if all the loops are non recursive
618  */
nonRecursiveLevel(const lset & L)619 static bool nonRecursiveLevel(const lset& L)
620 {
621     for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
622         if ((*p)->fIsRecursive) return false;
623     }
624     return true;
625 }
626 
627 /**
628  * Print the 'level' of the loop graph as a set of
629  * parallel loops
630  */
printLoopLevelOpenMP(int n,int lnum,const lset & L,ostream & fout)631 void Klass::printLoopLevelOpenMP(int n, int lnum, const lset& L, ostream& fout)
632 {
633     if (nonRecursiveLevel(L) && L.size() == 1) {
634         for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
635             if ((*p)->isEmpty() == false) {
636                 if (gGlobal->gOpenMPLoop) {
637                     (*p)->printParLoopln(n, fout);
638                 } else {
639                     tab(n, fout);
640                     fout << "#pragma omp single ";
641                     tab(n, fout);
642                     fout << "{ ";
643                     (*p)->println(n + 1, fout);
644                     tab(n, fout);
645                     fout << "} ";
646                 }
647             }
648         }
649 
650     } else if (L.size() > 1) {
651         tab(n, fout);
652         fout << "#pragma omp sections ";
653         tab(n, fout);
654         fout << "{ ";
655         for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
656             tab(n + 1, fout);
657             fout << "#pragma omp section ";
658             tab(n + 1, fout);
659             fout << "{";
660             (*p)->println(n + 2, fout);
661             tab(n + 1, fout);
662             fout << "} ";
663         }
664         tab(n, fout);
665         fout << "} ";
666     } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
667         tab(n, fout);
668         fout << "#pragma omp single ";
669         tab(n, fout);
670         fout << "{ ";
671         for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
672             (*p)->println(n + 1, fout);
673         }
674         tab(n, fout);
675         fout << "} ";
676     }
677 }
678 
679 /**
680  * Print the 'level' of the loop graph as a set of
681  * parallel loops
682  */
printLastLoopLevelScheduler(int n,int lnum,const lset & L,ostream & fout)683 void Klass::printLastLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
684 {
685     if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
686         lset::const_iterator p = L.begin();
687         tab(n, fout);
688         fout << "case " << gTaskCount++ << ": { ";
689         (*p)->println(n + 1, fout);
690         tab(n + 1, fout);
691         fout << "tasknum = LAST_TASK_INDEX;";
692         tab(n + 1, fout);
693         fout << "break;";
694         tab(n, fout);
695         fout << "} ";
696 
697     } else if (L.size() > 1) {
698         for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
699             tab(n, fout);
700             fout << "case " << gTaskCount++ << ": { ";
701             (*p)->println(n + 1, fout);
702             tab(n + 1, fout);
703             fout << "fGraph.ActivateOneOutputTask(taskqueue, LAST_TASK_INDEX, tasknum);";
704             tab(n + 1, fout);
705             fout << "break;";
706             tab(n, fout);
707             fout << "} ";
708         }
709 
710     } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
711         lset::const_iterator p = L.begin();
712         tab(n, fout);
713         fout << "case " << gTaskCount++ << ": { ";
714         (*p)->println(n + 1, fout);
715         tab(n + 1, fout);
716         fout << "tasknum = LAST_TASK_INDEX;";
717         tab(n + 1, fout);
718         fout << "break;";
719         tab(n, fout);
720         fout << "} ";
721     }
722 }
723 
printOneLoopScheduler(lset::const_iterator p,int n,ostream & fout)724 void Klass::printOneLoopScheduler(lset::const_iterator p, int n, ostream& fout)
725 {
726     tab(n, fout);
727     fout << "case " << gTaskCount++ << ": { ";
728     (*p)->println(n + 1, fout);
729 
730     // One output only
731     if ((*p)->fForwardLoopDependencies.size() == 1) {
732         lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
733         if ((*p1)->fBackwardLoopDependencies.size() == 1) {
734             tab(n + 1, fout);
735             fout << subst("tasknum = $0;", T((*p1)->fIndex));
736         } else {
737             tab(n + 1, fout);
738             fout << subst("fGraph.ActivateOneOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
739         }
740 
741     } else {
742         Loop* keep = NULL;
743         // Find one output with only one backward dependencies
744         for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
745              p1 != (*p)->fForwardLoopDependencies.end(); p1++) {
746             if ((*p1)->fBackwardLoopDependencies.size() == 1) {
747                 keep = *p1;
748                 break;
749             }
750         }
751 
752         if (keep == NULL) {
753             tab(n + 1, fout);
754             fout << "tasknum = WORK_STEALING_INDEX;";
755         }
756 
757         for (lset::const_iterator p1 = (*p)->fForwardLoopDependencies.begin();
758              p1 != (*p)->fForwardLoopDependencies.end(); p1++) {
759             if ((*p1)->fBackwardLoopDependencies.size() == 1) {  // Task is the only input
760                 if (*p1 != keep) {
761                     tab(n + 1, fout);
762                     fout << subst("taskqueue.PushHead($0);", T((*p1)->fIndex));
763                 }
764             } else {
765                 if (keep == NULL) {
766                     tab(n + 1, fout);
767                     fout << subst("fGraph.ActivateOutputTask(taskqueue, $0, tasknum);", T((*p1)->fIndex));
768                 } else {
769                     tab(n + 1, fout);
770                     fout << subst("fGraph.ActivateOutputTask(taskqueue, $0);", T((*p1)->fIndex));
771                 }
772             }
773         }
774 
775         if (keep != NULL) {
776             tab(n + 1, fout);
777             fout << subst("tasknum = $0;", T(keep->fIndex));  // Last one
778         } else {
779             tab(n + 1, fout);
780             fout << "fGraph.GetReadyTask(taskqueue, tasknum);";  // Last one
781         }
782     }
783 
784     tab(n + 1, fout);
785     fout << "break;";
786     tab(n, fout);
787     fout << "} ";
788 }
789 
790 /**
791  * Print the 'level' of the loop graph as a set of
792  * parallel loops
793  */
794 
printLoopLevelScheduler(int n,int lnum,const lset & L,ostream & fout)795 void Klass::printLoopLevelScheduler(int n, int lnum, const lset& L, ostream& fout)
796 {
797     if (nonRecursiveLevel(L) && L.size() == 1 && !(*L.begin())->isEmpty()) {
798         printOneLoopScheduler(L.begin(), n, fout);
799     } else if (L.size() > 1) {
800         for (lset::const_iterator p = L.begin(); p != L.end(); p++) {
801             printOneLoopScheduler(p, n, fout);
802         }
803     } else if (L.size() == 1 && !(*L.begin())->isEmpty()) {
804         printOneLoopScheduler(L.begin(), n, fout);
805     }
806 }
807 
808 /**
809  * Print a full C++ class corresponding to a Faust dsp
810  */
println(int n,ostream & fout)811 void Klass::println(int n, ostream& fout)
812 {
813     list<Klass*>::iterator k;
814 
815     tab(n, fout);
816     fout << "#ifndef FAUSTCLASS " << endl;
817     fout << "#define FAUSTCLASS " << fKlassName << endl;
818     fout << "#endif" << endl;
819 
820     if (gGlobal->gSchedulerSwitch) {
821         tab(n, fout);
822         fout << "class " << fKlassName << " : public " << fSuperKlassName << ", public Runnable {";
823     } else {
824         tab(n, fout);
825         fout << "class " << fKlassName << " : public " << fSuperKlassName << " {";
826     }
827 
828     if (gGlobal->gUIMacroSwitch) {
829         tab(n, fout);
830         fout << "  public:";
831     } else {
832         tab(n, fout);
833         fout << "  private:";
834     }
835 
836     for (k = fSubClassList.begin(); k != fSubClassList.end(); k++) (*k)->println(n + 1, fout);
837 
838     printlines(n + 1, fDeclCode, fout);
839 
840     tab(n + 1, fout);
841     fout << "int fSampleRate;\n";
842 
843     tab(n, fout);
844     fout << "  public:";
845 
846     if (gGlobal->gMemoryManager) {
847         tab(n + 1, fout);
848         fout << "static dsp_memory_manager* fManager;" << endl;
849     }
850 
851     printMetadata(n + 1, gGlobal->gMetaDataSet, fout);
852 
853     if (gGlobal->gSchedulerSwitch) {
854         tab(n + 1, fout);
855         fout << fKlassName << "() { "
856              << "fThreadPool = DSPThreadPool::Init(); }";
857 
858         tab(n + 1, fout);
859         fout << "virtual ~" << fKlassName << "() { "
860              << "DSPThreadPool::Destroy(); }";
861     }
862 
863     tab(n + 1, fout);
864     fout << "virtual int getNumInputs() { "
865          << "return " << fNumInputs << "; }";
866 
867     tab(n + 1, fout);
868     fout << "virtual int getNumOutputs() { "
869          << "return " << fNumOutputs << "; }";
870 
871     tab(n + 1, fout);
872     fout << "static void classInit(int sample_rate) {";
873     printlines(n + 2, fStaticInitCode, fout);
874     tab(n + 1, fout);
875     fout << "}";
876 
877     if (gGlobal->gMemoryManager) {
878         tab(n + 1, fout);
879         fout << "static void classDestroy() {";
880         printlines(n + 2, fStaticDestroyCode, fout);
881         tab(n + 1, fout);
882         fout << "}";
883     }
884 
885     tab(n + 1, fout);
886     fout << "virtual void instanceConstants(int sample_rate) {";
887     tab(n + 2, fout);
888     fout << "fSampleRate = sample_rate;";
889     printlines(n + 2, fInitCode, fout);
890     tab(n + 1, fout);
891     fout << "}";
892 
893     tab(n + 1, fout);
894     fout << "virtual void instanceResetUserInterface() {";
895     printlines(n + 2, fInitUICode, fout);
896     tab(n + 1, fout);
897     fout << "}";
898 
899     tab(n + 1, fout);
900     fout << "virtual void instanceClear() {";
901     printlines(n + 2, fClearCode, fout);
902     tab(n + 1, fout);
903     fout << "}";
904 
905     if (gGlobal->gMemoryManager) {
906         tab(n + 1, fout);
907         fout << "virtual void init(int sample_rate) {}";
908     } else {
909         tab(n + 1, fout);
910         fout << "virtual void init(int sample_rate) {";
911         tab(n + 2, fout);
912         fout << "classInit(sample_rate);";
913         tab(n + 2, fout);
914         fout << "instanceInit(sample_rate);";
915         tab(n + 1, fout);
916         fout << "}";
917     }
918 
919     tab(n + 1, fout);
920     fout << "virtual void instanceInit(int sample_rate) {";
921     tab(n + 2, fout);
922     fout << "instanceConstants(sample_rate);";
923     tab(n + 2, fout);
924     fout << "instanceResetUserInterface();";
925     tab(n + 2, fout);
926     fout << "instanceClear();";
927     tab(n + 1, fout);
928     fout << "}";
929 
930     tab(n + 1, fout);
931     fout << "virtual " << fKlassName << "* clone() {";
932     tab(n + 2, fout);
933     fout << "return new " << fKlassName << "();";
934     tab(n + 1, fout);
935     fout << "}";
936 
937     tab(n + 1, fout);
938     fout << "virtual int getSampleRate() {";
939     tab(n + 2, fout);
940     fout << "return fSampleRate;";
941     tab(n + 1, fout);
942     fout << "}";
943 
944     tab(n + 1, fout);
945     fout << "virtual void buildUserInterface(UI* ui_interface) {";
946     printlines(n + 2, fUICode, fout);
947     tab(n + 1, fout);
948     fout << "}";
949 
950     printComputeMethod(n, fout);
951 
952     tab(n, fout);
953     fout << "};\n" << endl;
954 
955     printlines(n, fStaticFields, fout);
956 
957     if (gGlobal->gMemoryManager) {
958         tab(n, fout);
959         fout << "dsp_memory_manager* " << fKlassName << "::fManager = 0;" << endl;
960     }
961 
962     // generate user interface macros if needed
963     if (gGlobal->gUIMacroSwitch) {
964         tab(n, fout);
965         fout << "#ifdef FAUST_UIMACROS";
966         tab(n + 1, fout);
967         tab(n + 1, fout);
968         for (const auto& it : gGlobal->gMetaDataSet) {
969             if (it.first == tree("filename")) {
970                 fout << "#define FAUST_FILE_NAME " << **(it.second.begin());
971                 break;
972             }
973         }
974         tab(n + 1, fout);
975         fout << "#define FAUST_CLASS_NAME " << "\"" << fKlassName << "\"";
976         tab(n + 1, fout);
977         fout << "#define FAUST_INPUTS " << fNumInputs;
978         tab(n + 1, fout);
979         fout << "#define FAUST_OUTPUTS " << fNumOutputs;
980         tab(n + 1, fout);
981         fout << "#define FAUST_ACTIVES " << fNumActives;
982         tab(n + 1, fout);
983         fout << "#define FAUST_PASSIVES " << fNumPassives;
984         printlines(n + 1, fUIMacro, fout);
985         tab(n, fout);
986         fout << "#endif" << endl;
987     }
988 
989     fout << endl;
990 }
991 
992 /**
993  * Print Compute() method according to the various switch
994  */
printComputeMethod(int n,ostream & fout)995 void Klass::printComputeMethod(int n, ostream& fout)
996 {
997     if (gGlobal->gSchedulerSwitch) {
998         printComputeMethodScheduler(n, fout);
999     } else if (gGlobal->gOpenMPSwitch) {
1000         printComputeMethodOpenMP(n, fout);
1001     } else if (gGlobal->gVectorSwitch) {
1002         switch (gGlobal->gVectorLoopVariant) {
1003             case 0:
1004                 printComputeMethodVectorFaster(n, fout);
1005                 break;
1006             case 1:
1007                 printComputeMethodVectorSimple(n, fout);
1008                 break;
1009             default: {
1010                 stringstream error;
1011                 error << "ERROR : unknown loop variant" << gGlobal->gVectorLoopVariant << endl;
1012                 throw faustexception(error.str());
1013             }
1014         }
1015     } else {
1016         printComputeMethodScalar(n, fout);
1017     }
1018 }
1019 
printComputeMethodScalar(int n,ostream & fout)1020 void Klass::printComputeMethodScalar(int n, ostream& fout)
1021 {
1022     tab(n + 1, fout);
1023     fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1024     tab(n + 2, fout);
1025     fout << "//zone1";
1026     printlines(n + 2, fZone1Code, fout);
1027     tab(n + 2, fout);
1028     fout << "//zone2";
1029     printlines(n + 2, fZone2Code, fout);
1030     tab(n + 2, fout);
1031     fout << "//zone2b";
1032     printlines(n + 2, fZone2bCode, fout);
1033     tab(n + 2, fout);
1034     fout << "//zone3";
1035     printlines(n + 2, fZone3Code, fout);
1036     tab(n + 2, fout);
1037     fout << "//LoopGraphScalar";
1038     printLoopGraphScalar(n + 2, fout);
1039     printlines(n + 2, fZone4Code, fout);
1040     tab(n + 1, fout);
1041     fout << "}";
1042 }
1043 
1044 /**
1045  * Uses loops of constant gVecSize boundary in order to provide the
1046  * C compiler with more optimisation opportunities. Improves performances
1047  * in general, but not always
1048  */
printComputeMethodVectorFaster(int n,ostream & fout)1049 void Klass::printComputeMethodVectorFaster(int n, ostream& fout)
1050 {
1051     // in vector mode we need to split loops in smaller pieces not larger
1052     // than gVecSize
1053     tab(n + 1, fout);
1054     fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1055     printlines(n + 2, fZone1Code, fout);
1056     printlines(n + 2, fZone2Code, fout);
1057     printlines(n + 2, fZone2bCode, fout);
1058 
1059     tab(n + 2, fout);
1060     fout << "int index;";
1061     tab(n + 2, fout);
1062     fout << "int fullcount = count;";
1063 
1064     tab(n + 2, fout);
1065     fout << "for (index = 0; index <= fullcount - " << gGlobal->gVecSize << "; index += " << gGlobal->gVecSize << ") {";
1066     tab(n + 3, fout);
1067     fout << "// compute by blocks of " << gGlobal->gVecSize << " samples";
1068     tab(n + 3, fout);
1069     fout << "const int count = " << gGlobal->gVecSize << ";";
1070     printlines(n + 3, fZone3Code, fout);
1071     printLoopGraphVector(n + 3, fout);
1072     tab(n + 2, fout);
1073     fout << "}";
1074 
1075     tab(n + 2, fout);
1076     fout << "if (index < fullcount) {";
1077     tab(n + 3, fout);
1078     fout << "// compute the remaining samples if any";
1079     tab(n + 3, fout);
1080     fout << "int count = fullcount-index;";
1081     printlines(n + 3, fZone3Code, fout);
1082     printLoopGraphVector(n + 3, fout);
1083     tab(n + 2, fout);
1084     fout << "}";
1085 
1086     printlines(n + 2, fZone4Code, fout);
1087     tab(n + 1, fout);
1088     fout << "}";
1089 }
1090 
1091 /**
1092  * Simple loop layout, generally less efficient than printComputeMethodVectorFaster
1093  */
printComputeMethodVectorSimple(int n,ostream & fout)1094 void Klass::printComputeMethodVectorSimple(int n, ostream& fout)
1095 {
1096     // in vector mode we need to split loops in smaller pieces not larger
1097     // than gVecSize
1098     tab(n + 1, fout);
1099     fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1100     printlines(n + 2, fZone1Code, fout);
1101     printlines(n + 2, fZone2Code, fout);
1102     printlines(n + 2, fZone2bCode, fout);
1103 
1104     tab(n + 2, fout);
1105     fout << "int fullcount = count;";
1106     tab(n + 2, fout);
1107     fout << "for (int index = 0; index < fullcount; index += " << gGlobal->gVecSize << ") {";
1108     tab(n + 3, fout);
1109     fout << "int count = min(" << gGlobal->gVecSize << ", fullcount-index);";
1110     printlines(n + 3, fZone3Code, fout);
1111     printLoopGraphVector(n + 3, fout);
1112     tab(n + 2, fout);
1113     fout << "}";
1114 
1115     printlines(n + 2, fZone4Code, fout);
1116     tab(n + 1, fout);
1117     fout << "}";
1118 }
1119 
1120 /*
1121 void Klass::printComputeMethodVectorFix0 (int n, ostream& fout)
1122 {
1123     // in vector mode we need to split loops in smaller pieces not larger
1124     // than gVecSize
1125     tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
1126         printlines(n+2, fZone1Code, fout);
1127         printlines(n+2, fZone2Code, fout);
1128         printlines(n+2, fZone2bCode, fout);
1129         tab(n+2,fout); fout << "for (int index = 0; index < fullcount; index += " << gVecSize << ") {";
1130             tab(n+3,fout); fout << "if (fullcount >= index + " << gVecSize << ") {";
1131                 tab(n+4,fout); fout << "// compute by blocks of " << gVecSize << " samples";
1132                 tab(n+4,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
1133                 printlines(n+4, fZone3Code, fout);
1134                 printLoopGraph (n+4,fout);
1135             tab(n+3,fout); fout << "} else if (fullcount > index) {";
1136                 //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
1137                 tab(n+4,fout); fout << "// compute the remaining samples";
1138                 tab(n+4,fout); fout << "int count = fullcount-index;" ;
1139                 printlines(n+4, fZone3Code, fout);
1140                 printLoopGraph (n+4,fout);
1141             tab(n+3,fout); fout << "}";
1142         tab(n+2,fout); fout << "}";
1143     tab(n+1,fout); fout << "}";
1144 }
1145 
1146 void Klass::printComputeMethodVectorFix1 (int n, ostream& fout)
1147 {
1148     // in vector mode we need to split loops in smaller pieces not larger
1149     // than gVecSize
1150     tab(n+1,fout); fout << "virtual void compute (int fullcount, float** input, float** output) {";
1151         printlines(n+2, fZone1Code, fout);
1152         printlines(n+2, fZone2Code, fout);
1153         printlines(n+2, fZone2bCode, fout);
1154 
1155         tab(n+2,fout); fout << "int \tblock;";
1156         tab(n+2,fout); fout << "for (block = 0; block < fullcount/" << gVecSize << "; block++) {";
1157             tab(n+3,fout); fout << "// compute by blocks of " << gVecSize << " samples";
1158             tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
1159             tab(n+3,fout); fout << "const int count = " << gVecSize << ";"; // temporaire
1160             printlines(n+3, fZone3Code, fout);
1161             printLoopGraph (n+3,fout);
1162         tab(n+2,fout); fout << "}";
1163 
1164         tab(n+2,fout); fout << "if (fullcount%" << gVecSize << " != 0) {";
1165             //tab(n+3,fout); fout << "int count = min ("<< gVecSize << ", fullcount-index);";
1166             tab(n+3,fout); fout << "// compute the remaining samples";
1167             tab(n+3,fout); fout << "const int index = block*" << gVecSize << ";";
1168             tab(n+3,fout); fout << "int count = fullcount%" << gVecSize << ";" ;
1169             printlines(n+3, fZone3Code, fout);
1170             printLoopGraph (n+3,fout);
1171         tab(n+2,fout); fout << "}";
1172     tab(n+1,fout); fout << "}";
1173 }*/
1174 
printComputeMethodOpenMP(int n,ostream & fout)1175 void Klass::printComputeMethodOpenMP(int n, ostream& fout)
1176 {
1177     // in openMP mode we need to split loops in smaller pieces not larger
1178     // than gVecSize and add OpenMP pragmas
1179     tab(n + 1, fout);
1180     fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1181     printlines(n + 2, fZone1Code, fout);
1182     printlines(n + 2, fZone2Code, fout);
1183     tab(n + 2, fout);
1184     fout << "int fullcount = count;";
1185     tab(n + 2, fout);
1186     fout << "#pragma omp parallel";
1187     printdecllist(n + 3, "firstprivate", fFirstPrivateDecl, fout);
1188 
1189     tab(n + 2, fout);
1190     fout << "{";
1191     if (!fZone2bCode.empty()) {
1192         tab(n + 3, fout);
1193         fout << "#pragma omp single";
1194         tab(n + 3, fout);
1195         fout << "{";
1196         printlines(n + 4, fZone2bCode, fout);
1197         tab(n + 3, fout);
1198         fout << "}";
1199     }
1200 
1201     tab(n + 3, fout);
1202     fout << "for (int index = 0; index < fullcount; index += " << gGlobal->gVecSize << ") {";
1203     tab(n + 4, fout);
1204     fout << "int count = min (" << gGlobal->gVecSize << ", fullcount-index);";
1205 
1206     printlines(n + 4, fZone3Code, fout);
1207     printLoopGraphOpenMP(n + 4, fout);
1208 
1209     tab(n + 3, fout);
1210     fout << "}";
1211 
1212     tab(n + 2, fout);
1213     fout << "}";
1214 
1215     printlines(n + 2, fZone4Code, fout);
1216     tab(n + 1, fout);
1217     fout << "}";
1218 }
1219 
1220 /*
1221 void Klass::printComputeMethodScheduler (int n, ostream& fout)
1222 {
1223     tab(n+1,fout); fout << subst("virtual void compute (int fullcount, $0** input, $0** output) {", xfloat());
1224         printlines(n+2, fZone1Code, fout);
1225         printlines(n+2, fZone2Code, fout);
1226 
1227         // Init input and output
1228         tab(n+2,fout); fout << "// Init input and output";
1229         printlines(n+2, fZone3aCode, fout);
1230         printlines(n+2, fZone3bCode, fout);
1231 
1232         tab(n+2,fout); fout << "// Init graph state";
1233         tab(n+2,fout); fout << "initState(fTasksList);";
1234         tab(n+2,fout); fout << "bool is_finished = false;";
1235         tab(n+2,fout); fout << "unsigned int index_in = 0;";
1236         tab(n+2,fout); fout << "unsigned int index_out = 0;";
1237         tab(n+2,fout); fout << "int count = min ("<< gVecSize << ", fullcount);";
1238 
1239         tab(n+2,fout); fout << "InitSchedulingMap();";
1240         tab(n+2,fout); fout << "#pragma omp parallel";
1241         printdecllist(n+3, "firstprivate", fFirstPrivateDecl, fout);
1242 
1243         tab(n+2,fout); fout << "{";
1244             tab(n+3,fout); fout << "while (!is_finished) {";
1245                 tab(n+4,fout); fout << "Task* task = searchTaskToAcquire(fTasksList);";
1246                 tab(n+4,fout); fout << "if (task != NULL) {";
1247                     tab(n+5,fout); fout << "bool last_cycle_for_thread = false;";
1248                     tab(n+5,fout); fout << "do {";
1249                         tab(n+6,fout); fout << "AddTaskToScheduling(task);";
1250                         tab(n+6,fout); fout << "switch (task->fNum) {";
1251 
1252                             // DSP tasks
1253                             printLoopGraph (n+7,fout);
1254 
1255                             // Input task
1256                             tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1257                             printlines(n+8, fZone6Code, fout);
1258                             tab(n+8, fout); fout << "index_in += count;";
1259                             tab(n+8, fout); fout << "last_cycle_for_thread = (index_in > fullcount);";
1260                             tab(n+8, fout); fout << "break;";
1261                             tab(n+7, fout); fout << "} ";
1262 
1263                             // Output task
1264                             tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1265                             printlines(n+8, fZone7Code, fout);
1266                             tab(n+8, fout); fout << "index_out += count;";
1267                             tab(n+8, fout); fout << "last_cycle_for_thread = (index_out > fullcount);";
1268                             tab(n+8, fout); fout << "break;";
1269                             tab(n+7, fout); fout << "} ";
1270 
1271                             // End task
1272                             tab(n+7, fout); fout << "case " << gTaskCount++ << ": { ";
1273                             tab(n+8, fout); fout << "is_finished = ((index_in >= fullcount) && (index_out >=
1274 fullcount));"; tab(n+8, fout); fout << "break;"; tab(n+7, fout); fout << "} ";
1275 
1276                         tab(n+6,fout); fout << "}";
1277                         tab(n+6,fout); fout << "if (last_cycle_for_thread) break;";
1278 
1279                     tab(n+5,fout); fout << "} while ((task = task->concludeAndTryToAcquireNext()) != NULL);";
1280                 tab(n+4,fout); fout << "}";
1281             tab(n+3,fout); fout << "}";
1282         tab(n+2,fout); fout << "}";
1283         tab(n+2,fout); fout << "PrintSchedulingMap();";
1284     tab(n+1,fout); fout << "}";
1285 }
1286 */
1287 
printComputeMethodScheduler(int n,ostream & fout)1288 void Klass::printComputeMethodScheduler(int n, ostream& fout)
1289 {
1290     tab(n + 1, fout);
1291     fout << "void display() {";
1292     tab(n + 2, fout);
1293     fout << "fGraph.Display();";
1294     tab(n + 1, fout);
1295     fout << "}";
1296 
1297     tab(n + 1, fout);
1298     fout << subst("virtual void compute (int count, $0** input, $0** output) {", xfloat());
1299 
1300     tab(n + 2, fout);
1301     fout << "GetRealTime();";
1302 
1303     tab(n + 2, fout);
1304     fout << "this->input = input;";
1305     tab(n + 2, fout);
1306     fout << "this->output = output;";
1307 
1308     tab(n + 2, fout);
1309     fout << "StartMeasure();";
1310 
1311     tab(n + 2, fout);
1312     fout << "int fullcount = count;";
1313 
1314     tab(n + 2, fout);
1315     fout << "for (fIndex = 0; fIndex < fullcount; fIndex += " << gGlobal->gVecSize << ") {";
1316 
1317     tab(n + 3, fout);
1318     fout << "fCount = min (" << gGlobal->gVecSize << ", fullcount-fIndex);";
1319     tab(n + 3, fout);
1320     fout << "TaskQueue::Init();";
1321     printlines(n + 3, fZone2cCode, fout);
1322 
1323     tab(n + 3, fout);
1324     fout << "fIsFinished = false;";
1325     tab(n + 3, fout);
1326     fout << "fThreadPool->SignalAll(fDynamicNumThreads - 1, this);";
1327     tab(n + 3, fout);
1328     fout << "computeThread(0);";
1329     tab(n + 3, fout);
1330     fout << "while (!fThreadPool->IsFinished()) {}";
1331 
1332     tab(n + 2, fout);
1333     fout << "}";
1334 
1335     tab(n + 2, fout);
1336     fout << "StopMeasure(fStaticNumThreads, fDynamicNumThreads);";
1337 
1338     tab(n + 1, fout);
1339     fout << "}";
1340 
1341     tab(n + 1, fout);
1342     fout << "void computeThread(int cur_thread) {";
1343 
1344     tab(n + 2, fout);
1345     fout << "int count = fCount;";
1346 
1347     printlines(n + 2, fZone1Code, fout);
1348     printlines(n + 2, fZone2Code, fout);
1349 
1350     tab(n + 2, fout);
1351     fout << "// Init graph state";
1352 
1353     tab(n + 2, fout);
1354     fout << "{";
1355     tab(n + 3, fout);
1356     fout << "TaskQueue taskqueue(cur_thread);";
1357     tab(n + 3, fout);
1358     fout << "int tasknum = -1;";
1359 
1360     // Init input and output
1361     tab(n + 3, fout);
1362     fout << "// Init input and output";
1363     printlines(n + 3, fZone3Code, fout);
1364 
1365     tab(n + 3, fout);
1366     fout << "while (!fIsFinished) {";
1367     tab(n + 4, fout);
1368     fout << "switch (tasknum) {";
1369 
1370     // Work stealing task
1371     tab(n + 5, fout);
1372     fout << "case WORK_STEALING_INDEX: { ";
1373     tab(n + 6, fout);
1374     fout << "tasknum = TaskQueue::GetNextTask(cur_thread, fDynamicNumThreads);";
1375     tab(n + 6, fout);
1376     fout << "break;";
1377     tab(n + 5, fout);
1378     fout << "} ";
1379 
1380     // End task
1381     tab(n + 5, fout);
1382     fout << "case LAST_TASK_INDEX: { ";
1383     tab(n + 6, fout);
1384     fout << "fIsFinished = true;";
1385     tab(n + 6, fout);
1386     fout << "break;";
1387     tab(n + 5, fout);
1388     fout << "} ";
1389 
1390     gTaskCount = START_TASK_INDEX;
1391 
1392     // DSP tasks
1393     printLoopGraphScheduler(n + 5, fout);
1394 
1395     tab(n + 4, fout);
1396     fout << "}";
1397     tab(n + 3, fout);
1398     fout << "}";
1399     tab(n + 2, fout);
1400     fout << "}";
1401     tab(n + 1, fout);
1402     fout << "}";
1403 }
1404 
1405 /**
1406  * Print an auxillary C++ class corresponding to an integer init signal
1407  */
println(int n,ostream & fout)1408 void SigIntGenKlass::println(int n, ostream& fout)
1409 {
1410     tab(n, fout);
1411     fout << "class " << fKlassName << " {";
1412 
1413     tab(n, fout);
1414     fout << "  private:";
1415     tab(n + 1, fout);
1416     fout << "int fSampleRate;";
1417 
1418     for (const auto& k : fSubClassList) k->println(n + 1, fout);
1419 
1420     printlines(n + 1, fDeclCode, fout);
1421 
1422     tab(n, fout);
1423     fout << "  public:";
1424 
1425     tab(n + 1, fout);
1426     fout << "int getNumInputs() { "
1427          << "return " << fNumInputs << "; }";
1428     tab(n + 1, fout);
1429     fout << "int getNumOutputs() { "
1430          << "return " << fNumOutputs << "; }";
1431 
1432     tab(n + 1, fout);
1433     fout << "void init(int sample_rate) {";
1434     tab(n + 2, fout);
1435     fout << "fSampleRate = sample_rate;";
1436     printlines(n + 2, fInitCode, fout);
1437     printlines(n + 2, fClearCode, fout);
1438     tab(n + 1, fout);
1439     fout << "}";
1440 
1441     tab(n + 1, fout);
1442     fout << "void fill(int count, int output[]) {";
1443     printlines(n + 2, fZone1Code, fout);
1444     printlines(n + 2, fZone2Code, fout);
1445     printlines(n + 2, fZone2bCode, fout);
1446     printlines(n + 2, fZone3Code, fout);
1447     printLoopGraphInternal(n + 2, fout);
1448     tab(n + 1, fout);
1449     fout << "}";
1450 
1451     tab(n, fout);
1452     fout << "};\n" << endl;
1453 }
1454 
1455 /**
1456  * Print an auxillary C++ class corresponding to an float init signal
1457  */
println(int n,ostream & fout)1458 void SigFloatGenKlass::println(int n, ostream& fout)
1459 {
1460     tab(n, fout);
1461     fout << "class " << fKlassName << " {";
1462 
1463     tab(n, fout);
1464     fout << "  private:";
1465     tab(n + 1, fout);
1466     fout << "int fSampleRate;";
1467 
1468     for (const auto& k : fSubClassList) k->println(n + 1, fout);
1469 
1470     printlines(n + 1, fDeclCode, fout);
1471 
1472     tab(n, fout);
1473     fout << "  public:";
1474 
1475     tab(n + 1, fout);
1476     fout << "int getNumInputs() { "
1477          << "return " << fNumInputs << "; }";
1478     tab(n + 1, fout);
1479     fout << "int getNumOutputs() { "
1480          << "return " << fNumOutputs << "; }";
1481 
1482     tab(n + 1, fout);
1483     fout << "void init(int sample_rate) {";
1484     tab(n + 2, fout);
1485     fout << "fSampleRate = sample_rate;";
1486     printlines(n + 2, fInitCode, fout);
1487     printlines(n + 2, fClearCode, fout);
1488     tab(n + 1, fout);
1489     fout << "}";
1490 
1491     tab(n + 1, fout);
1492     fout << subst("void fill(int count, $0 output[]) {", ifloat());
1493     printlines(n + 2, fZone1Code, fout);
1494     printlines(n + 2, fZone2Code, fout);
1495     printlines(n + 2, fZone2bCode, fout);
1496     printlines(n + 2, fZone3Code, fout);
1497     printLoopGraphInternal(n + 2, fout);
1498     tab(n + 1, fout);
1499     fout << "}";
1500 
1501     tab(n, fout);
1502     fout << "};\n" << endl;
1503 }
1504 
/**
 * Insert every element of src into dst.
 *
 * Elements already present in dst are kept as they are; src is only read.
 *
 * @param dst set receiving the elements
 * @param src set whose elements are copied into dst
 */
static void merge(std::set<std::string>& dst, const std::set<std::string>& src)
{
    dst.insert(src.begin(), src.end());
}
1509 
collectIncludeFile(set<string> & S)1510 void Klass::collectIncludeFile(set<string>& S)
1511 {
1512     for (const auto& k : fSubClassList) k->collectIncludeFile(S);
1513     merge(S, fIncludeFileSet);
1514 }
1515 
collectLibrary(set<string> & S)1516 void Klass::collectLibrary(set<string>& S)
1517 {
1518     for (const auto& k : fSubClassList) k->collectLibrary(S);
1519     merge(S, fLibrarySet);
1520 }
1521