1 /*
2 
3  HyPhy - Hypothesis Testing Using Phylogenies.
4 
5  Copyright (C) 1997-now
6  Core Developers:
7  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8  Art FY Poon    (apoon42@uwo.ca)
9  Steven Weaver (sweaver@temple.edu)
10 
11  Module Developers:
12  Lance Hepler (nlhepler@gmail.com)
13  Martin Smith (martin.audacis@gmail.com)
14 
15  Significant contributions from:
16  Spencer V Muse (muse@stat.ncsu.edu)
17  Simon DW Frost (sdf22@cam.ac.uk)
18 
19  Permission is hereby granted, free of charge, to any person obtaining a
20  copy of this software and associated documentation files (the
21  "Software"), to deal in the Software without restriction, including
22  without limitation the rights to use, copy, modify, merge, publish,
23  distribute, sublicense, and/or sell copies of the Software, and to
24  permit persons to whom the Software is furnished to do so, subject to
25  the following conditions:
26 
27  The above copyright notice and this permission notice shall be included
28  in all copies or substantial portions of the Software.
29 
30  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38  */
39 
40 
41 #if not defined __AFYP_REWRITE_BGM__
42 
43 #ifdef __HYPHYQT__
44     #include "hyphymain.h"
45 #endif
46 
47 #if defined __MAC__ || defined __WINDOZE__ || defined __HYPHY_GTK__
48     #include "HYConsoleWindow.h"
49     #include "HYDialogs.h"
50 
51 #endif
52 
53 
54 //#define       __AFYP_DEVELOPMENT__
55 #define     __MISSING_DATA__
56 
57 
58 #include "bgm.h"
59 
60 _String     _HYBgm_BAN_PARENT_KEY   ("BanParent"),
61             _HYBgm_BAN_CHILD_KEY  ("BanChild"),
62             _HYBgm_ENFORCE_PARENT_KEY ("EnforceParent"),
63             _HYBgm_ENFORCE_CHILD_KEY  ("EnforceChild"),
64 
65             _HYBgm_NODE_INDEX_KEY ("NodeID"),
66             _HYBgm_PRIOR_SIZE_KEY ("PriorSize"),
67             _HYBgm_MAX_PARENT_KEY ("MaxParents"),
68 
69             _HYBgm_PRIOR_MEAN_KEY ("PriorMean"),      /* for continuous (Gaussian) nodes */
70             _HYBgm_PRIOR_PREC_KEY ("PriorVar"),
71 
72             /*SLKP 20070926; add string constants for progress report updates */
73             _HYBgm_STATUS_LINE_MCMC           ("Running Bgm MCMC"),
74             _HYBgm_STATUS_LINE_MCMC_DONE  ("Finished Bgm MCMC"),
75             _HYBgm_STATUS_LINE_CACHE      ("Caching Bgm scores"),
76             _HYBgm_STATUS_LINE_CACHE_DONE ("Done caching Bgm scores"),
77             /*SLKP*/
78 
79             _HYBgm_MPI_CACHING ("USE_MPI_CACHING"),
80 
81             /* maxParentString ("Bgm_MAXIMUM_PARENTS"), */
82             maxNumRestart ("BGM_NUM_RESTARTS"),
83             numRandomize  ("BGM_NUM_RANDOMIZE"),
84             useNodeOrder  ("BGM_USE_NODEORDER"),
85             bgmOptimizationMethod ("BGM_OPTIMIZATION_METHOD"),
86 
87             useMPIcaching ("USE_MPI_CACHING"),
88 
89             mcmcNumChains ("BGM_MCMC_NCHAINS"),       // re-use parameter
90             mcmcTemperature ("BGM_MCMC_TEMPERATURE"),
91             mcmcSteps     ("BGM_MCMC_DURATION"),
92             mcmcBurnin        ("BGM_MCMC_BURNIN"),
93             mcmcSamples       ("BGM_MCMC_SAMPLES"),
94 
95             mcemMaxSteps  ("BGM_MCEM_MAXSTEPS"),
96             mcemBurnin        ("BGM_MCEM_BURNIN"),
97             mcemSampleSize    ("BGM_MCEM_SAMPLES");
98 
99 
100 #ifdef      __UNIX__
101 
102 void        ConsoleBGMStatus (_String, hyFloat, _String * fileName);
103 
104 //__________________________________________________________________________________
105 
ConsoleBGMStatus(_String statusLine,hyFloat percentDone,_String * fileName=nil)106 void        ConsoleBGMStatus (_String statusLine, hyFloat percentDone, _String * fileName = nil)
107 {
108     FILE           *outFile = fileName?doFileOpen (fileName->sData,"w"):nil;
109 
110     _String        reportLine (statusLine);
111 
112 
113 
114 
115     if (percentDone >= 0.0) {
116         reportLine = reportLine & ". " & percentDone & "% done.";
117     }
118 
119     if (outFile) {
120         fprintf (outFile,"%s", reportLine.sData);
121     } else if (verbosity_level == 1) {
122         printf ("\033\015 %s", reportLine.sData);
123     }
124 
125     if (percentDone < -1.5) {
126         printf ("\033\015 ");
127         setvbuf (stdout,nil,_IOLBF,1024);
128     } else if (percentDone < -0.5) {
129         setvbuf (stdout,nil, _IONBF,1);
130     }
131     if (outFile) {
132         fclose (outFile);
133     }
134 
135 }
136 
137 #endif
138 
139 
140 //___________________________________________________________________________________________
141 //  afyp Oct. 15, 2008, replaced wrapper function for _Constant member function
142 //  with this local function.
LnGamma(hyFloat theValue)143 hyFloat Bgm::LnGamma(hyFloat theValue)
144 {
145     if (theValue <= 0) {
146 
147 
148         _String oops ("ERROR: Requested Bgm::LnGamma(x) for x <= 0.");
149         WarnError (oops);
150 
151         return 0;
152     }
153 
154     static hyFloat lngammaCoeff [6] = {   76.18009172947146,
155                                          -86.50532032941677,
156                                          24.01409824083091,
157                                          - 1.231739572450155,
158                                          0.1208650973866179e-2,
159                                          - 0.5395239384953e-5
160                                          };
161 
162     static hyFloat lookUpTable [20] = {  0., 0., 0.6931472, 1.7917595, 3.1780538,
163                                             4.7874917, 6.5792512, 8.5251614, 10.6046029, 12.8018275,
164                                             15.1044126, 17.5023078, 19.9872145, 22.5521639, 25.1912212,
165                                             27.8992714, 30.6718601, 33.5050735, 36.3954452, 39.3398842
166                                          };
167 
168     // use look-up table for small integer values
169     if (theValue <= 20 && (theValue - (long)theValue) > 0.) {
170         return (lookUpTable [(long) theValue - 1]);
171     }
172 
173 
174     // else do it the hard way
175     hyFloat  x, y, tmp, ser;
176 
177     y = x = theValue;
178     tmp = x + 5.5;
179     tmp -= (x+0.5) * log(tmp);
180     ser = 1.000000000190015;
181 
182     for (long j = 0; j <= 5; j++) {
183         ser += lngammaCoeff[j] / ++y;
184     }
185 
186     return (-tmp + log(2.506628274631005*ser/x));
187 }
188 
189 
190 
191 //___________________________________________________________________________________________
Bgm(_AssociativeList * dnodes,_AssociativeList * cnodes)192 Bgm::Bgm(_AssociativeList * dnodes, _AssociativeList * cnodes)
193 {
194     _String     errorMessage;
195 
196     long        num_discrete    = dnodes->avl.countitems(),
197                 num_continuous   = cnodes->avl.countitems(),
198                 node_index;
199 
200     num_nodes   = num_discrete + num_continuous;    // set member variable
201 
202 
203 
204     _Constant   * node_id,                      /* pointers into HBL associative array arguments */
205                 * mp,
206                 * size, * mean, * precision;
207 
208     _AssociativeList    * discreteNode, * continuousNode;
209 
210 
211 
212     // set member variables to default values
213     calc_bit            = new _Constant ();         // for calculating LnGamma in member functions
214     max_max_parents     = 0;                        // record the most parents a node can have
215     obsData             = NULL;
216     obsWeights          = NULL;
217 
218 
219 
220     // extract number of variables in data
221     CreateMatrix (&dag, num_nodes, num_nodes, false, true, false);      // allocate space for matrices
222     CreateMatrix (&banned_edges, num_nodes, num_nodes, false, true, false);
223     CreateMatrix (&enforced_edges, num_nodes, num_nodes, false, true, false);
224     CreateMatrix (&prior_sample_size, num_nodes, 1, false, true, false);    // for storing floats as hyFloat objects
225     CreateMatrix (&prior_mean, num_nodes, 1, false, true, false);
226     CreateMatrix (&prior_precision, num_nodes, 1, false, true, false);
227 
228 
229     // allocate space for _SimpleList objects
230     is_discrete.Populate (num_nodes, 0, 0);
231     num_levels.Populate (num_nodes, 0, 0);
232     max_parents.Populate (num_nodes, 0, 0);
233     has_missing.Populate (num_nodes, 0, 0);
234 
235 
236 
237     // initialize discrete nodes
238     for (long dn = 0; dn < num_discrete; dn++) {
239         discreteNode    = (_AssociativeList *) (dnodes->GetByKey (dn, ASSOCIATIVE_LIST));
240 
241         if (discreteNode) {
242             node_id     = (_Constant *) (discreteNode->GetByKey (_HYBgm_NODE_INDEX_KEY, NUMBER));
243             mp          = (_Constant *) (discreteNode->GetByKey (_HYBgm_MAX_PARENT_KEY, NUMBER));
244             size        = (_Constant *) (discreteNode->GetByKey (_HYBgm_PRIOR_SIZE_KEY, NUMBER));
245 
246             if (node_id && mp && size) {
247                 node_index  = (long) (node_id->Value());
248 
249                 is_discrete.list_data[node_index] = 1.;
250                 max_parents.list_data[node_index] = (long) mp->Value();
251 
252                 if ((long) mp->Value() > max_max_parents) {
253                     max_max_parents = (long) mp->Value();
254                 }
255 
256                 prior_sample_size.Store(node_index, 0, (long) size->Value());
257             } else {
258                 errorMessage = _String ("Missing key (NodeID, MaxParents, PriorSize) in associative array for discrete node ")
259                                & dn;
260                 break;
261             }
262         } else {
263             errorMessage = _String("Failed to retrieve discrete node specification at index ") & dn;
264             break;
265         }
266     }
267     if (errorMessage.sLength) {
268         WarnError (errorMessage);
269         errorMessage = _String();   // reset to empty string
270     }
271 
272 
273 
274     // initialize continuous nodes
275     for (long cn = 0; cn < num_continuous; cn++) {
276         continuousNode  = (_AssociativeList *) (cnodes->GetByKey (cn, ASSOCIATIVE_LIST));
277 
278         if (continuousNode) {
279             node_id     = (_Constant *) (continuousNode->GetByKey (_HYBgm_NODE_INDEX_KEY, NUMBER));
280             mp          = (_Constant *) (continuousNode->GetByKey (_HYBgm_MAX_PARENT_KEY, NUMBER));
281             size        = (_Constant *) (continuousNode->GetByKey (_HYBgm_PRIOR_SIZE_KEY, NUMBER));
282             mean        = (_Constant *) (continuousNode->GetByKey (_HYBgm_PRIOR_MEAN_KEY, NUMBER));
283             precision   = (_Constant *) (continuousNode->GetByKey (_HYBgm_PRIOR_PREC_KEY, NUMBER));
284 
285             if (node_id && mp && size && mean && precision) {
286                 node_index = (long) (node_id->Value());
287 
288                 is_discrete.list_data[node_index] = 0.;
289                 max_parents.list_data[node_index] = (long) mp->Value();
290 
291                 if (mp->Value() > max_max_parents) {
292                     max_max_parents = (long) mp->Value();
293                 }
294 
295                 prior_sample_size.Store (node_index, 0, (hyFloat) size->Value());
296                 prior_mean.Store (node_index, 0, (hyFloat) mean->Value());
297                 prior_precision.Store (node_index, 0, (hyFloat) precision->Value());
298 
299             } else {
300                 errorMessage = _String ("Missing key (NodeID, MaxParents, PriorSize, PriorMean, PriorVar)")
301                                & "in associative array for continuous node " & cn;
302                 break;
303             }
304         } else {
305             errorMessage = _String ("Failed to retrieve continuous node specification at index ") & cn;
306             break;
307         }
308     }
309 
310     if (errorMessage.sLength) {
311         WarnError (errorMessage);
312         errorMessage = _String();   // reset to empty string
313     }
314 
315 
316 
317     // append duplicates of _List object to store pointers to _Constant, _Matrix, _NTupleStorage objects
318     _List       emptyList (max_max_parents+1);
319     for (long node = 0; node < num_nodes; node++) {
320         node_scores && (&emptyList);
321     }
322 
323 
324     // require Bgm to recalculate node scores
325     scores_cached = FALSE;
326 }
327 
328 
329 
330 //___________________________________________________________________________________________
~Bgm(void)331 Bgm::~Bgm (void)
332 {
333     DeleteObject (calc_bit);
334 }
335 
336 
337 
338 //___________________________________________________________________________________________
SetDataMatrix(_Matrix * data)339 void Bgm::SetDataMatrix (_Matrix * data)
340 {
341     // reset cached node scores and edge posteriors
342     // DumpComputeLists ();
343     scores_cached   = FALSE;
344 
345     if (obsData)    {
346         DeleteObject (obsData);    // dispense with previous data matrix
347     }
348 
349     obsData         = data;
350 
351     obsData->CheckIfSparseEnough(TRUE);     // check if we can use a more compact representation of the
352     // matrix; before including this command, we suffered a
353     // noticeable slow-down in this routine. - AFYP
354 
355     /*
356     // allocate space to weight matrix
357     if (obsWeights)
358     {
359         DeleteObject(obsWeights);
360     }
361     obsWeights = new _Matrix (obsData->GetHDim(), obsData->GetVDim(), false, true);
362 
363     // by default all cases are weighted equally - require user to call SetWeightMatrix() if otherwise
364     for (long row = 0; row < obsData->GetHDim(); row++)
365         for (long col = 0; col < obsData->GetVDim(); col++)
366             obsWeights->Store(row, col, 1.);
367 
368     */
369 
370     // reset data-dependent member variables
371     for (long node = 0; node < num_nodes; node++) {
372         has_missing.list_data[node] = 0;
373         num_levels.list_data[node] = 0;
374     }
375 
376 
377 
378 #ifdef DEBUG_SDM
379     char buf [256];
380     for (long i = 0; i < obsData->GetHDim(); i++) {
381         for (long j = 0; j < obsData->GetVDim(); j++) {
382             snprintf (buf, sizeof(buf), "%d ", (long) (*obsData)(i,j));
383             BufferToConsole (buf);
384         }
385         NLToConsole ();
386     }
387 #endif
388 
389 
390 
391     if (obsData->GetVDim() == num_nodes) {  // make sure the data matrix is compatible with graph
392 #ifdef __MISSING_DATA__
393         long    nrows = obsData->GetHDim();
394 
395         for (long node = 0; node < num_nodes; node++) {
396             if (is_discrete.list_data[node]) {
397                 num_levels.list_data[node] = 1;
398 
399                 for (long obs, row = 0; row < nrows; row++) {
400                     obs = (*obsData)(row, node);
401 
402                     if (has_missing.list_data[node] == 0 && obs < 0) {  // use negative integer values to annotate missing data
403                         has_missing.list_data[node] = 1;
404                         continue;   // skip next step to check levels
405                     }
406 
407                     if (obs+1 > num_levels.list_data[node]) {
408                         num_levels.list_data[node] = num_levels.list_data[node] + 1;
409                     }
410                 }
411             } else {
412                 num_levels.list_data[node] = 0;
413 
414                 // not implementing missing data for continuous nodes yet
415                 //  not until I've decided on annotation anyhow :-/  afyp
416             }
417         }
418 
419 #ifdef DEBUG_SDM
420         snprintf (buf, sizeof(buf), "Levels: ");
421         BufferToConsole (buf);
422         for (long i = 0; i < num_nodes; i++) {
423             snprintf (buf, sizeof(buf), "%d ", num_levels.list_data[i]);
424             BufferToConsole (buf);
425         }
426         NLToConsole ();
427 
428         snprintf (buf, sizeof(buf), "Missing (0=FALSE, 1=TRUE): ");
429         BufferToConsole (buf);
430         for (long i = 0; i < num_nodes; i++) {
431             snprintf (buf, sizeof(buf), "%d ", has_missing.list_data[i]);
432             BufferToConsole (buf);
433         }
434         NLToConsole ();
435 #endif
436 
437 #else
438         for (long node = 0; node < num_nodes; node++) { // for every column in matrix
439             if (is_discrete.list_data[node]) {
440                 // calculate number of levels for discrete node
441                 num_levels.list_data[node] = 1;
442 
443                 for (long obs = 0; obs < obsData->GetHDim(); obs++) {
444                     // adjust for zero-indexing
445                     if ( ((*obsData)(obs,node) + 1) > num_levels.list_data[node]) {
446                         num_levels.list_data[node] = num_levels.list_data[node] + 1;
447                     }
448                 }
449             } else {
450                 // continuous node defined to have no levels
451                 num_levels.list_data[node] = 0;
452             }
453         }
454 #endif
455     } else {
456         _String errorMsg ("Number of variables in data matrix do not match number of nodes in graph.");
457         WarnError (errorMsg);       // note, this produces a crash because the batch file proceeds to execute
458         // BGM routines without data. - AFYP
459     }
460 
461 
462     last_node_order.Clear();        // forget the last step taken in MCMC chain
463     best_node_order.Clear();
464 
465     CacheNodeScores();
466 
467     // re-allocate memory to lists
468     // InitComputeLists ();
469 }
470 
471 
472 
473 //___________________________________________________________________________________________
SetWeightMatrix(_Matrix * weights)474 void    Bgm::SetWeightMatrix (_Matrix * weights)
475 {
476     if ( obsData
477             && (long) (obsData->GetHDim()) == (long) (weights->GetHDim())
478             /*&& (long) (obsData->GetVDim()) == (long) (weights->GetVDim())*/ ) {
479         obsWeights = weights;
480         ReportWarning( _String("Set weight matrix to ") & (_String *) obsWeights->toStr() );
481     } else {
482         _String errorMsg ("Number of weights does not match number of observations in current data set.");
483         WarnError (errorMsg);
484     }
485 
486     scores_cached = FALSE;
487 }
488 
489 
490 
491 
492 //___________________________________________________________________________________________
SetGraphMatrix(_Matrix * graph)493 void    Bgm::SetGraphMatrix (_Matrix *graph)
494 {
495     /*
496     char    bug [255];
497 
498     snprintf (bug, sizeof(bug), "Entered Bgm::SetGraphMatrix()\n");
499     BufferToConsole (bug);
500     */
501     dag = (_Matrix &) (*graph); // matrix assignment
502     ReportWarning (_String("set graph matrix to:\n") & (_String *) dag.toStr() & "\n" );
503 }
504 
505 
SetBanMatrix(_Matrix * banMx)506 void    Bgm::SetBanMatrix (_Matrix *banMx)
507 {
508     banned_edges = (_Matrix &) (*banMx);
509 
510     ReportWarning (_String("Set ban matrix to ") & (_String *) banned_edges.toStr() & "\n" );
511 }
512 
SetEnforceMatrix(_Matrix * enforceMx)513 void    Bgm::SetEnforceMatrix (_Matrix *enforceMx)
514 {
515     enforced_edges = (_Matrix &) (*enforceMx);
516     ReportWarning (_String("Set enforce matrix to ") & (_String *) enforced_edges.toStr() & "\n" );
517 }
518 
SetBestOrder(_SimpleList * orderList)519 void    Bgm::SetBestOrder (_SimpleList * orderList)
520 {
521     best_node_order.Populate(num_nodes, 0, 0);
522 
523     for (long i = 0; i < num_nodes; i++) {
524         best_node_order.list_data[i] = orderList->list_data[i];
525     }
526 }
527 
528 //___________________________________________________________________________________________
529 //  For debugging..
PrintGraph(_Matrix * g)530 void Bgm::PrintGraph (_Matrix * g)
531 {
532     char    buf [255];
533 
534     if (g) {
535         for (long row = 0; row < g->GetHDim(); row++) {
536             for (long col = 0; col < g->GetVDim(); col++) {
537                 snprintf (buf, sizeof(buf), "%ld ", (long) (*g)(row,col));
538                 BufferToConsole (buf);
539             }
540             snprintf (buf, sizeof(buf), "\n");
541             BufferToConsole (buf);
542         }
543     } else {
544         for (long row = 0; row < dag.GetHDim(); row++) {
545             for (long col = 0; col < dag.GetVDim(); col++) {
546                 snprintf (buf, sizeof(buf), "%ld ", (long)dag(row,col));
547                 BufferToConsole (buf);
548             }
549             snprintf (buf, sizeof(buf), "\n");
550             BufferToConsole (buf);
551         }
552     }
553     NLToConsole();
554 }
555 
556 
557 //___________________________________________________________________________________________
ResetGraph(_Matrix * g)558 void    Bgm::ResetGraph (_Matrix * g)
559 {
560     if (g) {
561         for (long row = 0; row < num_nodes; row++) {
562             for (long col = 0; col < num_nodes; col++) {
563                 g->Store (row, col, dag (row, col));
564             }
565         }
566         ReportWarning (_String("Reset graph to dag argument\n"));
567     } else {
568         for (long row = 0; row < num_nodes; row++) {
569             for (long col = 0; col < num_nodes; col++) {
570                 // enforced edges must always appear in DAG
571                 // if none specified, all entries are zeroed
572                 dag.Store (row, col, enforced_edges (row, col));
573             }
574         }
575     }
576 }
577 
578 
579 
580 //___________________________________________________________________________________________
MarginalSum(bool over_rows,long offset)581 long    Bgm::MarginalSum (bool over_rows, long offset)
582 {
583     long    res     = 0;
584 
585     if (over_rows)
586         for (long i = 0; i < num_nodes; i++) {
587             res += dag(offset,i);    // sum of n-th row
588         }
589     else
590         for (long i = 0; i < num_nodes; i++) {
591             res += dag(i,offset);    // sum of n-th column
592         }
593 
594     return res;
595 }
596 
597 
598 
599 //___________________________________________________________________________________________
600 //  DEPRECATED
IsCyclic(void)601 bool    Bgm::IsCyclic (void)
602 {
603     _Matrix     nodes_left (num_nodes, 1, false, true);
604 
605     for (long i = 0; i < num_nodes; i++) {  // populate vector with 1's
606         nodes_left.Store (i,0,1.);
607     }
608 
609     long        one_count = num_nodes,
610                 last_count,
611                 num_children;
612 
613     do {    // until no more leaves are removed
614         last_count = one_count;
615         one_count = 0;
616 
617         // find and remove all nodes with no children (i.e. leaf nodes)
618         for (long parent = 0; parent < num_nodes; parent++) {
619             if (nodes_left(parent,0) == 1.) {
620                 one_count++;
621 
622                 num_children = 0;
623                 // count all existing nodes that have this parent
624                 for (long child = 0; child < num_nodes; child++)
625                     if (nodes_left(child,0) == 1. && child != parent)
626                         if (dag(parent,child) == 1.) {
627                             num_children++;
628                         }
629 
630                 if (num_children == 0) {    // count number of nodes with this node as parent
631                     nodes_left.Store (parent, 0, 0.);
632                     one_count--;
633                 }
634             }
635         }
636     } while (one_count < last_count);
637 
638 
639     if (one_count > 0) {
640         return TRUE;    // dag is cyclic
641     } else {
642         return FALSE;   // removed all nodes as leaves, dag is acyclic
643     }
644 }
645 
646 
647 
648 
649 //___________________________________________________________________________________________
RandomizeGraph(_Matrix * graphMx,_SimpleList * order,long num_steps,bool fixed_order)650 void    Bgm::RandomizeGraph (_Matrix * graphMx, _SimpleList * order, long num_steps, bool fixed_order)
651 {
652     long    step = 0, fail = 0;
653 
654 
655     // convert order into matrix format of edge permissions for more rapid look-up later  /* DEBUGGING ONLY */
656 #ifdef __DEBUG_RG__
657     long    mode = 0;
658     _Matrix orderMx (num_nodes, num_nodes, false, true);
659 
660     for (long p_rank = 0; p_rank < num_nodes; p_rank++) {
661         for (long c_rank = 0; c_rank < num_nodes; c_rank++) {
662             // use actual node id's to index into matrix - 1 indicates a permitted edge
663             orderMx.Store (order->list_data[p_rank], order->list_data[c_rank], p_rank > c_rank ? 1 : 0);
664         }
665     }
666 #endif
667 
668 
669     // before we do anything, make sure that graph complies with order /* DEBUGGING */
670     // calculate number of parents for each child for rapid access later
671     _SimpleList     num_parents;
672 
673     num_parents.Populate (num_nodes, 0, 0);
674 
675     for (long child = 0; child < num_nodes; child++) {
676         for (long parent = 0; parent < num_nodes; parent++) {
677 #ifdef __DEBUG_RG__
678             if ((*graphMx)(parent,child) > 0 && banned_edges(parent,child) > 0) {
679                 WarnError (_String("BEFORE RandomizeGraph() there is a banned edge: ") & parent & "->" & child & " mode " & mode);
680                 break;
681             }
682             if ((*graphMx)(parent,child) == 0 && enforced_edges(parent,child) > 0) {
683                 WarnError (_String("BEFORE RandomizeGraph() there is a missing enforced edge: ") & parent & "->" & child);
684                 break;
685             }
686 
687             if ( (*graphMx)(parent, child)==1 && orderMx(parent, child)==0 ) {
688                 WarnError (_String ("Order-network discrepancy at start of RandomizeGraph() at edge ") & parent & "->" & child);
689                 break;
690             }
691 #endif
692 
693             if ( (*graphMx)(parent, child) > 0) {
694                 num_parents.list_data[child]++;
695             }
696         }
697         // end for
698 
699         if (num_parents.list_data[child] > max_parents.list_data[child]) {
700             WarnError (_String ("Number of parents exceeds maximum BEFORE randomization of graph at node ") & child & " (" & num_parents.list_data[child] & " > " & max_parents.list_data[child] & ")\n" );
701             break;
702         }
703     }
704 
705 
706 
707 
708     // randomize graph
709     long    p_rank, c_rank, parent, child;
710 
711     do {
712         // a fail-safe to avoid infinite loops
713         if (fail > MAX_FAIL_RANDOMIZE) {
714             WarnError(_String("Bgm::RandomizeGraph() failed to modify the graph in GraphMCMC() after MAX_FAIL_RANDOMIZE (") & (long)MAX_FAIL_RANDOMIZE & ") attempts.\n");
715             break;
716         }
717 
718         // pick a random edge permitted under ordering
719         p_rank  = (genrand_int32() % (num_nodes-1)) + 1;    // shift right to not include lowest node in order
720         c_rank  = genrand_int32() % p_rank;
721 
722         child = order->list_data[c_rank];
723         parent = order->list_data[p_rank];
724 
725 
726         if (fixed_order || genrand_real2() > RANDOMIZE_PROB_SWAP) { // attempt an add or delete
727             if ( (*graphMx)(parent,child) == 0 && !banned_edges(parent,child)) {    // add an edge
728 #ifdef __DEBUG_RG__
729                 mode = 0;
730 #endif
731                 if (num_parents.list_data[child] == max_parents.list_data[child]) { // child cannot accept any additional edges
732                     // move an edge from an existing parent to target parent
733                     _SimpleList     removeable_edges;
734 
735                     // build a list of current parents
736                     for (long par = 0; par < num_nodes; par++)
737                         if ((*graphMx)(par,child) && !enforced_edges(par,child)) {
738                             removeable_edges << par;
739                         }
740 
741                     if (removeable_edges.lLength > 0) {
742                         // shuffle the list and remove the first parent
743                         removeable_edges.Permute(1);
744                         graphMx->Store (removeable_edges.list_data[0], child, 0.);
745                         graphMx->Store (parent, child, 1.);
746                         step++;
747                     } else {
748                         // none of the edges can be removed
749                         fail++;
750                     }
751                 } else {
752                     // child can accept another edge
753                     graphMx->Store (parent, child, 1.);
754                     num_parents.list_data[child]++;
755                     step++;
756                 }
757             } else if ( (*graphMx)(parent,child) == 1 && !enforced_edges(parent,child)) {   // delete an edge
758 #ifdef __DEBUG_RG__
759                 mode = 1;
760 #endif
761                 graphMx->Store (parent, child, 0.);
762                 num_parents.list_data[child]--;
763                 step++;
764             } else {
765                 fail++;
766             }
767         } else {    // swap nodes in ordering and flip edge if present
768             long    ok_to_go = 1;
769 #ifdef __DEBUG_RG__
770             mode = 2;
771 #endif
772             // edge cannot be flipped
773             if ( (*graphMx)(parent,child) == 1  &&  ( banned_edges(child,parent) || enforced_edges(parent,child) )  ) {
774                 ok_to_go = 0;
775             }
776 
777 
778             // check all other nodes affected by the swap
779             if (ok_to_go) {
780                 for (long bystander, i=c_rank+1; i < p_rank; i++) {
781                     bystander = order->list_data[i];    // retrieve node id
782 
783                     if (
784                         ( (*graphMx)(parent,bystander)==1 && enforced_edges (parent, bystander) )  ||
785                         ( (*graphMx)(bystander,child)==1 && enforced_edges (bystander, child) )  ||
786                         banned_edges (bystander,parent)  ||  banned_edges (child,bystander)
787                     ) {
788                         // by flipping the parent->child edge, we would screw up ordering for
789                         // an enforced edge:  C < B < P   becomes  P < B < C  where B->C or P->B is enforced
790 
791                         // also, a banned edge implies a node ordering constraint
792                         ok_to_go = 0;
793                         break;
794                     }
795                 }
796             }
797 
798 
799             // if everything checks out OK
800             if ( ok_to_go ) {
801                 // flip the target edge
802                 if ( (*graphMx)(parent,child) == 1 && !banned_edges(child,parent) ) {
803                     graphMx->Store (parent, child, 0);
804                     graphMx->Store (child, parent, 1);
805                     num_parents.list_data[child]--;
806                     num_parents.list_data[parent]++;
807 
808                     if (num_parents.list_data[parent] > max_parents.list_data[parent]) {
809                         // parent cannot accept any more edges, delete one of the edges at random (including the edge to flip)
810                         _SimpleList     removeable_edges;
811 
812                         for (long par = 0; par < num_nodes; par++)
813                             if (  (*graphMx)(par, parent)  &&  !enforced_edges(par, parent)  ) {
814                                 removeable_edges << par;
815                             }
816 
817                         removeable_edges.Permute(1);
818                         graphMx->Store (removeable_edges.list_data[0], parent, 0.);
819                         num_parents.list_data[parent]--;
820                     }
821 
822                     step++;
823                 }
824                 // if number of parents for parent node will exceed maximum, then the edge is deleted instead of flipped
825 
826                 // swap nodes in order
827                 order->list_data[p_rank] = child;
828                 order->list_data[c_rank] = parent;  // remember to update order matrix also!
829 
830 
831                 // flip the other edges affected by node swap
832                 //  child <-  N   ...   N <- parent                   _________________,
833                 //                                    becomes        |                 v
834                 //                                                 parent    N         N    child
835                 //                                                           ^                |
836                 //                                                           `----------------+
837                 for (long bystander, i = c_rank+1; i < p_rank; i++) {
838                     bystander = order->list_data[i];
839 
840                     if ( (*graphMx)(bystander, child) == 1 ) {
841                         graphMx->Store (bystander, child, 0);
842                         num_parents.list_data[child]--;
843 
844                         graphMx->Store (child, bystander, 1);
845                         num_parents.list_data[bystander]++;
846 
847                         if (num_parents.list_data[bystander] > max_parents.list_data[bystander]) {
848                             _SimpleList     removeable_edges;
849 
850                             for (long par = 0; par < num_nodes; par++)
851                                 if (  (*graphMx)(par, bystander)  &&  !enforced_edges(par, bystander)  ) {
852                                     removeable_edges << par;
853                                 }
854 
855                             removeable_edges.Permute(1);
856                             graphMx->Store (removeable_edges.list_data[0], bystander, 0.);
857                             num_parents.list_data[bystander]--;
858                         }
859                     }
860 
861                     if ( (*graphMx)(parent,bystander) == 1) {
862                         graphMx->Store (parent, bystander, 0);
863                         num_parents.list_data[bystander]--;
864 
865                         graphMx->Store (bystander, parent, 1);
866                         num_parents.list_data[parent]++;
867 
868                         if (num_parents.list_data[parent] > max_parents.list_data[parent]) {
869                             _SimpleList     removeable_edges;
870 
871                             for (long par = 0; par < num_nodes; par++)
872                                 if (  (*graphMx)(par, parent)  &&  !enforced_edges(par, parent)  ) {
873                                     removeable_edges << par;
874                                 }
875 
876                             removeable_edges.Permute(1);
877                             graphMx->Store (removeable_edges.list_data[0], parent, 0.);
878                             num_parents.list_data[parent]--;
879                         }
880                     }
881                 }
882 #ifdef __DEBUG_RG__
883                 // refresh order matrix
884                 for (long p_rank = 0; p_rank < num_nodes; p_rank++) {
885                     for (long c_rank = 0; c_rank < p_rank; c_rank++) {
886                         orderMx.Store (order->list_data[p_rank], order->list_data[c_rank], 1);
887                     }
888                 }
889 #endif
890                 step++;
891             } else {
892                 fail++;
893             }
894         }
895     } while (step < num_steps);
896 
897 
898     // a final check to make sure we haven't screwed up!
899 #ifdef __DEBUG_RG__
900     num_parents.Populate (num_nodes, 0, 0);
901 
902     for (long child = 0; child < num_nodes; child++) {
903         for (long parent = 0; parent < num_nodes; parent++) {
904             if ((*graphMx)(parent,child) > 0 && banned_edges(parent,child) > 0) {
905                 WarnError (_String("Aw crap!  RandomizeGraph() introduced a banned edge: ") & parent & "->" & child & " mode " & mode);
906                 break;
907             }
908             if ((*graphMx)(parent,child) == 0 && enforced_edges(parent,child) > 0) {
909                 WarnError (_String("Aw crap!  RandomizeGraph() deleted an enforced edge: ") & parent & "->" & child);
910                 break;
911             }
912             if ( (*graphMx)(parent, child)==1 && orderMx(parent, child)==0 ) {
913                 WarnError (_String ("Order-network discrepancy at end of RandomizeGraph(), mode ") & mode);
914                 break;
915             }
916 
917             if ( (*graphMx)(parent, child) > 0) {
918                 num_parents.list_data[child]++;
919             }
920         }
921 
922         if (num_parents.list_data[child] > max_parents.list_data[child]) {
923             WarnError (_String ("Exceeded acceptable number of parents for node ") & child &  ", mode " & mode);
924             break;
925         }
926     }
927 #endif
928 }
929 
930 
931 
932 //___________________________________________________________________________________________
RandomizeDag(long num_steps)933 void    Bgm::RandomizeDag (long num_steps)
934 {
935 
936     for (long step = 0; step < num_steps; step++) {
937         // store current graph in case altered graph becomes cyclic
938         _Matrix reset_dag (num_nodes, num_nodes, false, true);
939         for (long h = 0; h < num_nodes; h++)
940             for (long v = 0; v < num_nodes; v++) {
941                 reset_dag.Store (h,v,dag(h,v));
942             }
943 
944         do {
945             for (long row = 0; row < num_nodes; row++) {
946                 for (long col = 0; col < num_nodes; col++) {
947                     dag.Store (row,col,reset_dag(row,col));
948                 }
949             }
950 
951 
952             // select a random arc, discarding cycles and banned edges
953             long    row     = 0,
954                     col      = row;
955 
956             while (col == row && banned_edges(row,col) == 1) {
957                 row = genrand_int32() % num_nodes;
958                 col = genrand_int32() % num_nodes;
959             }
960 
961             // printf ("RandomizeDag() picked %d,%d\n", row, col);
962 
963             if (dag(row,col) == 0.) {
964                 if (dag(col,row) == 0.) {   // no arc
965                     // add an arc -- sum over n-th column gives number of parents for n-th child
966                     if (MarginalSum(FALSE,col) < max_parents.list_data[col]) {
967                         dag.Store (row,col,1.);
968                     }
969                 } else {
970                     // reverse or remove arc
971                     dag.Store (col,row,0.);
972                     if (genrand_int32() % 2 == 0) {
973                         dag.Store (row,col,1.);
974                     }
975                 }
976             } else {
977                 if (dag(col,row) == 0.) {
978                     // reverse or remove
979                     dag.Store (row,col,0.);
980                     if (genrand_int32() % 2 == 0) {
981                         dag.Store (col,row,1.);
982                     }
983                 } else {
984                     // double arc, set to one of three legal arcs
985                     long    option = genrand_int32() % 3;
986                     if (option == 0) {
987                         dag.Store (row,col,0.);
988                     } else if (option == 1) {
989                         dag.Store (col,row,0.);
990                     } else {
991                         dag.Store (row,col,0.);
992                         dag.Store (col,row,0.);
993                     }
994                 }
995             }
996 
997             // PrintGraph(nil);
998 
999         } while ( IsCyclic() );
1000     }
1001     // end loop over steps
1002 }
1003 
1004 
1005 
1006 
1007 //___________________________________________________________________________________________
1008 //  Wrappers to retain original functionality.
ComputeDiscreteScore(long node_id)1009 hyFloat  Bgm::ComputeDiscreteScore (long node_id)
1010 {
1011     _SimpleList     parents;
1012 
1013     for (long par = 0; par < num_nodes; par++) {
1014         if (dag(par, node_id) == 1 && is_discrete.list_data[par]) {
1015             parents << par;
1016         }
1017     }
1018 
1019     return ComputeDiscreteScore (node_id, parents);
1020 }
1021 
ComputeDiscreteScore(long node_id,_Matrix * g)1022 hyFloat  Bgm::ComputeDiscreteScore (long node_id, _Matrix * g)
1023 {
1024     _SimpleList     parents;
1025 
1026     for (long par = 0; par < num_nodes; par++) {
1027         if ((*g)(par, node_id) == 1 && is_discrete.list_data[par]) {
1028             parents << par;
1029         }
1030     }
1031 
1032     return ComputeDiscreteScore (node_id, parents);
1033 }
1034 
1035 
1036 //___________________________________________________________________________________________
1037 //#define BGM_DEBUG_CDS
ComputeDiscreteScore(long node_id,_SimpleList & parents)1038 hyFloat  Bgm::ComputeDiscreteScore (long node_id, _SimpleList & parents)
1039 {
1040     //char          buf [255];
1041 
1042     // use cached node scores if possible
1043     if (scores_cached) {
1044         _List *     scores  = (_List *) node_scores.list_data[node_id];
1045 
1046         if (parents.lLength == 0) {
1047             _Constant *     orphan_score = (_Constant *) scores->list_data[0];
1048             return (hyFloat) orphan_score->Value();
1049         } else if (parents.lLength == 1) {
1050             _Matrix *   single_parent_scores = (_Matrix *) scores->list_data[1];
1051             return (hyFloat) (*single_parent_scores) (parents.list_data[0], 0);
1052         } else {
1053             _NTupleStorage *    family_scores   = (_NTupleStorage *) scores->list_data[parents.lLength];
1054             _SimpleList         nktuple;
1055 
1056             for (long i = 0; i < parents.lLength; i++) {    // map parents back to nk-tuple
1057                 long    par = parents.list_data[i];
1058                 if (par > node_id) {
1059                     par--;
1060                 }
1061                 nktuple << par;
1062             }
1063             return (hyFloat) family_scores->Retrieve (nktuple);  // using nk-tuple
1064         }
1065     }
1066 
1067 
1068 
1069 #ifdef __MISSING_DATA__
1070     //  Are any of the edges banned?
1071     for (long par = 0; par < parents.lLength; par++) {
1072         if (banned_edges(parents.list_data[par], node_id) > 0) {
1073             // score should never be used
1074             ReportWarning(_String("Skipping node score for family containing banned edge ") & parents.list_data[par] & "->" & node_id & "\n");
1075             return (-INFINITY);
1076         }
1077     }
1078 
1079 
1080     //  Is node with missing data in Markov blanket of focal node?
1081     if (has_missing.list_data[node_id]) {
1082         //return (ImputeDiscreteScore (node_id, parents));
1083         return (GibbsApproximateDiscreteScore (node_id, parents));
1084     } else {
1085         for (long par = 0; par < parents.lLength; par++) {
1086             if (has_missing.list_data[parents.list_data[par]]) {
1087                 // return (ImputeDiscreteScore (node_id, parents));
1088                 return (GibbsApproximateDiscreteScore (node_id, parents));
1089             }
1090         }
1091     }
1092 #endif
1093 
1094 
1095     _SimpleList     multipliers ((long)1);
1096 
1097     // else need to compute de novo
1098     _Matrix     n_ijk,
1099                 n_ij;       // [i] indexes child nodes,
1100     // [j] indexes combinations of values for parents of i-th node,
1101     // [k] indexes values of i-th node
1102 
1103     long        num_parent_combos   = 1,                    // i.e. 'q'
1104                 r_i                   = num_levels.list_data[node_id];
1105 
1106     hyFloat  n_prior_ijk = 0,
1107                 n_prior_ij  = 0,
1108                 log_score  = 0;
1109 
1110 
1111 
1112     // how many combinations of parental states are there?
1113     for (long par = 0; par < parents.lLength; par++) {
1114         num_parent_combos *= num_levels.list_data[parents.list_data[par]];
1115         multipliers << num_parent_combos;
1116     }
1117 
1118 
1119 #ifdef BGM_DEBUG_CDS
1120     snprintf (buf, sizeof(buf), "Multipliers: ");
1121     BufferToConsole (buf);
1122     for (long i = 0; i < multipliers.lLength; i++) {
1123         snprintf (buf, sizeof(buf), "%d ", multipliers.list_data[i]);
1124         BufferToConsole (buf);
1125     }
1126     NLToConsole();
1127 #endif
1128 
1129 
1130     /* count observations by parent combination, using direct indexing */
1131     CreateMatrix (&n_ijk, num_parent_combos, r_i, false, true, false);
1132     CreateMatrix (&n_ij, num_parent_combos, 1, false, true, false);
1133 
1134 
1135 #ifdef __MISSING_DATA__
1136     /*  METHODS FOR COMPLETE DATA  */
1137     for (long obs = 0; obs < obsData->GetHDim(); obs++) {
1138         long    index           = 0,
1139                 //multiplier    = 1,
1140                 child_state     = (*obsData)(obs, node_id);
1141 
1142         for (long par = 0; par < parents.lLength; par++) {
1143             long    this_parent         = parents.list_data[par],
1144                     this_parent_state = (*obsData)(obs, this_parent);
1145 
1146             index += this_parent_state * multipliers.list_data[par];
1147         }
1148 
1149         n_ijk.Store ((long) index, child_state, n_ijk(index, child_state) + 1 );
1150         n_ij.Store ((long) index, 0, n_ij(index, 0) + 1 );
1151     }
1152 
1153 
1154 #ifdef BGM_DEBUG_CDS
1155     snprintf (buf, sizeof(buf), "Node %d, parent(s) ", node_id);
1156     BufferToConsole (buf);
1157 
1158     for (long k = 0; k < parents.lLength; k++) {
1159         snprintf (buf, sizeof(buf), "%d ", parents.list_data[k]);
1160         BufferToConsole (buf);
1161     }
1162     NLToConsole();
1163 
1164     for (long j = 0; j < num_parent_combos; j++) {
1165         snprintf (buf, sizeof(buf), "N(%d,%d) = %f\n", node_id, j, n_ij(j,0));
1166         BufferToConsole (buf);
1167 
1168         for (long k = 0; k < r_i; k++) {
1169             snprintf (buf, sizeof(buf), "N(%d,%d,%d) = %f\n", node_id, j, k, n_ijk(j,k));
1170             BufferToConsole (buf);
1171         }
1172     }
1173 #endif
1174 
1175 
1176     if (prior_sample_size (node_id, 0) == 0) {  // K2
1177         for (long j = 0; j < num_parent_combos; j++) {
1178             log_score += LnGamma(num_levels.list_data[node_id]);    // (r-1)!
1179             log_score -= LnGamma(n_ij(j, 0) + num_levels.list_data[node_id]);   // (N+r-1)!
1180 
1181             for (long k = 0; k < r_i; k++) {
1182                 log_score += LnGamma (n_ijk(j,k) + 1);    // (N_ijk)!
1183             }
1184 
1185 #ifdef BGM_DEBUG_CDS
1186             snprintf (buf, sizeof(buf), "\tlog(r-1)! = log %d! = %lf\n", num_levels.list_data[node_id] - 1, LnGamma(num_levels.list_data[node_id]));
1187             BufferToConsole (buf);
1188 
1189             snprintf (buf, sizeof(buf), "\tlog(N(%d,%d)+r-1)! = log %d! = %lf\n", node_id, j, ((long)n_ij(j, 0)) + r_i - 1,
1190                      LnGamma(n_ij(j, 0) + num_levels.list_data[node_id]));
1191             BufferToConsole (buf);
1192 
1193             for (long k = 0; k < r_i; k++) {
1194                 snprintf (buf, sizeof(buf), "\tlog (N(%d,%d,%d)!) = log %d! = %lf\n", node_id, j, k, ((long)n_ijk(j,k)), LnGamma(n_ijk(j,k) + 1));
1195                 BufferToConsole (buf);
1196             }
1197 
1198             snprintf (buf, sizeof(buf), "j = %d\tcumulative log score = %lf\n", j, log_score);
1199             BufferToConsole (buf);
1200 #endif
1201         }
1202     } else {    // BDe
1203         n_prior_ij = prior_sample_size (node_id, 0) / num_parent_combos;
1204         n_prior_ijk = n_prior_ij / num_levels.list_data[node_id];
1205 
1206         for (long j = 0; j < num_parent_combos; j++) {
1207             log_score += LnGamma(n_prior_ij) - LnGamma(n_prior_ij + n_ij(j,0));
1208 
1209             for (long k = 0; k < num_levels.list_data[node_id]; k++) {
1210                 log_score += LnGamma(n_prior_ijk + n_ijk(j,k)) - LnGamma(n_prior_ijk);
1211             }
1212         }
1213     }
1214 
1215 #else
1216     for (long obs = 0; obs < obsData->GetHDim(); obs++) {
1217         long    index       = 0,
1218                 //multiplier   = 1,
1219                 child_state = (*obsData)(obs, node_id);
1220 
1221         for (long par = 0; par < parents.lLength; par++) {
1222             long    this_parent         = parents.list_data[par],
1223                     this_parent_state = (*obsData)(obs, this_parent);
1224 
1225 
1226             /*      // this system doesn't work with unequal levels!  :-P  afyp March 31, 2008
1227             index += this_parent_state * multiplier;
1228             multiplier *= num_levels.list_data[this_parent];
1229              */
1230             index += this_parent_state * multipliers.list_data[par];
1231         }
1232 
1233         n_ijk.Store ((long) index, child_state, n_ijk(index, child_state) + 1);
1234         n_ij.Store ((long) index, 0, n_ij(index, 0) + 1);
1235 
1236     }
1237 
1238 
1239 
1240     /* compute scoring metric */
1241     if (prior_sample_size (node_id, 0) == 0) {
1242         /* assume no prior information, use K2 metric */
1243         for (long j = 0; j < num_parent_combos; j++) {
1244             log_score += LnGamma(r_i);  // (r-1)!
1245             log_score -= LnGamma(n_ij(j, 0) + r_i); // (N+r-1)!
1246 
1247             for (long k = 0; k < num_levels.list_data[node_id]; k++) {
1248                 log_score += LnGamma(n_ijk(j,k) + 1);    // (N_ijk)!
1249             }
1250 
1251 
1252 
1253             snprintf (buf, sizeof(buf), "Node %d, parent(s) ", node_id);
1254             BufferToConsole (buf);
1255 
1256             for (long k = 0; k < parents.lLength; k++) {
1257                 snprintf (buf, sizeof(buf), "%d ", parents.list_data[k]);
1258                 BufferToConsole (buf);
1259             }
1260             NLToConsole();
1261 
1262             snprintf (buf, sizeof(buf), "j = %d\tcumulative log score = %lf\n", j, log_score);
1263             BufferToConsole (buf);
1264 
1265 #ifdef BGM_DEBUG_CDS
1266             snprintf (buf, sizeof(buf), "\tlog(r-1)! = log %d! = %lf\n", num_levels.list_data[node_id] - 1, LnGamma(num_levels.list_data[node_id]));
1267             BufferToConsole (buf);
1268 
1269             snprintf (buf, sizeof(buf), "\tlog(N+r-1)! = log %d! = %lf\n", n_ij(j, 0) + num_levels.list_data[node_id] - 1,
1270                      LnGamma(n_ij(j, 0) + num_levels.list_data[node_id]));
1271             BufferToConsole (buf);
1272 
1273             for (long k = 0; k < num_levels.list_data[node_id]; k++) {
1274                 snprintf (buf, sizeof(buf), "\tlog (N_ijk)! = log %d! = %lf\n", ((long)n_ijk(j,k)), LnGamma(n_ijk(j,k) + 1));
1275                 BufferToConsole (buf);
1276             }
1277 
1278 
1279 #endif
1280         }
1281     } else {
1282         /* calculate Bayesian Dirichlet metric (BDeu) for this node */
1283         /* see p212 in Heckerman, Geiger, and Chickering (1995) Machine Learning 20, 197-243 */
1284         n_prior_ij = prior_sample_size (node_id, 0) / num_parent_combos;
1285         n_prior_ijk = n_prior_ij / num_levels.list_data[node_id];
1286 
1287         for (long j = 0; j < num_parent_combos; j++) {
1288             log_score += LnGamma(n_prior_ij) - LnGamma(n_prior_ij + n_ij(j,0));
1289 
1290             for (long k = 0; k < num_levels.list_data[node_id]; k++) {
1291                 log_score += LnGamma(n_prior_ijk + n_ijk(j,k)) - LnGamma(n_prior_ijk);
1292             }
1293         }
1294     }
1295 #endif
1296 
1297     /*
1298     snprintf (buf, sizeof(buf), "Node %d, parents (%d): ", node_id, parents.lLength);
1299     BufferToConsole (buf);
1300 
1301     for (long par = 0; par < parents.lLength; par++)
1302     {
1303         snprintf (buf, sizeof(buf), " %d", parents.list_data[par]);
1304         BufferToConsole (buf);
1305     }
1306     snprintf (buf, sizeof(buf), " Log score = %f\n", log_score);
1307     BufferToConsole (buf);
1308     */
1309 
1310     return (log_score);
1311 }
1312 
1313 
1314 
1315 
1316 //___________________________________________________________________________________________
1317 //#define __DEBUG_CNS__
CacheNodeScores(void)1318 void Bgm::CacheNodeScores (void)
1319 {
1320     if (scores_cached) {
1321         return;
1322     }
1323 
1324     /*
1325     char buf [255];
1326     snprintf (buf, sizeof(buf), "\nCaching node scores...\n");
1327     BufferToConsole (buf);
1328     */
1329 
1330 
1331 #if defined __HYPHYMPI_ND__
1332     hyFloat  use_mpi_caching;
1333     checkParameter (useMPIcaching, use_mpi_caching, 0);
1334 
1335     if (use_mpi_caching) {
1336 
1337         // MPI_Init() is called in main()
1338         int     size,
1339                 rank;
1340 
1341         long    mpi_node;
1342 
1343         _Matrix     single_parent_scores (num_nodes, 1, false, true);
1344 
1345         _SimpleList parents,
1346                     all_but_one (num_nodes-1, 0, 1),
1347                     aux_list,
1348                     nk_tuple;
1349 
1350         hyFloat  score;
1351 
1352         char        mpi_message [256];
1353 
1354         MPI_Status  status; // contains source, tag, and error code
1355 
1356         MPI_Comm_size (MPI_COMM_WORLD, &size);  // number of processes
1357         MPI_Comm_rank (MPI_COMM_WORLD, &rank);
1358 
1359 
1360         if (rank == 0) {
1361             _String     bgmSwitch ("_BGM_SWITCH_"),
1362                         bgmStr;
1363 
1364             _List   *   this_list;
1365 
1366             SerializeBgm (bgmStr);
1367 #ifdef __DEBUG_CNS__
1368             ReportWarning (bgmStr);
1369 #endif
1370             _Matrix     * mpi_node_status = new _Matrix ((long)size, 2, false, true);
1371             long        senderID;
1372 
1373 
1374 
1375             // switch compute nodes from mpiNormal to bgm loop context
1376             for (long ni = 1; ni < size; ni++) {
1377                 MPISendString (bgmSwitch, ni);
1378             }
1379 
1380 
1381 
1382             // receive confirmation of successful switch
1383             for (long ni = 1; ni < size; ni++) {
1384                 long fromNode = ni;
1385 
1386                 _String t (MPIRecvString (ni,fromNode));
1387                 if (!t.Equal (&bgmSwitch)) {
1388                     WarnError (_String("Failed to confirm MPI mode switch at node ") & ni);
1389                     return;
1390                 } else {
1391                     ReportWarning (_String("Successful mode switch to Bgm MPI confirmed from node ") & ni);
1392                     MPISendString (bgmStr, ni);
1393                 }
1394             }
1395 
1396 
1397             long node_id;
1398 
1399             // farm out jobs to idle nodes until none are left
1400             for ( node_id = 0; node_id < num_nodes; node_id++) {
1401                 long        maxp            = max_parents.list_data[node_id],
1402                             ntuple_receipt,
1403                             this_node;
1404 
1405                 bool        remaining;
1406 
1407                 _String     mxString,
1408                             mxName;
1409 
1410 
1411                 this_list   = (_List *) node_scores.list_data[node_id];
1412 
1413                 // [_SimpleList parents] should always be empty here
1414                 hyFloat  score       = is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, parents) : ComputeContinuousScore (node_id);
1415                 _Constant   orphan_score (score);
1416 
1417 
1418                 this_list->Clear();
1419                 (*this_list) && (&orphan_score);    // handle orphan score locally
1420 
1421 
1422                 if (maxp > 0) { // don't bother to farm out trivial cases
1423                     // look for idle nodes
1424                     mpi_node = 1;
1425                     do {
1426                         if ((*mpi_node_status)(mpi_node, 0) == 0) {
1427                             ReportMPIError(MPI_Send(&node_id, 1, MPI_LONG, mpi_node, HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD), true);
1428                             ReportWarning (_String ("Sent child ") & node_id & " to node " & mpi_node);
1429 
1430                             mpi_node_status->Store (mpi_node, 0, 1);    // set busy signal
1431                             mpi_node_status->Store (mpi_node, 1, node_id);
1432 
1433                             break;
1434                         }
1435                         mpi_node++;
1436                     } while (mpi_node < size);
1437                 }
1438 
1439 
1440                 if (mpi_node == size) { // all nodes are busy, wait for one to send results
1441                     MPIReceiveScores (mpi_node_status, true, node_id);
1442                 }
1443             }
1444             // end loop over child nodes
1445 
1446 
1447             // collect remaining jobs
1448             while (1) {
1449                 // look for a busy node
1450                 mpi_node = 1;
1451                 do {
1452                     if ( (*mpi_node_status)(mpi_node,0) == 1) {
1453                         break;
1454                     }
1455                     mpi_node++;
1456                 } while (mpi_node < size);
1457 
1458 
1459                 if (mpi_node < size) {  // at least one node is busy
1460                     MPIReceiveScores (mpi_node_status, false, 0);
1461                 } else {
1462                     break;
1463                 }
1464             }
1465 
1466 
1467             // shut down compute nodes
1468             for (long shutdown = -1, mpi_node = 1; mpi_node < size; mpi_node++) {
1469                 ReportMPIError(MPI_Send(&shutdown, 1, MPI_LONG, mpi_node, HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD), true);
1470                 ReportWarning (_String ("Node 0 sending shutdown signal to node ") & mpi_node);
1471             }
1472 
1473         } else {    // compute node
1474             long        node_id,
1475                         maxp;
1476 
1477             _List       list_of_matrices;
1478 
1479             while (1) {
1480                 _String     mxString,
1481                             mxName;
1482 
1483                 list_of_matrices.Clear();
1484 
1485 
1486                 // wait for master node to issue node ID to compute
1487                 ReportMPIError (MPI_Recv (&node_id, 1, MPI_LONG, 0, HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD, &status), false);
1488                 ReportWarning (_String("Node ") & (long)rank & " received child " & (long)node_id & " from node " & (long)status.MPI_SOURCE & "\n");
1489 
1490                 if (node_id < 0) {
1491                     ReportWarning (_String("Node") & (long)rank & " recognizes shutdown signal.\n");
1492                     break;  // received shutdown message (-1)
1493                 }
1494 
1495 
1496                 maxp = max_parents.list_data[node_id];
1497 
1498                 parents.Clear();
1499                 parents.Populate (1,0,0);
1500 
1501 
1502                 // compute single parent scores
1503                 for (long par = 0; par < num_nodes; par++) {
1504                     if (par == node_id) {   // child cannot be its own parent, except in Kansas
1505                         single_parent_scores.Store (par, 0, 0.);
1506                     } else {
1507                         parents.list_data[0] = par;
1508                         single_parent_scores.Store (par, 0, is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, parents) :
1509                                                     ComputeContinuousScore (node_id));
1510                     }
1511                 }
1512 
1513 
1514                 // compute multiple parents cores
1515                 for (long np = 2; np <= maxp; np++) {
1516                     parents.Clear();
1517                     parents.Populate (np, 0, 0);
1518 
1519                     if (all_but_one.NChooseKInit (aux_list, nk_tuple, np, false)) {
1520                         bool        remaining;
1521                         long        tuple_index     = 0,
1522                                     num_ktuples      = exp(LnGamma(num_nodes) - LnGamma(num_nodes - np) - LnGamma(np+1));
1523 
1524 
1525                         _Matrix     tuple_scores (num_ktuples, 1, false, true);
1526 
1527                         for (long tuple_index = 0; tuple_index < num_ktuples; tuple_index++) {
1528                             remaining = all_but_one.NChooseK (aux_list, nk_tuple);
1529 
1530                             if (!remaining && tuple_index < num_ktuples-1) {
1531                                 ReportWarning (_String ("ERROR: Ran out of (n,k)tuples in CacheNodeScores()."));
1532                             }
1533 
1534                             for (long par_idx = 0; par_idx < np; par_idx++) {
1535                                 long par = nk_tuple.list_data[par_idx];
1536                                 if (par >= node_id) {
1537                                     par++;
1538                                 }
1539                                 parents.list_data[par_idx] = par;
1540                             }
1541 
1542                             score = is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, parents) :
1543                                     ComputeContinuousScore (node_id);
1544 
1545                             tuple_scores.Store (tuple_index, 0, (double)score);
1546                         }
1547 
1548                         if (remaining) {
1549                             ReportWarning (_String ("ERROR: Did not compute all nk-tuples in CacheNodeScores()"));
1550                         }
1551 
1552                         list_of_matrices && (&tuple_scores);        // append duplicate
1553                     } else {
1554                         ReportWarning (_String ("Failed to initialize _NTupleStorage object in Bgm::CacheNodeScores()"));
1555                     }
1556 
1557                 }
1558 
1559                 // send results to master node
1560 
1561                 ReportMPIError (MPI_Send (single_parent_scores.theData, num_nodes, MPI_DOUBLE, 0, HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD), true);
1562 
1563 
1564                 for (long np = 2; np <= maxp; np++) {
1565                     _Matrix * storedMx      = (_Matrix *) list_of_matrices.list_data[np-2];
1566                     ReportMPIError (MPI_Send (storedMx->theData, storedMx->GetHDim(), MPI_DOUBLE, 0, HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD), true);
1567                 }
1568             }
1569         }
1570     } else {    // perform single-threaded
1571 #else
1572 
1573 
1574 #if !defined __UNIX__ || defined __HEADLESS__
1575     TimerDifferenceFunction(false); // save initial timer; will only update every 1 second
1576 #if !defined __HEADLESS__
1577     SetStatusLine     (empty,_HYBgm_STATUS_LINE_CACHE, empty, 0, HY_SL_TASK|HY_SL_PERCENT);
1578 #else
1579     SetStatusLine     (_HYBgm_STATUS_LINE_CACHE);
1580 #endif
1581     hyFloat  seconds_accumulator = .0,
1582                 temp;
1583 #endif
1584 
1585     for (long node_id = 0; node_id < num_nodes; node_id++) {
1586         long        maxp        = max_parents.list_data[node_id];
1587         _List   *   this_list   = (_List *) node_scores.list_data[node_id]; // retrieve pointer to list of scores for this child node
1588 
1589         this_list->Clear();     // reset the list
1590 
1591         // prepare some containers
1592         _SimpleList parents;
1593         hyFloat  score = is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, parents) : ComputeContinuousScore (node_id);
1594         _Constant   orphan_score (score);
1595 
1596         (*this_list) && (&orphan_score);
1597 
1598 #if !defined __UNIX__ || defined __HEADLESS__
1599         temp = .0;
1600 #endif
1601 
1602         if (maxp > 0) {
1603             _Matrix     single_parent_scores (num_nodes, 1, false, true);
1604             for (long par = 0; par < num_nodes; par++) {
1605                 if (par == node_id) {   // child cannot be its own parent, except in Kansas
1606                     continue;
1607                 }
1608 
1609                 parents << par;
1610                 single_parent_scores.Store (par, 0, is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, parents) :
1611                                             ComputeContinuousScore (node_id));
1612                 parents.Clear();
1613             }
1614             (*this_list) && (&single_parent_scores);
1615         }
1616 
1617         if (maxp > 1) {
1618             _SimpleList     all_but_one (num_nodes-1, 0, 1),    // 0, 1, 2, ... , n-1
1619                             aux_list,
1620                             nk_tuple;
1621 
1622             for (long np = 2; np <= maxp; np++) {
1623                 _NTupleStorage  family_scores (num_nodes-1, np);
1624 
1625                 if (all_but_one.NChooseKInit (aux_list, nk_tuple, np, false)) {
1626                     bool    remaining;
1627                     long    res;
1628                     do {
1629                         remaining = all_but_one.NChooseK (aux_list, nk_tuple);
1630                         for (long par_idx = 0; par_idx < np; par_idx++) {
1631                             long par = nk_tuple.list_data[par_idx];
1632                             if (par >= node_id) {
1633                                 par++;
1634                             }
1635                             parents << par;
1636                         }
1637                         score = is_discrete.list_data[node_id]  ?   ComputeDiscreteScore (node_id, parents) :
1638                                 ComputeContinuousScore (node_id);
1639                         res = family_scores.Store (score, nk_tuple);
1640                         parents.Clear();
1641                     } while (remaining);
1642                 } else {
1643                     _String oops ("Failed to initialize _NTupleStorage object in Bgm::CacheNodeScores().\n");
1644                     WarnError(oops);
1645                 }
1646                 (*this_list) && (&family_scores);   // append duplicate to storage
1647             }
1648         }
1649 
1650 #if !defined __UNIX__ || defined __HEADLESS__
1651         if ((temp=TimerDifferenceFunction(true))>1.0) { // time to update
1652             seconds_accumulator += temp;
1653 
1654             _String statusLine = _HYBgm_STATUS_LINE_CACHE & " " & (node_id+1) & "/" & num_nodes
1655                                  & " nodes (" & (1.0+node_id)/seconds_accumulator & "/second)";
1656 
1657 #if defined __HEADLESS__
1658             SetStatusLine (statusLine);
1659 #else
1660             SetStatusLine (empty,statusLine,empty,100*(float)node_id/(num_nodes),HY_SL_TASK|HY_SL_PERCENT);
1661 #endif
1662             TimerDifferenceFunction (false); // reset timer for the next second
1663             yieldCPUTime (); // let the GUI handle user actions
1664 
1665             if (terminate_execution) { // user wants to cancel the analysis
1666                 break;
1667             }
1668         }
1669 #endif
1670 
1671     } // end for loop over nodes
1672 #endif
1673 
1674 
1675 #if defined __HYPHYMPI_ND__
1676     }
1677 #endif
1678 
1679 #if !defined __UNIX__ || defined __HEADLESS__
1680     SetStatusLine     (_HYBgm_STATUS_LINE_CACHE_DONE);
1681 #endif
1682 
1683     scores_cached = TRUE;
1684 
1685 
1686 }
1687 
1688 
1689 
1690 
1691 //___________________________________________________________________________________________
1692 #if defined __HYPHYMPI__
MPIReceiveScores(_Matrix * mpi_node_status,bool sendNextJob,long node_id)1693 void    Bgm::MPIReceiveScores (_Matrix * mpi_node_status, bool sendNextJob, long node_id)
1694 {
1695     _Matrix     single_parent_scores (num_nodes, 1, false, true);
1696     MPI_Status  status;
1697 
1698     ReportMPIError (MPI_Recv (single_parent_scores.theData, num_nodes, MPI_DOUBLE, MPI_ANY_SOURCE, HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD, &status), false);
1699 
1700 
1701     long        senderID    = (long) status.MPI_SOURCE,
1702                 this_node    = (long) (*mpi_node_status) (senderID, 1),
1703                 maxp       = max_parents.list_data[this_node];
1704 
1705     _List   *   this_list   = (_List *) node_scores.list_data[this_node];
1706 
1707 
1708     mpi_node_status->Store (senderID, 0, 0);    // set node status to idle
1709 
1710 
1711     _String     mxString,
1712                 mxName;
1713 
1714     ReportWarning (_String("Received scores for child ") & this_node & " from node " & senderID);
1715 #ifdef __DEBUG_MPIRS__
1716     mxName = _String ("single_parent_scores");
1717     single_parent_scores.Serialize (mxString, mxName);
1718     ReportWarning (mxString);
1719 #endif
1720     (*this_list) && (&single_parent_scores);
1721 
1722 
1723 
1724     _SimpleList     parents,
1725                     all_but_one (num_nodes-1, 0, 1),
1726                     aux_list,
1727                     nk_tuple;
1728 
1729     hyFloat      score;
1730 
1731     // parse nk-tuple results
1732     for (long np = 2; np <= maxp; np++) {
1733         _NTupleStorage  family_scores (num_nodes-1, np);
1734         bool            remaining;
1735 
1736 
1737         if (all_but_one.NChooseKInit (aux_list, nk_tuple, np, false)) {
1738             long        score_index     = 0,
1739                         num_ktuples      = exp(LnGamma(num_nodes) - LnGamma(num_nodes - np) - LnGamma(np+1)),
1740                         ntuple_receipt;
1741 
1742             _Matrix     scores_to_store (num_ktuples, 1, false, true);
1743 
1744             // receive nk-tuple indexed node scores from same compute node
1745             ReportMPIError (MPI_Recv (scores_to_store.theData, num_ktuples, MPI_DOUBLE, senderID, HYPHY_MPI_DATA_TAG, MPI_COMM_WORLD, &status), false);
1746 #ifdef __DEBUG_MPIRS__
1747             mxName = _String("tuple_scores") & np;
1748             mxString.Initialize();
1749             scores_to_store.Serialize (mxString, mxName);
1750             ReportWarning (mxString);
1751 #endif
1752 
1753             do {
1754                 remaining = all_but_one.NChooseK (aux_list, nk_tuple);  // update nk-tuple in aux_list
1755                 ntuple_receipt = family_scores.Store (scores_to_store(score_index, 0), nk_tuple);
1756                 score_index++;
1757             } while (remaining);
1758         } else {
1759             _String oops ("Failed to initialize _NTupleStorage object in Bgm::CacheNodeScores().\n");
1760             WarnError(oops);
1761         }
1762 
1763         (*this_list) && (&family_scores);
1764     }
1765 
1766 
1767     // send next job
1768     if (sendNextJob) {
1769         ReportMPIError(MPI_Send(&node_id, 1, MPI_LONG, senderID, HYPHY_MPI_VARS_TAG, MPI_COMM_WORLD), true);
1770         mpi_node_status->Store (senderID, 0, 1);    // reset busy signal
1771         mpi_node_status->Store (senderID, 1, node_id);
1772         ReportWarning (_String ("Sent child ") & node_id & " to node " & senderID);
1773     }
1774 }
1775 #endif
1776 
1777 
1778 
1779 //___________________________________________________________________________________________
ExportGraph(void)1780 _Matrix *   Bgm::ExportGraph (void)
1781 {
1782     _Matrix * export_graph = new _Matrix ((_Matrix &)dag);  // duplicator
1783 
1784     return (_Matrix *) (export_graph->makeDynamic());
1785 }
1786 
1787 
1788 
1789 
1790 //___________________________________________________________________________________________
1791 //  Allocate memory for storing node scores and edge posteriors
1792 
InitComputeLists(_List * compute_list)1793 void    Bgm::InitComputeLists (_List * compute_list)
1794 {
1795     // adaptive storage for floating point numbers
1796     _GrowingVector *    newstore;
1797     checkPointer (newstore = new _GrowingVector);
1798 
1799     for (long i = 0; i < num_nodes * num_nodes; i++) {
1800         (*compute_list) && newstore;
1801     }
1802 
1803     DeleteObject (newstore);
1804 }
1805 
1806 
1807 
1808 //___________________________________________________________________________________________
1809 //  Free up memory allocated to cached node scores and edge posteriors
1810 
DumpComputeLists(_List * compute_list)1811 void        Bgm::DumpComputeLists (_List * compute_list)
1812 {
1813     for (long i = 0; i < compute_list->lLength; i++) {
1814         ((_GrowingVector *) compute_list->list_data[i]) -> Clear();
1815     }
1816 
1817     compute_list->Clear();
1818 }
1819 
1820 
1821 
1822 //___________________________________________________________________________________________
Compute(_SimpleList * node_order,_List * results)1823 hyFloat  Bgm::Compute (_SimpleList * node_order, _List * results)
1824 {
1825     /*  Calculate equation (8) from Friedman and Koller (2003), i.e. joint probability
1826         of data by summing all families at i-th node that are consistent with node order,
1827         then taking product across all nodes in the network.  Traverse using AVL indexing.
1828 
1829         Node order is a vector of length [num_nodes] containing rank of each node, i.e.
1830         a position in an ordered sequence.  In addition, compute marginal posteriors of
1831         each potential edge in the graph, by Proposition 3.2.                               */
1832 
1833 
1834 
1835     hyFloat          log_likel   = 0.;
1836     _GrowingVector      *gv1, *gv2;
1837 
1838 
1839     // reset _GrowingVector objects stored in _List object
1840     for (long i = 0; i < num_nodes * num_nodes; i++) {
1841         gv1 = (_GrowingVector *) results->list_data[i];
1842         gv1 -> ZeroUsed();
1843     }
1844 
1845 
1846     for (long nodeIndex = 0; nodeIndex < node_order->lLength; nodeIndex++) {
1847         long                child_node      = node_order->list_data[nodeIndex],
1848                             maxp            = max_parents.list_data[child_node];
1849 
1850         _List           *   score_lists     = (_List *) node_scores.list_data[child_node];
1851         _Constant       *   orphan_score    = (_Constant *) (score_lists->list_data[0]);
1852 
1853 
1854         gv1 = (_GrowingVector *) results->list_data[child_node * num_nodes + child_node];   // store denominator in diagonal
1855         gv1->ZeroUsed();
1856         gv1 -> Store (orphan_score->Value());   // handle case of no parents
1857 
1858 
1859 
1860         if (maxp > 0) {
1861             // all nodes to the right are potential parents, except banned parents!
1862             _SimpleList     precedes;
1863             for (long parIndex = nodeIndex + 1; parIndex < node_order->lLength; parIndex++) {
1864                 long    par = node_order->list_data[parIndex];
1865 
1866                 if (banned_edges(par, child_node) == 0) {
1867                     precedes << par;
1868                 }
1869             }
1870 
1871 
1872             // handle trivial case of one parent
1873             _Matrix *   single_parent_scores    = (_Matrix *) (score_lists->list_data[1]);
1874 
1875             for (long i = 0; i < precedes.lLength; i++) {
1876                 long    par = precedes.list_data[i];
1877 
1878                 gv1 -> Store ((*single_parent_scores) (par, 0));
1879                 gv2 = (_GrowingVector *) results->list_data[child_node * num_nodes + par];
1880                 gv2 -> Store ((*single_parent_scores) (par, 0));
1881             }
1882 
1883 
1884             // more than one parent requires k-tuples
1885             if (maxp > 1) {
1886                 _SimpleList         indices (precedes.lLength, 0, 1);   // populates list with 0, 1, 2, ..., M-1
1887                 // where M is the number of eligible parents in [precedes]
1888                 _NTupleStorage *    family_scores;
1889 
1890                 for (long nparents = 2; nparents <= maxp; nparents++) {
1891                     _SimpleList     subset,
1892                                     auxil;
1893 
1894                     bool            not_finished;
1895 
1896 
1897                     if (nparents > precedes.lLength) {  // not enough eligible parents to form tuples!
1898                         break;
1899                     }
1900 
1901 
1902                     if (indices.NChooseKInit (auxil, subset, nparents, false)) {
1903                         hyFloat      tuple_score;
1904                         _SimpleList     parents;
1905 
1906                         parents.Populate (nparents, 0, 0);  // allocate memory
1907 
1908                         family_scores = (_NTupleStorage *) (score_lists->list_data[nparents]);
1909 
1910 
1911                         do {
1912                             //parents.Clear();
1913                             not_finished = indices.NChooseK (auxil, subset);    // cycle through index combinations
1914 
1915 
1916                             for (long i = 0; i < nparents; i++) {   // convert indices to parent IDs (skipping child)
1917                                 long    realized = precedes.list_data[subset.list_data[i]];
1918                                 if (realized >= child_node) {
1919                                     realized--;
1920                                 }
1921                                 parents.list_data[i] = realized;
1922                             }
1923                             parents.Sort(TRUE);
1924 
1925                             tuple_score = family_scores -> Retrieve (parents);
1926 
1927                             gv1 -> Store (tuple_score);
1928 
1929                             for (long i = 0; i < nparents; i++) {
1930                                 gv2 = (_GrowingVector *) results->list_data[child_node * num_nodes + precedes.list_data[subset.list_data[i]]];
1931                                 gv2 -> Store (tuple_score);
1932                             }
1933                         } while (not_finished);
1934                     }
1935                 }
1936             }
1937         }
1938 
1939         gv1 -> _Matrix::Store (0, 0, LogSumExpo(gv1));  // replace first entry with sum, i.e. marginal log-likelihood of child node
1940         log_likel += (*gv1)(0, 0);
1941 
1942     }
1943     // end loop over child nodes
1944 
1945     return log_likel;
1946 }
1947 
1948 
1949 
1950 //___________________________________________________________________________________________
Compute(void)1951 hyFloat Bgm::Compute (void)
1952 {
1953     //CacheNodeScores();
1954 
1955     // return posterior probability of a given network defined by 'dag' matrix
1956     hyFloat  log_score = 0.;
1957 
1958     for (long node_id = 0; node_id < num_nodes; node_id++) {
1959         log_score += is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id) : ComputeContinuousScore (node_id);
1960     }
1961 
1962     return log_score;
1963 }
1964 
1965 
1966 
1967 //___________________________________________________________________________________________
Compute(_Matrix * g)1968 hyFloat Bgm::Compute (_Matrix * g)
1969 {
1970     //CacheNodeScores();
1971 
1972     // return posterior probability of a given network defined by 'dag' matrix
1973     hyFloat  log_score = 0.;
1974 
1975     for (long node_id = 0; node_id < num_nodes; node_id++) {
1976         log_score += is_discrete.list_data[node_id] ? ComputeDiscreteScore (node_id, g) : ComputeContinuousScore (node_id, g);
1977     }
1978 
1979     return log_score;
1980 }
1981 
1982 
1983 
1984 
1985 //___________________________________________________________________________________________
Optimize(_AssociativeList const * options)1986 _Matrix *   Bgm::Optimize (_AssociativeList const * options) {
1987 #ifdef DEBUG_OPTIMIZE
1988     char        buf [255];
1989 #endif
1990 
1991     if (!scores_cached) {
1992         CacheNodeScores();
1993     }
1994 
1995 
1996     hyFloat  num_restarts,           // HBL settings
1997                 num_randomize;
1998 
1999     checkParameter (maxNumRestart, num_restarts, 1.);
2000     checkParameter (numRandomize, num_randomize, num_nodes);
2001     //checkParameter (useNodeOrder, use_node_order, 0);
2002 
2003 
2004 
2005     // char     bug [255];
2006     hyFloat      optMethod;  /* 0 = K2 fixed order; 1 = K2 shuffle order with restarts */
2007     /* 2 = MCMC fixed order; 3 = MCMC over networks and orders */
2008     checkParameter (bgmOptimizationMethod, optMethod, 0.);
2009 
2010     if (optMethod > 1) {
2011         return GraphMCMC( (optMethod < 3) ? TRUE : FALSE);      // use MCMC to evaluate posterior probability of network space
2012     }
2013 
2014 
2015 
2016     hyFloat  this_score, best_score, next_score;
2017 
2018     _Matrix     orderMx (num_nodes, num_nodes, false, true),
2019                 best_dag (num_nodes, num_nodes, false, true);
2020 
2021     bool        reshuffle_order = FALSE;
2022 
2023 
2024     // if best node order hasn't been estimated,
2025     if (optMethod == 1 && best_node_order.lLength == 0) {
2026         reshuffle_order = TRUE;
2027         best_node_order.Populate (num_nodes, 0, 1);
2028         best_node_order.Permute (1);
2029     }
2030 
2031 
2032     //  Convert node order to binary matrix where edge A->B is permitted if
2033     //  orderMx[B][A] = 1, i.e. B is to the right of A in node order.
2034     for (long i = 0; i < num_nodes; i++) {
2035         long    child = best_node_order.list_data[i];
2036 
2037         for (long j = 0; j < num_nodes; j++) {
2038             long    parent = best_node_order.list_data[j];
2039 
2040             orderMx.Store (parent, child, (j > i) ? 1 : 0);
2041         }
2042     }
2043 
2044 
2045     // greedy hill-climbing algorithm (K2)
2046     ResetGraph (nil);
2047     best_score = Compute();     // best over all node orders (if we're reshuffling)
2048 
2049 
2050     for (long iter = 0; iter < num_restarts; iter++) {
2051         next_score = Compute();     // reset to empty graph score
2052 
2053 
2054         for (long child = 0; child < num_nodes; child++) {
2055             long        num_parents = 0,
2056                         improvement_flag = 0,
2057                         next_parent_to_add;
2058 
2059             do {
2060                 for (long parent = 0; parent < num_nodes; parent++) {
2061                     if ( (parent != child)                      // must meet all conditions!
2062                             && (dag(parent, child) == 0)
2063                             && (orderMx (parent, child) == 1)
2064                             && banned_edges(parent, child) == 0) {
2065                         dag.Store (parent, child, 1);
2066                         this_score = Compute();
2067 
2068                         if (this_score > next_score) {
2069                             improvement_flag    = 1;
2070                             next_score          = this_score;
2071                             next_parent_to_add  = parent;
2072                         }
2073                         dag.Store (parent, child, 0);   // revert
2074                     }
2075                 }
2076 
2077                 if (improvement_flag) { // adding another parent improves network score
2078                     dag.Store (next_parent_to_add, child, 1);
2079                     num_parents++;
2080                     improvement_flag = 0;   // reset for next parent
2081                 } else {
2082                     break;  // unable to improve further
2083                 }
2084             } while (num_parents < max_parents.list_data[child]);
2085         }
2086 
2087 
2088         if (reshuffle_order) {
2089             this_score = Compute();
2090 
2091             if (this_score > best_score) {
2092                 best_score = this_score;
2093                 best_dag = (_Matrix &) dag;     // store graph optimized from the current ordering
2094             }
2095 
2096             ResetGraph (nil);
2097 
2098             best_node_order.Permute (1);
2099 
2100             for (long i = 0; i < num_nodes; i++) {
2101                 long    child = best_node_order.list_data[i];
2102                 for (long j = 0; j < num_nodes; j++) {
2103                     long    parent = best_node_order.list_data[j];
2104                     orderMx.Store (parent, child, (j > i) ? 1 : 0);
2105                 }
2106             }
2107         } else {
2108             break;  // only one iteration when node ordering is known
2109         }
2110     }
2111 
2112     if (reshuffle_order) {
2113         dag = (_Matrix &) best_dag;
2114         best_node_order.Clear();    // reset to initial state
2115     }
2116 
2117 
2118     _Matrix * result = new _Matrix(num_nodes * num_nodes, 2, false, true);
2119     result->Store (0, 0, Compute());
2120 
2121     for (long row = 0; row < num_nodes; row++) {
2122         for (long col = 0; col < num_nodes; col++) {
2123             result->Store (row*num_nodes+col, 1, dag(row, col));
2124         }
2125     }
2126 
2127     return (_Matrix*)(result->makeDynamic());
2128 }
2129 
2130 
2131 
2132 //___________________________________________________________________________________________
GraphMCMC(bool fixed_order)2133 _Matrix *   Bgm::GraphMCMC (bool fixed_order)
2134 {
2135     _Matrix     *   proposed_graph  = new _Matrix (num_nodes, num_nodes, false, true),
2136     *   orderMx         = new _Matrix (num_nodes, num_nodes, false, true);
2137 
2138     _Matrix         current_graph (num_nodes, num_nodes, false, true),
2139                     best_graph (num_nodes, num_nodes, false, true);
2140 
2141     hyFloat      mcmc_steps, mcmc_burnin, mcmc_samples,
2142                     current_score, proposed_score, best_score,
2143                     num_randomize,
2144                     lk_ratio;
2145 
2146     long            sampling_interval;
2147 
2148     _SimpleList *   proposed_order  = new _SimpleList();
2149     _SimpleList     current_order;
2150 
2151 
2152     // parse HBL settings
2153     checkParameter (mcmcSteps, mcmc_steps, 0);
2154     if (mcmc_steps == 0) {
2155         _String oops ("You asked HyPhy to run MCMC with zero steps in the chain! Did you forget to set Bgm_MCMC_STEPS?\n");
2156         WarnError (oops);
2157     }
2158 
2159     checkParameter (mcmcBurnin, mcmc_burnin, 0);
2160     checkParameter (mcmcSamples, mcmc_samples, 0);
2161     if (mcmc_samples == 0) {
2162         _String oops ("You're asking me to run MCMC without reporting any results.  Did you forget to set Bgm_MCMC_SAMPLES?\n");
2163         WarnError (oops);
2164     }
2165 
2166     sampling_interval = (long) mcmc_steps / (long) mcmc_samples;
2167 
2168     checkParameter (numRandomize, num_randomize, num_nodes*num_nodes);
2169 
2170 
2171     //  Contents:   (1) chain trace;
2172     //              (2) model-averaged edge probabilities;
2173     //              (3) best graph;
2174     _Matrix     * result;
2175 
2176     checkPointer (result = new _Matrix ( (mcmc_samples > num_nodes*num_nodes) ? mcmc_samples : num_nodes*num_nodes, 3, false, true));
2177 
2178 
2179 
2180     //  Set current graph to member object (dag, an empty graph by default)
2181     ResetGraph (proposed_graph);
2182 
2183     //  does this graph conform to enforced/banned edges, maximum parentage settings?
2184     _SimpleList pars;
2185     for (long chi = 0; chi < num_nodes; chi++) {
2186         pars.Clear();
2187         for (long par = 0; par < num_nodes; par++) {
2188             if ( (*proposed_graph)(par,chi) == 1) {
2189                 if ( banned_edges(par,chi) ) {
2190                     proposed_graph->Store(par, chi, 0);
2191                     ReportWarning(_String("Deleted banned edge ") & par & "->" & chi & " from graph");
2192                 }
2193                 pars << par;
2194             } else if ( (*proposed_graph)(par,chi) == 0 && enforced_edges(par,chi) ) {
2195                 proposed_graph->Store(par, chi, 1);
2196                 ReportWarning(_String("Restored enforced edge ") & par & "->" & chi & " to graph");
2197             }
2198         }
2199 
2200         if (pars.lLength > max_parents.list_data[chi]) {
2201             ReportWarning(_String("Number of parents (") & (long)pars.lLength & ")exceed maximum setting for child node " & chi & ": " & max_parents.list_data[chi] & "\n");
2202             pars.Permute(1);
2203             for (long p = 0; p < (long)pars.lLength - max_parents.list_data[chi]; p++) {
2204                 proposed_graph->Store(pars.list_data[p], chi, 0);
2205                 ReportWarning(_String("Deleted excess edge ") & pars.list_data[p] & "->" & chi & " from graph");
2206             }
2207         }
2208     }
2209 
2210 
2211 
2212     if (fixed_order) {
2213         // coerce graph to conform to user-defined node order
2214         if (best_node_order.lLength == 0) {
2215             _String oops ("Cannot run fixed-order graph MCMC without defined order (via order-MCMC). Run CovarianceMatrix(receptacle, BGM) first.");
2216             WarnError (oops);
2217 
2218             result = new _Matrix();     // return an empty matrix
2219         } else {
2220             proposed_order->Populate(num_nodes, 0, 0);  // allocate memory
2221             for (long i = 0; i < num_nodes; i++) {
2222                 proposed_order->list_data[i] = best_node_order.list_data[i];
2223             }
2224 
2225             ReportWarning (_String("Transferring best node order to proposal: ") & (_String *)proposed_order->toStr());
2226         }
2227     } else {
2228         if (best_node_order.lLength == 0) {
2229             // quickly generate a node order from graph
2230             for (long order_index, onode, node = 0; node < num_nodes; node++) {
2231                 // locate the lowest-ranked parent in the order, if any
2232                 for (order_index = 0; order_index < proposed_order->lLength; order_index++) {
2233                     onode = proposed_order->list_data[order_index];
2234                     if ((*proposed_graph) (onode, node)) {
2235                         // insert new node immediately left of this parent
2236                         proposed_order->InsertElement ((BaseRef) node, order_index, false, false);
2237                         break;
2238                     }
2239                 }
2240                 // if reached end of order list without finding a parent, push node onto start of list
2241                 if (order_index == proposed_order->lLength) {
2242                     proposed_order->InsertElement ((BaseRef) node, 0, false, false);
2243                 }
2244             }
2245             ReportWarning(_String("Constructed node order from graph:\n") & (_String *)proposed_order->toStr() & "\n");
2246         } else {
2247             // restore order
2248             proposed_order->Populate(num_nodes,0,1);
2249             for (long i = 0; i < num_nodes; i++) {
2250                 proposed_order->list_data[i] = best_node_order.list_data[i];
2251             }
2252         }
2253     }
2254 
2255 
2256     // randomize graph
2257     if (num_randomize > 0) {
2258         RandomizeGraph (proposed_graph, proposed_order, (long)num_randomize, fixed_order);
2259     } else {
2260         ReportWarning(_String("NUM_RANDOMIZE set to 0, skipping initial RandomizeGraph()\n"));
2261     }
2262 
2263 
2264 
2265     //  Populate order matrix -- and while we're at it, set proposed order to current order
2266     for (long i = 0; i < num_nodes; i++) {
2267         long    child = proposed_order->list_data[i];
2268 
2269         for (long j = 0; j < num_nodes; j++) {
2270             //  if A precedes B, then set entry (A,B) == 1, permitting edge A->B
2271             orderMx->Store (proposed_order->list_data[j], child, (j > i) ? 1 : 0);
2272         }
2273     }
2274     ReportWarning(_String("Populated order matrix\n"));
2275 
2276 
2277     // status line
2278 #if !defined __UNIX__ || defined __HEADLESS__
2279     long    updates = 0;
2280     TimerDifferenceFunction (false);
2281 #if defined __HEADLESS__
2282     SetStatusLine (_HYBgm_STATUS_LINE_MCMC);
2283 #else
2284     SetStatusLine (empty, _HYBgm_STATUS_LINE_MCMC, empty, 0, HY_SL_TASK|HY_SL_PERCENT);
2285 #endif
2286 #endif
2287 
2288 
2289 
2290 
2291 
2292     current_order = (*proposed_order);
2293     current_graph = (_Matrix &) (*proposed_graph);
2294     best_graph = (_Matrix &) (*proposed_graph);
2295     best_score = proposed_score = current_score = Compute(proposed_graph);
2296 
2297     ReportWarning(_String("Initiating MCMC with graph:\n") & (_String *) current_graph.toStr() );
2298 
2299 
2300     // MAIN LOOP
2301     for (long step = 0; step < mcmc_steps + mcmc_burnin; step++) {
2302         RandomizeGraph (proposed_graph, proposed_order, 1, fixed_order);
2303 
2304         proposed_score = Compute(proposed_graph);
2305 
2306 
2307 #ifdef __DEBUG_GMCMC__
2308         snprintf (bug, sizeof(bug), "Current score = %f\n", current_score);
2309         BufferToConsole (bug);
2310         snprintf (bug, sizeof(bug), "Propose graph:\n");
2311         BufferToConsole (bug);
2312         PrintGraph (proposed_graph);
2313         snprintf (bug, sizeof(bug), "Proposed score = %f\n", proposed_score);
2314         BufferToConsole (bug);
2315 #endif
2316 
2317 
2318         lk_ratio = exp(proposed_score - current_score);
2319 
2320         if (lk_ratio > 1. || genrand_real2() < lk_ratio) {  // Metropolis-Hastings
2321 #ifdef __DEBUG_GMCMC__
2322             snprintf (bug, sizeof(bug), "Accept move\n");
2323             BufferToConsole (bug);
2324 #endif
2325             current_graph       = (_Matrix &) (*proposed_graph);        // accept proposed graph
2326             current_score   = proposed_score;
2327 
2328             for (long foo = 0; foo < num_nodes; foo++) {
2329                 current_order.list_data[foo] = proposed_order->list_data[foo];
2330             }
2331 
2332             if (current_score > best_score) {
2333                 best_score = current_score;
2334                 best_graph = (_Matrix &) current_graph;         // keep track of best graph ever visited by chain
2335 
2336 #ifdef __DEBUG_GMCMC__
2337                 PrintGraph (&best_graph);
2338 
2339                 snprintf (bug, sizeof(bug), "Update best node ordering: ");
2340                 BufferToConsole (bug);
2341                 for (long deb = 0; deb < num_nodes; deb++) {
2342                     snprintf (bug, sizeof(bug), "%d ", current_order.list_data[deb]);
2343                     BufferToConsole (bug);
2344                 }
2345                 NLToConsole ();
2346 #endif
2347             }
2348         } else {
2349 #ifdef __DEBUG_GMCMC__
2350             snprintf (bug, sizeof(bug), "Reject move\n");
2351             BufferToConsole (bug);
2352 #endif
2353             // revert proposal
2354             for (long row = 0; row < num_nodes; row++) {
2355                 proposed_order->list_data[row] = current_order.list_data[row];
2356 
2357                 for (long col = 0; col < num_nodes; col++) {
2358                     proposed_graph->Store(row, col, current_graph(row, col));
2359                 }
2360             }
2361         }
2362 
2363 
2364 
2365         if (step >= mcmc_burnin) {  // handle output
2366             if ( (step-(long)mcmc_burnin) % sampling_interval == 0) {
2367                 long    entry   = (long int) ((step-mcmc_burnin) / sampling_interval);
2368 
2369                 result->Store (entry, 0, current_score);        // update chain trace
2370 
2371                 for (long row = 0; row < num_nodes; row++) {    // update edge tallies
2372                     for (long offset=row*num_nodes, col = 0; col < num_nodes; col++) {
2373                         // row = parent, col = child
2374                         result->Store (offset+col, 1, (*result)(offset+col,1) + current_graph(row, col));
2375                     }
2376                 }
2377             }
2378         }
2379 
2380 #if !defined __UNIX__ || defined __HEADLESS__
2381         if (TimerDifferenceFunction(true)>1.0) { // time to update
2382             updates ++;
2383             _String statusLine = _HYBgm_STATUS_LINE_MCMC & " " & (step+1) & "/" & (mcmc_steps + mcmc_burnin)
2384                                  & " steps (" & (1.0+step)/updates & "/second)";
2385 #if defined __HEADLESS__
2386             SetStatusLine     (statusLine);
2387 #else
2388             SetStatusLine     (empty,statusLine,empty,100*step/(mcmc_steps + mcmc_burnin),HY_SL_TASK|HY_SL_PERCENT);
2389 #endif
2390             TimerDifferenceFunction(false); // reset timer for the next second
2391             yieldCPUTime(); // let the GUI handle user actions
2392             if (terminate_execution) { // user wants to cancel the analysis
2393                 break;
2394             }
2395         }
2396 #else
2397         if (step % (long)((mcmc_steps + mcmc_burnin)/100) == 0) {
2398             ReportWarning (_String ("GraphMCMC at step ") & step & " of " & (mcmc_steps+mcmc_burnin) & " with posterior " & current_score & " and graph:\n" & (_String *)current_graph.toStr() & " and order:\n" & (_String *)current_order.toStr() & "\n");
2399         }
2400 #endif
2401     }
2402 
2403     // convert edge tallies to frequencies, and report best graph
2404     for (long row = 0; row < num_nodes; row++) {
2405         for (long offset=row*num_nodes, col = 0; col < num_nodes; col++) {
2406             result->Store (offset+col, 1, (*result)(offset+col,1)/mcmc_samples);
2407             result->Store (offset+col, 2, best_graph(row, col));
2408         }
2409     }
2410 
2411 
2412     // set dag member to last visit graph
2413     dag = (_Matrix &) current_graph;
2414     best_node_order = current_order;
2415 
2416     delete (proposed_graph);
2417     delete (orderMx);
2418     delete (proposed_order);
2419 
2420     return (_Matrix *) (result->makeDynamic());
2421 }
2422 
2423 
2424 
2425 //___________________________________________________________________________________________
2426 //  DEPRECATED
TryEdge(long child,long parent,long operation,hyFloat old_score)2427 hyFloat  Bgm::TryEdge (long child, long parent, long operation, hyFloat old_score)
2428 {
2429 #ifdef _NEVER_DEFINED_
2430     hyFloat  log_score,
2431                 last_state1 = dag (child, parent),
2432                 last_state2 = dag (parent, child);
2433 
2434     /*
2435     if (is_marginal)    // modification affects only child node, don't recalculate other scores
2436         log_marginal = is_discrete(child,0) ? ComputeDiscreteScore (child) : ComputeContinuousScore (child);
2437      */
2438 
2439     switch (operation) {
2440     case 0:
2441         if (MarginalSum (FALSE, child) < max_parents && banned_edges(parent,child) == 0) {
2442             dag.Store (parent, child, 1.);  // add an edge [parent] --> [child]
2443             dag.Store (child, parent, 0.);
2444         } else {
2445             return old_score;
2446         }
2447         break;
2448 
2449     case 1: // reverse an edge
2450         if (MarginalSum (FALSE, parent) < max_parents && banned_edges(child,parent) == 0) {
2451             dag.Store (child, parent, last_state2); // switch states
2452             dag.Store (parent, child, last_state1);
2453         } else {
2454             return old_score;
2455         }
2456         break;
2457 
2458     case 2: // delete an edge
2459         dag.Store (child, parent, 0.);
2460         dag.Store (parent, child, 0.);
2461         break;
2462 
2463     default:
2464         return old_score;
2465         break;
2466     }
2467 
2468     // PrintGraph(nil);
2469 
2470     if (IsCyclic()) {   // new graph is cyclic, reject
2471         dag.Store (child, parent, last_state1);
2472         dag.Store (parent, child, last_state2);
2473         return old_score;
2474     }
2475 
2476 
2477     /*
2478     if (is_marginal)        // update one term only
2479         log_score = old_score - log_marginal + (is_discrete(child,0) ? ComputeDiscreteScore (child) : ComputeContinuousScore (child));
2480     else
2481         log_score = Compute();
2482      */
2483 
2484 
2485     log_score = Compute();
2486 
2487     if (log_score > old_score) {    // keep new graph
2488         return log_score;
2489     } else {
2490         // restore previous settings
2491         dag.Store (child, parent, last_state1);
2492         dag.Store (parent, child, last_state2);
2493     }
2494 
2495     return old_score;
2496 #endif
2497     return 0;
2498 }
2499 
2500 
2501 
2502 
2503 //___________________________________________________________________________________________
2504 //  Calculating equation (8) of Friedman and Koller (2003) requires the term:
2505 //      log ( Sum ( score(x_i, U | D) ) )
2506 //  where x_i are very small numbers stored as log(x_i)'s but taking the exponential of
2507 //  the log values can result in numerical underflow.
2508 
LogSumExpo(_GrowingVector * log_values)2509 hyFloat      Bgm::LogSumExpo (_GrowingVector * log_values)
2510 {
2511     long        size            = log_values->GetUsed();
2512     hyFloat  sum_exponents   = 0.;
2513 
2514 
2515     // handle some trivial cases
2516     if (size == 0) {
2517         return 0.;    // log(exp(0)) = log(1) = 0
2518     } else if (size == 1) {
2519         return (*log_values)(0,0);    // log(exp(log(x)))
2520     }
2521 
2522 
2523     // find the largest (least negative) log-value
2524     hyFloat      max_log  = (*log_values) (0, 0),
2525                     this_log;
2526 
2527     for (long val = 1; val < size; val++) {
2528         this_log = (*log_values) (val, 0);
2529 
2530         if (this_log > max_log) {
2531             max_log = this_log;
2532         }
2533     }
2534 
2535     // go back through log values and increment by max log value
2536     //      This will cause some underflow for the smallest values, but we can
2537     //  use this approximation to handle very large ranges of values.
2538     //  NOTE: subtracting a negative value.
2539     for (long val = 0; val < size; val++) {
2540         sum_exponents += exp( (*log_values) (val, 0) - max_log );
2541     }
2542 
2543     return (log(sum_exponents) + max_log);
2544 }
2545 
2546 
2547 //___________________________________________________________________________________________
2548 //  Markov chain Monte Carlo for Bgm
CovarianceMatrix(_SimpleList * unused)2549 _PMathObj Bgm::CovarianceMatrix (_SimpleList * unused)
2550 {
2551     //char  bug [255];
2552 
2553 
2554     hyFloat      mcmc_steps,
2555                     mcmc_burnin,
2556                     mcmc_samples,
2557                     mcmc_nchains,
2558                     mcmc_temp;
2559 
2560     _SimpleList *   node_order;
2561 
2562     if (last_node_order.lLength == 0) {
2563         node_order = new _SimpleList (num_nodes, 0, 1);     // populate with series (0, 1, ..., num_nodes)
2564 
2565 
2566         node_order->Permute(1);     // initialize with randomized node ordering
2567 
2568     } else {
2569         node_order = new _SimpleList (last_node_order);
2570     }
2571 
2572 
2573 
2574     _Matrix *       mcmc_output;
2575 
2576 
2577     // acquisition of HBL arguments with a few sanity checks
2578     checkParameter (mcmcSteps, mcmc_steps, 0);
2579     checkParameter (mcmcBurnin, mcmc_burnin, 0);
2580     checkParameter (mcmcSamples, mcmc_samples, 0);
2581     checkParameter (mcmcNumChains, mcmc_nchains, 1);
2582     checkParameter (mcmcTemperature, mcmc_temp, 1);
2583 
2584     if (mcmc_steps == 0) {
2585         _String oops ("You asked HyPhy to run MCMC with zero steps in the chain! Did you forget to set Bgm_MCMC_STEPS?\n");
2586         WarnError (oops);
2587     }
2588 
2589     if (mcmc_samples == 0) {
2590         _String oops ("You're asking me to run MCMC without reporting any results.  Did you forget to set Bgm_MCMC_SAMPLES?\n");
2591         WarnError (oops);
2592     }
2593 
2594 
2595 
2596     // allocate space for output
2597     if (mcmc_nchains == 1) {
2598         mcmc_output = RunColdChain (node_order, (long) mcmc_burnin, -1);    // negative argument indicates no sampling
2599         delete (mcmc_output);       // discard burn-in
2600 
2601         mcmc_output = RunColdChain (node_order, (long) mcmc_steps, (long int) (mcmc_steps / mcmc_samples) );
2602     } else {    // run coupled MCMC
2603         node_order->Permute(1);
2604         // RunHotChain (node_order, (long)
2605 
2606         /* STILL UNDER DEVELOPMENT! */
2607     }
2608 
2609     DeleteObject (node_order);
2610 
2611     return mcmc_output;
2612 }
2613 
2614 
2615 
2616 //___________________________________________________________________________________________
RunColdChain(_SimpleList * current_order,long nsteps,long sample_lag)2617 _Matrix *   Bgm::RunColdChain (_SimpleList * current_order, long nsteps, long sample_lag)
2618 {
2619     /*  Execute Metropolis sampler using a swap of two nodes in an ordered sequence
2620         as a proposal function.  The posterior probabilities of edges in the network are
2621         stored as a member matrix [edge_posteriors].  Note that the total number of
2622         possible orderings (i.e. permutations of a sequence of length N) is factorial(N),
2623         and can possibly be computed exactly for N < 8.                                     */
2624 
2625 #ifdef DEBUG_RCC
2626     char        buf [255];
2627 #endif
2628 
2629     long        row,
2630                 first_node, second_node,
2631                 mcmc_samples = (long int) (nsteps / sample_lag);
2632 
2633     _SimpleList proposed_order;
2634 
2635     _Matrix *   mcmc_output     = new _Matrix (mcmc_samples > (num_nodes*num_nodes) ? mcmc_samples : (num_nodes*num_nodes), 4, false, true);
2636     //  Contents:   (1) chain trace; (2) edge marginal posterior probabilities;
2637     //              (3) best node ordering; (4) last node ordering (for restarting chains).
2638 
2639     _List   *   clist           = new _List ();     // compute list
2640 
2641 
2642     InitComputeLists (clist);       // storage for marginal node- and edge-scores
2643 
2644 
2645 
2646     hyFloat  lk_ratio,
2647                 prob_current_order  = Compute (current_order, clist),
2648                 prob_proposed_order = 0.,
2649                 best_prob = prob_current_order;
2650 
2651 
2652     /* SLKP 20070926
2653         Add user feedback via the console window status bar
2654     */
2655     VerbosityLevel();
2656     long howManyTimesUpdated = 0; // how many times has the line been updated; is the same as the # of seconds
2657     TimerDifferenceFunction(false); // save initial timer; will only update every 1 second
2658 #ifdef __HEADLESS__
2659     SetStatusLine (_HYBgm_STATUS_LINE_MCMC & (sample_lag < 0 ? _String(" burnin"):empty));
2660 #else
2661 #ifndef __UNIX__
2662     SetStatusLine     (empty,_HYBgm_STATUS_LINE_MCMC & (sample_lag < 0 ? _String(" burnin"):empty),empty,0,HY_SL_TASK|HY_SL_PERCENT);
2663 #else
2664     _String         * progressReportFile = NULL;
2665     _Variable       * progressFile = CheckReceptacle (&optimizationStatusFile, empty, false);
2666 
2667     if (progressFile->ObjectClass () == STRING) {
2668         progressReportFile = ((_FString*)progressFile->Compute())->theString;
2669     }
2670     ConsoleBGMStatus (_HYBgm_STATUS_LINE_MCMC & (sample_lag < 0 ?  _String(" burnin"):empty), -1., progressReportFile);
2671 #endif
2672 #endif
2673 
2674     /* SLKP */
2675 
2676 
2677     best_node_order = (_SimpleList &) (*current_order);     // initialize class member variable
2678 
2679 #ifdef DEBUG_RCC
2680     snprintf (buf, sizeof(buf), "Initial node order (log L = %f): ", prob_current_order);
2681     BufferToConsole (buf);
2682     for (long i = 0; i < num_nodes; i++) {
2683         snprintf (buf, sizeof(buf), "%d ", best_node_order.list_data[i]);
2684         BufferToConsole (buf);
2685     }
2686     NLToConsole();
2687 #endif
2688 
2689     for (long step = 0; step < nsteps; step++) {
2690         // copy contents of current order to proposed ordering for permutation
2691         proposed_order.Clear();
2692         proposed_order.Duplicate (current_order);
2693 
2694 
2695         // swap random nodes in ordered sequence
2696         first_node  = genrand_int32() % num_nodes;
2697         second_node = genrand_int32() % num_nodes;
2698 
2699         while (first_node == second_node) { // keep sampling until pick different node
2700             second_node = genrand_int32() % num_nodes;
2701         }
2702 
2703         proposed_order.Swap (first_node, second_node);
2704 
2705 
2706         // calculate probability of proposed order --- edge posterior probs loaded into member variable
2707         prob_proposed_order = Compute (&proposed_order, clist);
2708 
2709 #ifdef DEBUG_RCC
2710         snprintf (buf, sizeof(buf), "Proposed node order (log L = %f): ", prob_proposed_order);
2711         BufferToConsole (buf);
2712         for (long i = 0; i < num_nodes; i++) {
2713             snprintf (buf, sizeof(buf), "%d ", proposed_order.list_data[i]);
2714             BufferToConsole (buf);
2715         }
2716         NLToConsole();
2717 #endif
2718 
2719 
2720         lk_ratio    = exp(prob_proposed_order - prob_current_order);
2721 
2722         if (lk_ratio > 1. || genrand_real2() < lk_ratio) {  // then set current to proposed order
2723             (*current_order).Clear();
2724             (*current_order) << proposed_order;
2725             prob_current_order = prob_proposed_order;
2726 
2727 #ifdef DEBUG_RCC
2728             snprintf (buf, sizeof(buf), "Accept\n");
2729             BufferToConsole (buf);
2730 #endif
2731 
2732             if (prob_proposed_order > best_prob) {
2733                 best_prob = prob_proposed_order;        // update best node ordering
2734                 best_node_order = proposed_order;
2735             }
2736         }
2737 
2738 
2739         if (sample_lag >= 0) {
2740             // write Markov chain state to output matrix
2741 
2742             if (step % sample_lag == 0 && step > 0) {   // AFYP 2011/3/2 added this second conditional because the zero-th step was getting recorded
2743                 _GrowingVector *    gv;
2744                 hyFloat          denom;
2745 
2746                 row = (long int) (step / sample_lag) - 1;   // AFYP 2011/3/2 need to shift over other info to maintain 0-index
2747 
2748                 // report log likelhood
2749                 mcmc_output->Store (row, 0, prob_current_order);
2750 
2751 
2752 #ifdef _DEBUG_RCC
2753                 snprintf (buf, sizeof(buf), "Contents of clist:\n");
2754                 BufferToConsole (buf);
2755 
2756                 for (long i = 0; i < num_nodes; i++) {
2757                     for (long j = 0; j < num_nodes; j++) {
2758                         gv = (_GrowingVector *) clist->list_data[i * num_nodes + j];
2759 
2760                         snprintf (buf, sizeof(buf), "i=%d, j=%d, n=%d: ", i, j, gv->GetUsed());
2761                         BufferToConsole (buf);
2762 
2763                         for (long k = 0; k < gv->GetUsed(); k++) {
2764                             snprintf (buf, sizeof(buf), "%f ", (*gv)(k,0));
2765                             BufferToConsole (buf);
2766                         }
2767 
2768                         NLToConsole ();
2769                     }
2770                 }
2771 #endif
2772 
2773 
2774 
2775                 // compute marginal edge posteriors
2776                 for (long child = 0; child < num_nodes; child++) {
2777                     gv      = (_GrowingVector *) clist->list_data[child * num_nodes + child];
2778                     denom   = (*gv)(0, 0);  // first entry holds node marginal post pr
2779 
2780 
2781                     for (long edge, parent = 0; parent < num_nodes; parent++) {
2782                         if (parent == child) {
2783                             continue;
2784                         }
2785 
2786                         edge    = child * num_nodes + parent;
2787                         gv      = (_GrowingVector *) clist->list_data[edge];
2788 
2789 
2790                         if (gv->GetUsed() > 0) {
2791 #ifdef DEBUG_RCC
2792                             snprintf (buf, sizeof(buf), "edge %d: pp = %f, gv = ", edge, (*mcmc_output)(edge, 1));
2793                             BufferToConsole (buf);
2794 
2795                             for (long i = 0; i < gv->GetUsed(); i++) {
2796                                 snprintf (buf, sizeof(buf), "%f ", (*gv) (i,0));
2797                                 BufferToConsole (buf);
2798                             }
2799 
2800                             snprintf (buf, sizeof(buf), "sum to %f", LogSumExpo (gv));
2801                             BufferToConsole (buf);
2802 #endif
2803 
2804                             mcmc_output->Store (edge, 1, (*mcmc_output)(edge, 1) + exp (LogSumExpo(gv) - denom));
2805 
2806 #ifdef DEBUG_RCC
2807                             snprintf (buf, sizeof(buf), "= %f\n", (*mcmc_output)(edge, 1));
2808                             BufferToConsole (buf);
2809 #endif
2810                         }
2811 
2812 
2813                     }
2814                 }
2815             }
2816         }
2817 
2818 
2819         /*SLKP 20070926; include progress report updates */
2820         if (TimerDifferenceFunction(true)>1.0) { // time to update
2821             howManyTimesUpdated ++;
2822             _String statusLine = _HYBgm_STATUS_LINE_MCMC & (sample_lag < 0 ? _String(" burnin"):empty) & " " & (step+1) & "/" & nsteps
2823                                  & " steps (" & (1.0+step)/howManyTimesUpdated & "/second)";
2824 #if  defined __HEADLESS__
2825             SetStatusLine (statusLine);
2826 #else
2827 #if !defined __UNIX__ || defined __HYPHYQT__ || defined __HYPHY_GTK__
2828             SetStatusLine     (empty,statusLine,empty,100*step/(nsteps),HY_SL_TASK|HY_SL_PERCENT);
2829             yieldCPUTime(); // let the GUI handle user actions
2830             if (terminate_execution) { // user wants to cancel the analysis
2831                 break;
2832             }
2833 #endif
2834 #if defined __UNIX__ && ! defined __HEADLESS__ && !defined __HYPHYQT__ && !defined __HYPHY_GTK__
2835             ConsoleBGMStatus (statusLine, 100.*step/(nsteps), progressReportFile);
2836 #endif
2837 #endif
2838 
2839             TimerDifferenceFunction(false); // reset timer for the next second
2840         }
2841         /* SLKP */
2842 
2843     }
2844     // end loop over steps
2845 
2846 
2847     if (sample_lag >= 0) {
2848         // convert sums of edge posterior probs. in container to means
2849         for (long edge = 0; edge < num_nodes * num_nodes; edge++) {
2850             mcmc_output->Store (edge, 1, (*mcmc_output)(edge,1) / mcmc_samples);
2851         }
2852     }
2853 
2854 
2855     // export node ordering info
2856     for (long node = 0; node < num_nodes; node++) {
2857         mcmc_output->Store (node, 2, (hyFloat) (best_node_order.list_data[node]));
2858         mcmc_output->Store (node, 3, (hyFloat) (current_order->list_data[node]));
2859     }
2860 
2861     last_node_order = (_SimpleList &) (*current_order);
2862 
2863 
2864 
2865     // release cached scores (assuming that this is the last analysis to be done!)
2866     DumpComputeLists (clist);
2867 
2868     /*SLKP 20070926; include progress report updates */
2869 #if !defined __UNIX__ || defined __HEADLESS__ || defined __HYPHYQT__ || defined __HYPHY_GTK__
2870     SetStatusLine     (_HYBgm_STATUS_LINE_MCMC_DONE & (sample_lag < 0 ? _String(" burnin"):empty));
2871 #endif
2872 #if defined __UNIX__ && ! defined __HEADLESS__ && !defined __HYPHYQT__ && !defined __HYPHY_GTK__
2873     ConsoleBGMStatus (_HYBgm_STATUS_LINE_MCMC_DONE & (sample_lag < 0 ? _String(" burnin"):empty), -1.0, progressReportFile);
2874 #endif
2875     /* SLKP */
2876 
2877     return mcmc_output;
2878 }
2879 
2880 
2881 
2882 //___________________________________________________________________________________________
2883 #ifdef __NEVER_DEFINED__
RunHotChain(_SimpleList * current_order,long nsteps,long sample_lag,hyFloat temper)2884 void    Bgm::RunHotChain (_SimpleList * current_order, long nsteps, long sample_lag, hyFloat temper)
2885 {
2886     /*  Execute Metropolis sampler using a swap of two nodes in an ordered sequence
2887      as a proposal function.  The posterior probabilities of edges in the network are
2888      stored as a member matrix [edge_posteriors].  Note that the total number of
2889      possible orderings (i.e. permutations of a sequence of length N) is factorial(N),
2890      and can possibly be computed exactly for N < 8.                                        */
2891 
2892     _List   *   clist;
2893 
2894     hyFloat  prob_current_order = Compute (current_order, clist),
2895                 prob_proposed_order = 0.;
2896 
2897     static long step_counter = 0;
2898 
2899     long        sample_interval = (long int) ((nsteps - mcmc_burnin) / mcmc_samples);
2900 
2901     _SimpleList proposed_order;
2902 
2903     bool        accept_step = FALSE;
2904 
2905     /* SLKP 20070926
2906      Add user feedback via the console window status bar
2907      */
2908 #if !defined __UNIX__ || defined __HEADLESS__
2909     long howManyTimesUpdated = 0; // how many times has the line been updated; is the same as the # of seconds
2910     TimerDifferenceFunction(false); // save initial timer; will only update every 1 second
2911     SetStatusLine     (empty,_HYBgm_STATUS_LINE_MCMC,empty,0,HY_SL_TASK|HY_SL_PERCENT);
2912 #endif
2913     /* SLKP */
2914 
2915 
2916     for (long step = 0; step < mcmc_steps; step++) {
2917         // copy contents of current order to proposed ordering for permutation
2918         proposed_order.Clear();
2919         proposed_order.Duplicate (current_order);
2920 
2921 
2922         // swap random nodes in ordered sequence
2923         long    first_node  = genrand_int32 () % proposed_order.lLength,
2924                 second_node = genrand_int32 () % proposed_order.lLength;
2925 
2926         while (first_node == second_node) { // keep sampling until pick different node
2927             second_node = genrand_int32 () % proposed_order.lLength;
2928         }
2929 
2930         proposed_order.Swap(first_node, second_node);
2931 
2932 
2933         // calculate probability of proposed order --- edge posterior probs loaded into member variable
2934         prob_proposed_order = Compute (&proposed_order, FALSE);
2935 
2936 
2937         hyFloat  lk_ratio    = exp(prob_proposed_order - prob_current_order);
2938 
2939         accept_step = FALSE;
2940 
2941         if (lk_ratio > 1.)  {
2942             accept_step = TRUE;    // accept greater likelihood
2943         } else if (genrand_real2() < lk_ratio) {
2944             accept_step = TRUE;
2945         }
2946 
2947 
2948         if (accept_step) {  // then set current to proposed order
2949             (*current_order).Clear();
2950             (*current_order) << proposed_order;
2951             prob_current_order = prob_proposed_order;
2952         }
2953 
2954         // write Markov chain state to output matrix
2955         if (step % sample_interval == 0 && step >= mcmc_burnin) {
2956             long    row = (long int) ((step-mcmc_burnin) / sample_interval);
2957 
2958             mcmc_chain->Store (row, 0, prob_current_order);
2959 
2960             for (long edge = 0; edge < num_nodes*num_nodes; edge++) {
2961                 long    child   = edge % num_nodes,
2962                         parent    = edge / num_nodes;
2963 
2964                 mcmc_chain->Store (edge, 1, (*mcmc_chain)(edge, 1) + edge_posteriors(child, parent));
2965             }
2966         }
2967 
2968         /*SLKP 20070926; include progress report updates */
2969 #if !defined __UNIX__ || defined __HEADLESS__
2970         if (TimerDifferenceFunction(true)>1.0) { // time to update
2971             howManyTimesUpdated ++;
2972             _String statusLine = _HYBgm_STATUS_LINE_MCMC & " " & (step+1) & "/" & mcmc_steps
2973                                  & " steps (" & (1.0+step)/howManyTimesUpdated & "/second)";
2974             SetStatusLine     (empty,statusLine,empty,100*step/(mcmc_steps),HY_SL_TASK|HY_SL_PERCENT);
2975             TimerDifferenceFunction(false); // reset timer for the next second
2976             yieldCPUTime(); // let the GUI handle user actions
2977             if (terminate_execution) { // user wants to cancel the analysis
2978                 break;
2979             }
2980         }
2981 #endif
2982         /* SLKP */
2983     }
2984     // end loop over steps
2985 
2986 
2987     for (long edge = 0; edge < num_nodes * num_nodes; edge++) {
2988         // convert sums of edge posterior probs. in container to means
2989         mcmc_chain->Store (edge, 1, (*mcmc_chain)(edge,1) / mcmc_samples);
2990     }
2991 
2992 
2993     // release cached scores (assuming that this is the last analysis to be done!)
2994     DumpComputeLists ();
2995 
2996     /*SLKP 20070926; include progress report updates */
2997 #if !defined __UNIX__ || defined __HEADLESS__
2998     SetStatusLine     (_HYBgm_STATUS_LINE_MCMC_DONE);
2999 #endif
3000     /* SLKP */
3001 
3002 }
3003 
3004 #endif
3005 
3006 
3007 
3008 //___________________________________________________________________________________________
3009 #ifdef __AFYP_DEVELOPMENT2__
CacheNetworkParameters(void)3010 void        Bgm::CacheNetworkParameters (void)
3011 {
3012     _SimpleList     multipliers,    // for indexing parent combinations (j)
3013                     parents;
3014 
3015     long            num_parent_combos,
3016                     r_i;
3017 
3018     _Matrix         n_ijk,
3019                     theta_ijk;
3020 
3021     network_parameters.Clear();     // reset cache
3022 
3023 
3024     for (long i = 0; i < num_nodes; i++) {
3025         // reset variables to defaults
3026         multipliers.Clear();
3027         multipliers << 1;
3028 
3029         n_ijk.Clear();
3030         theta_ijk.Clear();
3031 
3032         num_parent_combos   = 1;
3033         r_i                 = num_levels.list_data[i];
3034 
3035 
3036         // assemble parents based on current graph
3037         for (long par = 0; par < num_nodes; par++) {
3038             if (dag(par, i) == 1 && is_discrete.list_data[par]) {
3039                 parents << par; // list of parents will be in ascending numerical order
3040                 num_parent_combos *= num_levels.list_data[par];
3041                 multipliers << num_parent_combos;
3042             }
3043         }
3044 
3045 
3046         // re-allocate space in matrix
3047         CreateMatrix (&n_ijk, num_parent_combos, r_i+1, false, true, false);
3048         CreateMatrix (&theta_ijk, num_parent_combos, r_i, false, true, false);
3049 
3050 
3051         // tally observations
3052         for (long obs = 0; obs < obsData->GetHDim(); obs++) {
3053             long    index           = 0,
3054                     child_state     = (*obsData)(obs, i);
3055 
3056             // is there a faster algorithm for this? - afyp
3057             for (long par = 0; par < parents.lLength; par++) {
3058                 long    this_parent         = parents.list_data[par],
3059                         this_parent_state = (*obsData)(obs, this_parent);
3060 
3061                 index += this_parent_state * multipliers.list_data[par];
3062             }
3063 
3064             n_ijk.Store ((long) index, child_state, n_ijk(index, child_state) + 1);
3065             n_ijk.Store ((long) index, r_i, n_ijk(index, r_i) + 1); // store n_ij sum in last entry
3066         }
3067 
3068 
3069         // compute expected network conditional probabilities (Cooper and Herskovits 1992 eq.16)
3070         for (long j = 0; j < num_parent_combos; j++) {
3071             hyFloat  denom = n_ijk(j,r_i+1) + r_i;
3072 
3073             for (long k = 0; k < r_i; k++) {
3074                 theta_ijk.Store (j, k, (n_ijk(j,k)+1.) / denom);
3075             }
3076         }
3077 
3078 
3079         // append parameters to member list
3080         network_parameters && (&theta_ijk);
3081     }
3082 }
3083 #endif
3084 
3085 
3086 //___________________________________________________________________________________________
3087 #ifdef __AFYP_DEVELOPMENT2__
SimulateDiscreteCases(long ncases)3088 _Matrix *   Bgm::SimulateDiscreteCases (long ncases)
3089 {
3090     /*  Identify nodes without parents in given graph.
3091         Draw random state assignments based on learned parameters (orphan score).
3092         Use graph matrix transpose to index child nodes.
3093         Visit children and randomly assign states dependent on parent assignment.
3094         Rinse and repeat.
3095     */
3096     CacheNetworkParameters();
3097 
3098     _Matrix     gad     = dag.Transpose();
3099     _Matrix *   cases   = new _Matrix (num_nodes, ncases, false, true);
3100 
3101     // proceed using postorder traversal (check for parents before resolving node)
3102 
3103 }
3104 #endif
3105 
3106 
3107 
3108 //___________________________________________________________________________________________
3109 #ifdef __AFYP_DEVELOPMENT2__
PostOrderInstantiate(long node_id,_SimpleList & results)3110 void        Bgm::PostOrderInstantiate (long node_id, _SimpleList & results)
3111 {
3112     _SimpleList     parents;
3113 
3114     // assign parents first
3115     for (long par = 0; par < num_nodes; par++) {
3116         if (par != node_id && dag (par, node_id) == 1) {
3117             PostOrderInstantiate (par, results);
3118             parents << par;
3119         }
3120     }
3121 
3122     // assign random state given parents
3123     for (long pindex = 0; pindex < parents.lLength; pindex++) {
3124         long    par = parents.list_data[pindex];
3125 
3126     }
3127 }
3128 
3129 
PredictNode(long node_id,_Matrix * assignments)3130 long        Bgm::PredictNode (long node_id, _Matrix * assignments)
3131 {
3132 
3133 }
3134 #endif
3135 
3136 
3137 //___________________________________________________________________________________________
3138 #if defined __HYPHYMPI__
SerializeBgm(_String & rec)3139 void    Bgm::SerializeBgm (_String & rec)
3140 {
3141     char        buf [255];
3142     _String *   bgmName = (_String *) bgmNamesList (bgmList._SimpleList::Find((long)this));
3143     _String     dataStr,
3144                 dataName ("kBGMData"),
3145                 banStr,
3146                 banName ("ban_matrix");
3147 
3148     hyFloat  mcem_max_steps, mcem_burnin, mcem_sample_size;
3149 
3150     checkParameter (mcemMaxSteps, mcem_max_steps, 10000);
3151     checkParameter (mcemBurnin, mcem_burnin, 1000);
3152     checkParameter (mcemSampleSize, mcem_sample_size, 100);
3153 
3154     rec << "PRINT_DIGITS=-1;\n";
3155     rec << "USE_MPI_CACHING=1;\n";
3156 
3157     // write utility functions
3158     rec << "function make_dnode (id,n,maxp)\n";
3159     rec << "{\ndnode={};\n";
3160     rec << "dnode[\"NodeID\"]=id;\n";
3161     rec << "dnode[\"PriorSize\"]=n;\n";
3162     rec << "dnode[\"MaxParents\"]=maxp;\n";
3163     rec << "return dnode;\n}\n";
3164 
3165     rec << "function make_cnode (id,n,maxp,mean,prec)\n";
3166     rec << "{\ncnode={};\n";
3167     rec << "cnode[\"NodeID\"]=id;\n";
3168     rec << "cnode[\"PriorSize\"]=n;\n";
3169     rec << "cnode[\"MaxParents\"]=maxp;\n";
3170     rec << "cnode[\"PriorMean\"]=mean;\n";
3171     rec << "cnode[\"PriorVar\"]=prec;\n";
3172     rec << "return cnode;\n}\n";
3173 
3174 
3175     // prepare assortative lists
3176     rec << "dnodes={};\ncnodes={};\n";
3177 
3178     for (long node_id = 0; node_id < num_nodes; node_id++) {
3179         if (is_discrete.list_data[node_id]) {
3180             rec << "dnodes[Abs(dnodes)]=make_dnode(";
3181             snprintf (buf, sizeof(buf), "%ld,%ld,%ld", node_id, (long)prior_sample_size(node_id,0), (long)max_parents.list_data[node_id]);
3182             rec << buf;
3183             rec << ");\n";
3184         } else {
3185             rec << "cnodes[Abs(cnodes)]=make_cnode(";
3186             snprintf (buf, sizeof(buf), "%ld,%ld,%ld,%f,%f", node_id, (long)prior_sample_size(node_id,0), (long)max_parents.list_data[node_id],
3187                      prior_mean(node_id,0), prior_precision(node_id,0));
3188             rec << buf;
3189             rec << ");\n";
3190         }
3191     }
3192 
3193     // write BGM constructor
3194     rec << "BGM ";
3195     rec << bgmName;
3196     rec << "=(dnodes,cnodes);\n";
3197 
3198     // missing data imputation settings
3199     snprintf (buf, sizeof(buf), "BGM_MCEM_MAXSTEPS = %d;\n", (long)mcem_max_steps);
3200     rec << buf;
3201     snprintf (buf, sizeof(buf), "BGM_MCEM_BURNIN = %d;\n", (long)mcem_burnin);
3202     rec << buf;
3203     snprintf (buf, sizeof(buf), "BGM_MCEM_SAMPLES = %d;\n", (long)mcem_sample_size);
3204     rec << buf;
3205 
3206     // ban matrix command has to come BEFORE setting data matrix (which calls CacheNodeScores)
3207     rec << "ban_matrix=";
3208     rec << (_String *)banned_edges.toStr();
3209     rec << ";\n";
3210     rec << "SetParameter(";
3211     rec << bgmName;
3212     rec << ",BGM_BAN_MATRIX,ban_matrix);\n";
3213 
3214     // serialize data matrix and assign to BGM
3215     rec << "kBGMData=";
3216     rec << (_String *)obsData->toStr();
3217     rec << ";\n";
3218     rec << "SetParameter(";
3219     rec << bgmName;
3220     rec << ",BGM_DATA_MATRIX,kBGMData);\n";
3221 
3222 }
3223 #endif
3224 
3225 //___________________________________________________________________________________________
3226 //___________________________________________________________________________________________
3227 //___________________________________________________________________________________________
3228 //___________________________________________________________________________________________
3229 //___________________________________________________________________________________________
3230 
3231 
3232 #endif
3233