1 /*
2  HyPhy - Hypothesis Testing Using Phylogenies.
3 
4  Copyright (C) 1997-now
5  Core Developers:
6  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
7  Art FY Poon    (apoon42@uwo.ca)
8  Steven Weaver (sweaver@temple.edu)
9 
10  Module Developers:
11  Lance Hepler (nlhepler@gmail.com)
12  Martin Smith (martin.audacis@gmail.com)
13 
14  Significant contributions from:
15  Spencer V Muse (muse@stat.ncsu.edu)
16  Simon DW Frost (sdf22@cam.ac.uk)
17 
18  Permission is hereby granted, free of charge, to any person obtaining a
19  copy of this software and associated documentation files (the
20  "Software"), to deal in the Software without restriction, including
21  without limitation the rights to use, copy, modify, merge, publish,
22  distribute, sublicense, and/or sell copies of the Software, and to
23  permit persons to whom the Software is furnished to do so, subject to
24  the following conditions:
25 
26  The above copyright notice and this permission notice shall be included
27  in all copies or substantial portions of the Software.
28 
29  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
30  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
31  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
32  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
33  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
34  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
35  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
36  */
37 
38 #include "hbl_env.h"
39 
40 /**
41     HBL environment variables and constants go here,
42     Getter/setter functions are also here
43 */
44 
45 
46 namespace hy_env {
47 
48     _List           _hy_env_default_values_aux;
49 
50     _AVLListXL      _hy_env_default_values (&_hy_env_default_values_aux);
51 
52     /*********************************************************************************/
EnvVariableGetDefault(_String const & name,unsigned long type)53     HBLObjectRef       EnvVariableGetDefault (_String const& name, unsigned long type) {
54         HBLObjectRef default_value = (HBLObjectRef)_hy_env_default_values.GetDataByKey (&name);
55         if (default_value) {
56             if (type == HY_ANY_OBJECT || (type | default_value->ObjectClass())) {
57                 return default_value;
58             }
59         }
60         return nil;
61     }
62 
63     /*********************************************************************************/
EnvVariableGetDefaultNumber(_String const & name)64     hyFloat       EnvVariableGetDefaultNumber (_String const& name) {
65         HBLObjectRef default_value = (HBLObjectRef)EnvVariableGetDefault (name, NUMBER);
66         if (default_value) {
67             return default_value->Value();
68         }
69         return HY_INVALID_RETURN_VALUE;
70     }
71 
72     /*********************************************************************************/
EnvVariableTrue(_String const & name)73     bool       EnvVariableTrue (_String const& name) {
74         HBLObjectRef value = (HBLObjectRef)EnvVariableGet (name, NUMBER);
75         if (value) {
76             return fabs (value->Value()) > 1e-10;
77         }
78         return false;
79     }
80 
81     /*********************************************************************************/
EnvVariableGet(_String const & name,unsigned long type)82     HBLObjectRef       EnvVariableGet (_String const& name, unsigned long type) {
83         HBLObjectRef current_value = (HBLObjectRef)FetchObjectFromVariableByType(&name, type);
84         if (current_value) {
85             if (type == HY_BL_ANY || (type | current_value->ObjectClass())) {
86                 return current_value;
87             }
88         }
89         return EnvVariableGetDefault (name, type);
90     }
91 
92     /*********************************************************************************/
EnvVariableGetNumber(_String const & name,hyFloat default_value)93     hyFloat       EnvVariableGetNumber (_String const& name, hyFloat default_value) {
94       HBLObjectRef current_value = EnvVariableGet (name, NUMBER);
95       return current_value ? current_value -> Value() : default_value;
96     }
97 
98   /*********************************************************************************/
EnvVariableSet(_String const & name,HBLObjectRef value,bool copy)99     void       EnvVariableSet (_String const& name, HBLObjectRef value, bool copy) {
100         EnvVariableSetNamespace (name, value, nil, copy);
101     }
102 
103     /*********************************************************************************/
EnvVariableSetNamespace(_String const & name,HBLObjectRef value,_String * nmspace,bool copy)104     void       EnvVariableSetNamespace (_String const& name, HBLObjectRef value, _String* nmspace, bool copy) {
105         setParameter(name, value, nmspace, copy);
106     }
    /**
        Arrange variables alphabetically by C++ ID for easier scanning;
        default values are stored and can be obtained by calling
            EnvVariableGetDefault

     */
113 
114 
115     /*********************************************************************************/
116 
SetupEnvDefaults(void)117     void        SetupEnvDefaults (void) {
118         _hy_env_default_values.PushPairCopyKey (use_traversal_heuristic,  new HY_CONSTANT_TRUE)
119                               .PushPairCopyKey (normalize_sequence_names, new HY_CONSTANT_TRUE)
120                               .PushPairCopyKey(message_logging, new HY_CONSTANT_TRUE)
121                               .PushPairCopyKey (dataset_save_memory_size, new _Constant (100000.))
122                               .PushPairCopyKey (harvest_frequencies_gap_options, new HY_CONSTANT_TRUE)
123                               .PushPairCopyKey(assertion_behavior, new HY_CONSTANT_FALSE)
124                               .PushPairCopyKey (print_float_digits, new _Constant (0.))
125                               .PushPairCopyKey (mpi_node_count, new _Constant (1.))
126                               .PushPairCopyKey (always_reload_libraries, new HY_CONSTANT_FALSE)
127                               .PushPairCopyKey (end_of_file, new HY_CONSTANT_FALSE)
128                               .PushPairCopyKey (produce_markdown_output, new HY_CONSTANT_FALSE)
129                               .PushPairCopyKey (integration_maximum_iterations, new _Constant (10.))
130                               .PushPairCopyKey (integration_precision_factor, new _Constant (1.e-10))
131                               .PushPairCopyKey (skip_omissions, new _Constant (HY_CONSTANT_FALSE))
132                               .PushPairCopyKey (data_file_print_format, new _Constant (6.0))
133                               .PushPairCopyKey (data_file_default_width, new _Constant (50.0))
134                               .PushPairCopyKey (data_file_gap_width, new _Constant (10.0))
135                               .PushPairCopyKey (accept_branch_lengths, new _Constant (HY_CONSTANT_TRUE))
136       ;
137     }
138 
139 _String cli_env_settings;
140 
141 _String const
142     accept_branch_lengths                            ("ACCEPT_BRANCH_LENGTHS"),
143         // if true (default), then branch lengths from Newick strings will be accepted (whenever possible)
144     accept_rooted_trees                             ("ACCEPT_ROOTED_TREES"),
145         // if TRUE, do not perform automatic unrooting for topology/tree constructors
146     always_reload_libraries                         ("ALWAYS_RELOAD_FUNCTION_LIBRARIES"),
147         // if TRUE, reparse and re-execute source code for each call to LoadFunctionLibrary,
148         // otherwise load function libraries only once
149     assertion_behavior                              ("ASSERTION_BEHAVIOR"),
150         // if set to TRUE, then assertions that fail skip to the end of the current script
151         // otherwise they terminate the program
152     assume_reversible                               ("ASSUME_REVERSIBLE_MODELS"),
153         // 0 : check reversibility at run-time
154         // 1 : ASSUME reversibility
155         // -1 : ASSUME NON-reversibility
156 
157     automatically_convert_branch_lengths            ("AUTOMATICALLY_CONVERT_BRANCH_LENGTHS"),
158         // if TRUE, then HyPhy will attempt to solve BL (t) = C for model parameter t, whenever possible
159     base_directory                                  ("HYPHY_BASE_DIRECTORY"),
160         // is set to the base directory for local path names; can be set via a CL argument (BASEPATH)
161     blockwise_matrix                                ("BLOCK_LIKELIHOOD"),
162         // this _template_ variable is used to define likelihood function evaluator templates
163     branch_length_stencil                           ("BRANCH_LENGTH_STENCIL"),
164     covariance_parameter                            ("COVARIANCE_PARAMETER"),
165         // used to control the behavior of CovarianceMatrix
166     data_file_default_width                         ("DATA_FILE_DEFAULT_WIDTH"),
167       // for file formats with grouped alignment columns (e.g. PHYLIP), determines the width of a column
168     data_file_gap_width                             ("DATA_FILE_GAP_WIDTH"),
169       // for file formats with grouped alignment columns (e.g. PHYLIP), determines the width of the gap between
170       // column groups
171     data_file_partition_matrix                      ("DATA_FILE_PARTITION_MATRIX"),
172         // the string of data partitions read from the last valid NEXUS CHARSET block
173     data_file_print_format                          ("DATA_FILE_PRINT_FORMAT"),
174       // determines the file format for datasets and datafilters
175 
176     data_file_tree                                  ("IS_TREE_PRESENT_IN_DATA"),
177         // set to TRUE if the last data load call yielded a Newick trees
178     data_file_tree_string                           ("DATAFILE_TREE"),
179         // the last tree loaded by via a sequence file read
180     dataset_save_memory_size                        ("USE_MEMORY_SAVING_DATA_STRUCTURES"),
181         // sets the maximum dimension of a data filter for generating .site_map, .site_freqs, .sequence_map
182     directory_separator_char                        ("DIRECTORY_SEPARATOR"),
183         // is set to the platform directory separator (e.g. '/')
184     defer_constrain_assignment                        ("DEFER_CONSTRAINT_APPLICATION"),
185         // if set to TRUE, then constraint application will be done in a single batch
186         // this is helpful when many x := expr statements are strung together to avoid
187         // checking the entire namespace for dependancies
188     end_of_file                                     ("END_OF_FILE"),
189         // set by IO operations, like fscanf to indicate whetehr the end of the input stream has
190         // been reached
191     file_created                                   ("FILE_CREATED"),
192     // set by IO operations, like fscanf to indicate whetehr the end of the input stream has
193     // been reached
194     error_report_format_expression                  ("ERROR_REPORT_FORMAT_EXPRESSION"),
195         // if provided, this expression (assumed string valued), will be used to format the error
196         // message, with special placeholder variables (see below) will be replaced with the
197         // error related data
198     error_report_format_expression_stack            ("_ERROR_CALL_STACK_"),
199         // the current HBL call stack, formatted as a list
200     error_report_format_expression_stdin           ("_ERROR_CALL_STDIN_"),
201         // the current HBL standard input buffer, formatted as a list
202     error_report_format_expression_string           ("_ERROR_TEXT_"),
203     // the text message explaining the error
204     execution_mode                                  ("HBL_EXECUTION_ERROR_HANDLING"),
205     // sets HyPhy exception handling
206     // FALSE - bail out on errors
207     // TRUE  - return from the current execution list, but keep the program running (e.g. to allow for HBL testing of error handling)
208 
209     get_data_info_returns_only_the_index            ("GET_DATA_INFO_RETURNS_ONLY_THE_INDEX"),
210     // instead of returing {0,0,1,0} for a 'G' character in GetDataInfo (r, filter, species, pattern)
211     // return only the index of 'G', e.g. 2 in this case. -1 is returned for ambigs
212     false_const                                     ("FALSE"),
213         // the FALSE (0.0) constant
214     fprintf_redirect                                ("GLOBAL_FPRINTF_REDIRECT"),
215         // if set to a string path, then all stdout will go to the file at the path  (or /dev/null) instead of to stdout
216 
217     harvest_frequencies_gap_options                 ("COUNT_GAPS_IN_FREQUENCIES"),
218     /*
219         if set to `harvest_frequencies_gap_options` to TRUE, then N-fold ambigs will add 1/N to each character count in HarvestFrequencies,
220         otherwise N-folds are ignored in counting
221      */
222 
223     include_model_spec                              ("INCLUDE_MODEL_SPECS"),
224     /*
225         controls whether or not export / serialization operations (like toStr)
226         will include substitution model specifications
227      */
228 
229     integration_precision_factor                    ("INTEGRATION_PRECISION_FACTOR"),
230     integration_maximum_iterations                  ("INTEGRATION_MAX_ITERATES"),
231     // used to control integration in _Formula::Integrate
232 
233     kExpectedNumberOfSubstitutions                  ("EXPECTED_NUMBER_OF_SUBSTITUTIONS"),
234         // literal for the expected number of substitions (per unit time)
235     kGetStringFromUser                              ("PROMPT_FOR_STRING"),
236         // [LEGACY] placeholder for prompting the user for a string value
237     kSCFGCorpus                                     ("SCFG_STRING_CORPUS"),
238         // set SCFG training corpus
239     kStringSuppliedLengths                          ("STRING_SUPPLIED_LENGTHS"),
240         // literal for branch lengths from the Newick tree string
241     kDevNull                                        ("/dev/null"),
242         // literal for branch lengths from the Newick tree string
243     last_file_path                                  ("LAST_FILE_PATH"),
244         // is set by various file read/write commands (fscanf, fprintf, dialog prompts)
245         // to contain the **absolute** path to the last file interacted with
246     last_fileio_exception                           ("LAST_FILE_IO_EXCEPTION"),
247         // set to the value of the last exception if soft_fileio_exceptions is true,
248 
249     last_raw_file_prompt                            ("LAST_RAW_FILE_PROMPT"),
250            // the last unprocessed value obtained by PROMPT_FOR_FILE
251     last_model_parameter_list                       ("LAST_MODEL_PARAMETER_LIST"),
252         // a stand-in for the list of model parameters for the last
253         // declared model
254 
255     lf_convergence_criterion                        ("LF_CONVERGENCE_CRITERION"),
256         // if set to a string, provides a callback function ID to LF optimization routines,
257         // expected to take two arguments: current log L and a dict with current param values
258         // returns T/F for convergence criterion check
259 
260     lib_directory                                   ("HYPHY_LIB_DIRECTORY"),
261         // is set to the library directory for standard library searchers; can be set via a CL argument (LIBPATH)
262     matrix_element_column                           ("_MATRIX_ELEMENT_COLUMN_"),
263     matrix_element_row                              ("_MATRIX_ELEMENT_ROW_"),
264     matrix_element_value                            ("_MATRIX_ELEMENT_VALUE_"),
265         // the last three variables are used as _template_ variable for conditional / iterated matrix operations, e.g.,
266         // matrix ["_MATRIX_ELEMENT_ROW_+_MATRIX_ELEMENT_COLUMN_"]
267     message_logging                                 ("MESSAGE_LOGGING"),
268         // if set, then diagnostic messages will be logged
269     mpi_node_id                                     ("MPI_NODE_ID"),
270         // [MPI only] the ID (0 = master, etc) for teh current process
271     mpi_node_count                                  ("MPI_NODE_COUNT"),
272         // [MPI only] the count of MPI nodes (master + slaves)
273     mpi_last_sent_message                           ("MPI_LAST_SENT_MSG"),
274         // [MPI only] the contents of the last message sent by the current node
275     nexus_file_tree_matrix                          ("NEXUS_FILE_TREE_MATRIX"),
276         // the tree matrix read from the last valid NEXUS TREE block
277     normalize_sequence_names                        ("NORMALIZE_SEQUENCE_NAMES"),
278         // if set, will trigger automatic renaming of sequence names from files to valid
279         // HyPhy IDs, e.g. "awesome monkey!" -> "awesome_monkey_"
280         // the mapping will go into dataset_id.mapping
281     path_to_current_bf                              ("PATH_TO_CURRENT_BF"),
282         // is set to the absolute path for the currently executed batch file (assuming it has one)
283     print_float_digits                              ("PRINT_DIGITS"),
284         // controls how many decimal places are generated by fprintf and various number->string conversions
285     produce_markdown_output                         ("MARKDOWN_OUTPUT"),
286         // controls if certain stdout output is formatted as MarkDown (default is not)
287     random_seed                                     ("RANDOM_SEED"),
288         // the seed used for the Mersenne Twister random number generator
289     selection_strings                               ("SELECTION_STRINGS"),
290         // populated by a successful call to 'ChoiceList', @see ExecuteCase32
291     sitewise_matrix                                 ("SITE_LIKELIHOOD"),
292         // this _template_ variable is used to define likelihood function evaluator templates
293     short_mpi_return                                ("SHORT_MPI_RETURN"),
294         // controls the return format of optimized functions from MPI slave nodes
295     skip_omissions                                  ("SKIP_OMISSIONS"),
296         // if set, will cause data filters to _EXCLUDE_ sites with gaps or other N-fold redundancies
297     soft_fileio_exceptions                          ("SOFT_FILE_IO_EXCEPTIONS"),
298         // if set, read/write errors from fscanf and fprintf will not cause a program termination
299         // but rather set the last_fileio_exception variable to the value of the exception
300     status_bar_update_string                        ("STATUS_BAR_STATUS_STRING"),
301         // used to set the progress message displayed to the user
302     tolerate_numerical_errors                       ("TOLERATE_NUMERICAL_ERRORS"),
303         // if set, numerical errors that would cause termination are instead trated as warnings
304     try_numeric_sequence_match                      ("TRY_NUMERIC_SEQUENCE_MATCH"),
305         // try matching sequences by 0 (or 1) based index, if matching by name fails
306     true_const                                      ("TRUE"),
307         // the TRUE (1.0) constant
308     use_last_model                                  ("USE_LAST_MODEL"),
309         // a stand-in for the last declared model
310     use_traversal_heuristic                         ("USE_TRAVERSAL_HEURISTIC"),
311         // TODO (20170413): don't remember what this does; , see @ _DataSetFilter::MatchStartNEnd
312         // #DEPRECATE
313     verbosity_level_string                           ("VERBOSITY_LEVEL")
314         // controls verbosity level during optimization and other long-running operations
315     ;
316 
317 
318 /** default values get set up in hy_global:: */
319 
320 
321 }
322 
323