1 /*
2 This file is part of solidity.
3
4 solidity is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
8
9 solidity is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with solidity. If not, see <http://www.gnu.org/licenses/>.
16 */
17 // SPDX-License-Identifier: GPL-3.0
18
19 #include <tools/yulPhaser/Phaser.h>
20
21 #include <tools/yulPhaser/AlgorithmRunner.h>
22 #include <tools/yulPhaser/Common.h>
23 #include <tools/yulPhaser/Exceptions.h>
24 #include <tools/yulPhaser/FitnessMetrics.h>
25 #include <tools/yulPhaser/GeneticAlgorithms.h>
26 #include <tools/yulPhaser/Program.h>
27 #include <tools/yulPhaser/SimulationRNG.h>
28
29 #include <liblangutil/CharStream.h>
30 #include <liblangutil/CharStreamProvider.h>
31 #include <liblangutil/SourceReferenceFormatter.h>
32 #include <liblangutil/Scanner.h>
33
34 #include <libsolutil/Assertions.h>
35 #include <libsolutil/CommonData.h>
36 #include <libsolutil/CommonIO.h>
37
38 #include <iostream>
39
40 using namespace std;
41 using namespace solidity;
42 using namespace solidity::langutil;
43 using namespace solidity::util;
44 using namespace solidity::yul;
45 using namespace solidity::phaser;
46
47 namespace po = boost::program_options;
48
49 namespace
50 {
51
52 map<PhaserMode, string> const PhaserModeToStringMap =
53 {
54 {PhaserMode::RunAlgorithm, "run-algorithm"},
55 {PhaserMode::PrintOptimisedPrograms, "print-optimised-programs"},
56 {PhaserMode::PrintOptimisedASTs, "print-optimised-asts"},
57 };
58 map<string, PhaserMode> const StringToPhaserModeMap = invertMap(PhaserModeToStringMap);
59
60 map<Algorithm, string> const AlgorithmToStringMap =
61 {
62 {Algorithm::Random, "random"},
63 {Algorithm::GEWEP, "GEWEP"},
64 {Algorithm::Classic, "classic"},
65 };
66 map<string, Algorithm> const StringToAlgorithmMap = invertMap(AlgorithmToStringMap);
67
68 map<MetricChoice, string> MetricChoiceToStringMap =
69 {
70 {MetricChoice::CodeSize, "code-size"},
71 {MetricChoice::RelativeCodeSize, "relative-code-size"},
72 };
73 map<string, MetricChoice> const StringToMetricChoiceMap = invertMap(MetricChoiceToStringMap);
74
75 map<MetricAggregatorChoice, string> const MetricAggregatorChoiceToStringMap =
76 {
77 {MetricAggregatorChoice::Average, "average"},
78 {MetricAggregatorChoice::Sum, "sum"},
79 {MetricAggregatorChoice::Maximum, "maximum"},
80 {MetricAggregatorChoice::Minimum, "minimum"},
81 };
82 map<string, MetricAggregatorChoice> const StringToMetricAggregatorChoiceMap = invertMap(MetricAggregatorChoiceToStringMap);
83
84 map<CrossoverChoice, string> const CrossoverChoiceToStringMap =
85 {
86 {CrossoverChoice::SinglePoint, "single-point"},
87 {CrossoverChoice::TwoPoint, "two-point"},
88 {CrossoverChoice::Uniform, "uniform"},
89 };
90 map<string, CrossoverChoice> const StringToCrossoverChoiceMap = invertMap(CrossoverChoiceToStringMap);
91
92 }
93
operator >>(istream & _inputStream,PhaserMode & _phaserMode)94 istream& phaser::operator>>(istream& _inputStream, PhaserMode& _phaserMode) { return deserializeChoice(_inputStream, _phaserMode, StringToPhaserModeMap); }
operator <<(ostream & _outputStream,PhaserMode _phaserMode)95 ostream& phaser::operator<<(ostream& _outputStream, PhaserMode _phaserMode) { return serializeChoice(_outputStream, _phaserMode, PhaserModeToStringMap); }
operator >>(istream & _inputStream,Algorithm & _algorithm)96 istream& phaser::operator>>(istream& _inputStream, Algorithm& _algorithm) { return deserializeChoice(_inputStream, _algorithm, StringToAlgorithmMap); }
operator <<(ostream & _outputStream,Algorithm _algorithm)97 ostream& phaser::operator<<(ostream& _outputStream, Algorithm _algorithm) { return serializeChoice(_outputStream, _algorithm, AlgorithmToStringMap); }
operator >>(istream & _inputStream,MetricChoice & _metric)98 istream& phaser::operator>>(istream& _inputStream, MetricChoice& _metric) { return deserializeChoice(_inputStream, _metric, StringToMetricChoiceMap); }
operator <<(ostream & _outputStream,MetricChoice _metric)99 ostream& phaser::operator<<(ostream& _outputStream, MetricChoice _metric) { return serializeChoice(_outputStream, _metric, MetricChoiceToStringMap); }
operator >>(istream & _inputStream,MetricAggregatorChoice & _aggregator)100 istream& phaser::operator>>(istream& _inputStream, MetricAggregatorChoice& _aggregator) { return deserializeChoice(_inputStream, _aggregator, StringToMetricAggregatorChoiceMap); }
operator <<(ostream & _outputStream,MetricAggregatorChoice _aggregator)101 ostream& phaser::operator<<(ostream& _outputStream, MetricAggregatorChoice _aggregator) { return serializeChoice(_outputStream, _aggregator, MetricAggregatorChoiceToStringMap); }
operator >>(istream & _inputStream,CrossoverChoice & _crossover)102 istream& phaser::operator>>(istream& _inputStream, CrossoverChoice& _crossover) { return deserializeChoice(_inputStream, _crossover, StringToCrossoverChoiceMap); }
operator <<(ostream & _outputStream,CrossoverChoice _crossover)103 ostream& phaser::operator<<(ostream& _outputStream, CrossoverChoice _crossover) { return serializeChoice(_outputStream, _crossover, CrossoverChoiceToStringMap); }
104
fromCommandLine(po::variables_map const & _arguments)105 GeneticAlgorithmFactory::Options GeneticAlgorithmFactory::Options::fromCommandLine(po::variables_map const& _arguments)
106 {
107 return {
108 _arguments["algorithm"].as<Algorithm>(),
109 _arguments["min-chromosome-length"].as<size_t>(),
110 _arguments["max-chromosome-length"].as<size_t>(),
111 _arguments["crossover"].as<CrossoverChoice>(),
112 _arguments["uniform-crossover-swap-chance"].as<double>(),
113 _arguments.count("random-elite-pool-size") > 0 ?
114 _arguments["random-elite-pool-size"].as<double>() :
115 optional<double>{},
116 _arguments["gewep-mutation-pool-size"].as<double>(),
117 _arguments["gewep-crossover-pool-size"].as<double>(),
118 _arguments["gewep-randomisation-chance"].as<double>(),
119 _arguments["gewep-deletion-vs-addition-chance"].as<double>(),
120 _arguments.count("gewep-genes-to-randomise") > 0 ?
121 _arguments["gewep-genes-to-randomise"].as<double>() :
122 optional<double>{},
123 _arguments.count("gewep-genes-to-add-or-delete") > 0 ?
124 _arguments["gewep-genes-to-add-or-delete"].as<double>() :
125 optional<double>{},
126 _arguments["classic-elite-pool-size"].as<double>(),
127 _arguments["classic-crossover-chance"].as<double>(),
128 _arguments["classic-mutation-chance"].as<double>(),
129 _arguments["classic-deletion-chance"].as<double>(),
130 _arguments["classic-addition-chance"].as<double>(),
131 };
132 }
133
build(Options const & _options,size_t _populationSize)134 unique_ptr<GeneticAlgorithm> GeneticAlgorithmFactory::build(
135 Options const& _options,
136 size_t _populationSize
137 )
138 {
139 assert(_populationSize > 0);
140
141 switch (_options.algorithm)
142 {
143 case Algorithm::Random:
144 {
145 double elitePoolSize = 1.0 / double(_populationSize);
146
147 if (_options.randomElitePoolSize.has_value())
148 elitePoolSize = _options.randomElitePoolSize.value();
149
150 return make_unique<RandomAlgorithm>(RandomAlgorithm::Options{
151 /* elitePoolSize = */ elitePoolSize,
152 /* minChromosomeLength = */ _options.minChromosomeLength,
153 /* maxChromosomeLength = */ _options.maxChromosomeLength,
154 });
155 }
156 case Algorithm::GEWEP:
157 {
158 double percentGenesToRandomise = 1.0 / double(_options.maxChromosomeLength);
159 double percentGenesToAddOrDelete = percentGenesToRandomise;
160
161 if (_options.gewepGenesToRandomise.has_value())
162 percentGenesToRandomise = _options.gewepGenesToRandomise.value();
163 if (_options.gewepGenesToAddOrDelete.has_value())
164 percentGenesToAddOrDelete = _options.gewepGenesToAddOrDelete.value();
165
166 return make_unique<GenerationalElitistWithExclusivePools>(GenerationalElitistWithExclusivePools::Options{
167 /* mutationPoolSize = */ _options.gewepMutationPoolSize,
168 /* crossoverPoolSize = */ _options.gewepCrossoverPoolSize,
169 /* randomisationChance = */ _options.gewepRandomisationChance,
170 /* deletionVsAdditionChance = */ _options.gewepDeletionVsAdditionChance,
171 /* percentGenesToRandomise = */ percentGenesToRandomise,
172 /* percentGenesToAddOrDelete = */ percentGenesToAddOrDelete,
173 /* crossover = */ _options.crossover,
174 /* uniformCrossoverSwapChance = */ _options.uniformCrossoverSwapChance,
175 });
176 }
177 case Algorithm::Classic:
178 {
179 return make_unique<ClassicGeneticAlgorithm>(ClassicGeneticAlgorithm::Options{
180 /* elitePoolSize = */ _options.classicElitePoolSize,
181 /* crossoverChance = */ _options.classicCrossoverChance,
182 /* mutationChance = */ _options.classicMutationChance,
183 /* deletionChance = */ _options.classicDeletionChance,
184 /* additionChance = */ _options.classicAdditionChance,
185 /* crossover = */ _options.crossover,
186 /* uniformCrossoverSwapChance = */ _options.uniformCrossoverSwapChance,
187 });
188 }
189 default:
190 assertThrow(false, solidity::util::Exception, "Invalid Algorithm value.");
191 }
192 }
193
buildFromCommandLine(po::variables_map const & _arguments)194 CodeWeights CodeWeightFactory::buildFromCommandLine(po::variables_map const& _arguments)
195 {
196 return {
197 _arguments["expression-statement-cost"].as<size_t>(),
198 _arguments["assignment-cost"].as<size_t>(),
199 _arguments["variable-declaration-cost"].as<size_t>(),
200 _arguments["function-definition-cost"].as<size_t>(),
201 _arguments["if-cost"].as<size_t>(),
202 _arguments["switch-cost"].as<size_t>(),
203 _arguments["case-cost"].as<size_t>(),
204 _arguments["for-loop-cost"].as<size_t>(),
205 _arguments["break-cost"].as<size_t>(),
206 _arguments["continue-cost"].as<size_t>(),
207 _arguments["leave-cost"].as<size_t>(),
208 _arguments["block-cost"].as<size_t>(),
209 _arguments["function-call-cost"].as<size_t>(),
210 _arguments["identifier-cost"].as<size_t>(),
211 _arguments["literal-cost"].as<size_t>(),
212 };
213 }
214
fromCommandLine(po::variables_map const & _arguments)215 FitnessMetricFactory::Options FitnessMetricFactory::Options::fromCommandLine(po::variables_map const& _arguments)
216 {
217 return {
218 _arguments["metric"].as<MetricChoice>(),
219 _arguments["metric-aggregator"].as<MetricAggregatorChoice>(),
220 _arguments["relative-metric-scale"].as<size_t>(),
221 _arguments["chromosome-repetitions"].as<size_t>(),
222 };
223 }
224
build(Options const & _options,vector<Program> _programs,vector<shared_ptr<ProgramCache>> _programCaches,CodeWeights const & _weights)225 unique_ptr<FitnessMetric> FitnessMetricFactory::build(
226 Options const& _options,
227 vector<Program> _programs,
228 vector<shared_ptr<ProgramCache>> _programCaches,
229 CodeWeights const& _weights
230 )
231 {
232 assert(_programCaches.size() == _programs.size());
233 assert(_programs.size() > 0 && "Validations should prevent this from being executed with zero files.");
234
235 vector<shared_ptr<FitnessMetric>> metrics;
236 switch (_options.metric)
237 {
238 case MetricChoice::CodeSize:
239 {
240 for (size_t i = 0; i < _programs.size(); ++i)
241 metrics.push_back(make_unique<ProgramSize>(
242 _programCaches[i] != nullptr ? optional<Program>{} : move(_programs[i]),
243 move(_programCaches[i]),
244 _weights,
245 _options.chromosomeRepetitions
246 ));
247
248 break;
249 }
250 case MetricChoice::RelativeCodeSize:
251 {
252 for (size_t i = 0; i < _programs.size(); ++i)
253 metrics.push_back(make_unique<RelativeProgramSize>(
254 _programCaches[i] != nullptr ? optional<Program>{} : move(_programs[i]),
255 move(_programCaches[i]),
256 _options.relativeMetricScale,
257 _weights,
258 _options.chromosomeRepetitions
259 ));
260 break;
261 }
262 default:
263 assertThrow(false, solidity::util::Exception, "Invalid MetricChoice value.");
264 }
265
266 switch (_options.metricAggregator)
267 {
268 case MetricAggregatorChoice::Average:
269 return make_unique<FitnessMetricAverage>(move(metrics));
270 case MetricAggregatorChoice::Sum:
271 return make_unique<FitnessMetricSum>(move(metrics));
272 case MetricAggregatorChoice::Maximum:
273 return make_unique<FitnessMetricMaximum>(move(metrics));
274 case MetricAggregatorChoice::Minimum:
275 return make_unique<FitnessMetricMinimum>(move(metrics));
276 default:
277 assertThrow(false, solidity::util::Exception, "Invalid MetricAggregatorChoice value.");
278 }
279 }
280
fromCommandLine(po::variables_map const & _arguments)281 PopulationFactory::Options PopulationFactory::Options::fromCommandLine(po::variables_map const& _arguments)
282 {
283 return {
284 _arguments["min-chromosome-length"].as<size_t>(),
285 _arguments["max-chromosome-length"].as<size_t>(),
286 _arguments.count("population") > 0 ?
287 _arguments["population"].as<vector<string>>() :
288 vector<string>{},
289 _arguments.count("random-population") > 0 ?
290 _arguments["random-population"].as<vector<size_t>>() :
291 vector<size_t>{},
292 _arguments.count("population-from-file") > 0 ?
293 _arguments["population-from-file"].as<vector<string>>() :
294 vector<string>{},
295 };
296 }
297
build(Options const & _options,shared_ptr<FitnessMetric> _fitnessMetric)298 Population PopulationFactory::build(
299 Options const& _options,
300 shared_ptr<FitnessMetric> _fitnessMetric
301 )
302 {
303 Population population = buildFromStrings(_options.population, _fitnessMetric);
304
305 size_t combinedSize = 0;
306 for (size_t populationSize: _options.randomPopulation)
307 combinedSize += populationSize;
308
309 population = move(population) + buildRandom(
310 combinedSize,
311 _options.minChromosomeLength,
312 _options.maxChromosomeLength,
313 _fitnessMetric
314 );
315
316 for (string const& populationFilePath: _options.populationFromFile)
317 population = move(population) + buildFromFile(populationFilePath, _fitnessMetric);
318
319 return population;
320 }
321
buildFromStrings(vector<string> const & _geneSequences,shared_ptr<FitnessMetric> _fitnessMetric)322 Population PopulationFactory::buildFromStrings(
323 vector<string> const& _geneSequences,
324 shared_ptr<FitnessMetric> _fitnessMetric
325 )
326 {
327 vector<Chromosome> chromosomes;
328 for (string const& geneSequence: _geneSequences)
329 chromosomes.emplace_back(geneSequence);
330
331 return Population(move(_fitnessMetric), move(chromosomes));
332 }
333
buildRandom(size_t _populationSize,size_t _minChromosomeLength,size_t _maxChromosomeLength,shared_ptr<FitnessMetric> _fitnessMetric)334 Population PopulationFactory::buildRandom(
335 size_t _populationSize,
336 size_t _minChromosomeLength,
337 size_t _maxChromosomeLength,
338 shared_ptr<FitnessMetric> _fitnessMetric
339 )
340 {
341 return Population::makeRandom(
342 move(_fitnessMetric),
343 _populationSize,
344 _minChromosomeLength,
345 _maxChromosomeLength
346 );
347 }
348
buildFromFile(string const & _filePath,shared_ptr<FitnessMetric> _fitnessMetric)349 Population PopulationFactory::buildFromFile(
350 string const& _filePath,
351 shared_ptr<FitnessMetric> _fitnessMetric
352 )
353 {
354 return buildFromStrings(readLinesFromFile(_filePath), move(_fitnessMetric));
355 }
356
fromCommandLine(po::variables_map const & _arguments)357 ProgramCacheFactory::Options ProgramCacheFactory::Options::fromCommandLine(po::variables_map const& _arguments)
358 {
359 return {
360 _arguments["program-cache"].as<bool>(),
361 };
362 }
363
build(Options const & _options,vector<Program> _programs)364 vector<shared_ptr<ProgramCache>> ProgramCacheFactory::build(
365 Options const& _options,
366 vector<Program> _programs
367 )
368 {
369 vector<shared_ptr<ProgramCache>> programCaches;
370 for (Program& program: _programs)
371 programCaches.push_back(_options.programCacheEnabled ? make_shared<ProgramCache>(move(program)) : nullptr);
372
373 return programCaches;
374 }
375
fromCommandLine(po::variables_map const & _arguments)376 ProgramFactory::Options ProgramFactory::Options::fromCommandLine(po::variables_map const& _arguments)
377 {
378 return {
379 _arguments["input-files"].as<vector<string>>(),
380 _arguments["prefix"].as<string>(),
381 };
382 }
383
build(Options const & _options)384 vector<Program> ProgramFactory::build(Options const& _options)
385 {
386 vector<Program> inputPrograms;
387 for (auto& path: _options.inputFiles)
388 {
389 CharStream sourceCode = loadSource(path);
390 variant<Program, ErrorList> programOrErrors = Program::load(sourceCode);
391 if (holds_alternative<ErrorList>(programOrErrors))
392 {
393 SourceReferenceFormatter{cerr, SingletonCharStreamProvider(sourceCode), true, false}
394 .printErrorInformation(get<ErrorList>(programOrErrors));
395 cerr << endl;
396 assertThrow(false, InvalidProgram, "Failed to load program " + path);
397 }
398
399 get<Program>(programOrErrors).optimise(Chromosome(_options.prefix).optimisationSteps());
400 inputPrograms.push_back(move(get<Program>(programOrErrors)));
401 }
402
403 return inputPrograms;
404 }
405
loadSource(boost::filesystem::path const & _sourcePath)406 CharStream ProgramFactory::loadSource(boost::filesystem::path const& _sourcePath)
407 {
408 assertThrow(boost::filesystem::exists(_sourcePath), MissingFile, "Source file does not exist: " + _sourcePath.string());
409
410 string sourceCode = readFileAsString(_sourcePath);
411 return CharStream(sourceCode, _sourcePath.string());
412 }
413
main(int _argc,char ** _argv)414 void Phaser::main(int _argc, char** _argv)
415 {
416 optional<po::variables_map> arguments = parseCommandLine(_argc, _argv);
417 if (!arguments.has_value())
418 return;
419
420 initialiseRNG(arguments.value());
421
422 runPhaser(arguments.value());
423 }
424
buildCommandLineDescription()425 Phaser::CommandLineDescription Phaser::buildCommandLineDescription()
426 {
427 unsigned const lineLength = po::options_description::m_default_line_length;
428 unsigned const minDescriptionLength = lineLength - 23;
429
430 po::options_description keywordDescription(
431 "yul-phaser, a tool for finding the best sequence of Yul optimisation phases.\n"
432 "\n"
433 "Usage: yul-phaser [options] <file>\n"
434 "Reads <file> as Yul code and tries to find the best order in which to run optimisation"
435 " phases using a genetic algorithm.\n"
436 "Example:\n"
437 "yul-phaser program.yul\n"
438 "\n"
439 "Allowed options",
440 lineLength,
441 minDescriptionLength
442 );
443
444 po::options_description generalDescription("GENERAL", lineLength, minDescriptionLength);
445 generalDescription.add_options()
446 ("help", "Show help message and exit.")
447 ("input-files", po::value<vector<string>>()->required()->value_name("<PATH>"), "Input files.")
448 (
449 "prefix",
450 po::value<string>()->value_name("<CHROMOSOME>")->default_value(""),
451 "Initial optimisation steps automatically applied to every input program.\n"
452 "The result is treated as if it was the actual input, i.e. the steps are not considered "
453 "a part of the chromosomes and cannot be mutated. The values of relative metric values "
454 "are also relative to the fitness of a program with these steps applied rather than the "
455 "fitness of the original program.\n"
456 "Note that phaser always adds a 'hgo' prefix to ensure that chromosomes can "
457 "contain arbitrary optimisation steps. This implicit prefix cannot be changed or "
458 "or removed using this option. The value given here is applied after it."
459 )
460 ("seed", po::value<uint32_t>()->value_name("<NUM>"), "Seed for the random number generator.")
461 (
462 "rounds",
463 po::value<size_t>()->value_name("<NUM>"),
464 "The number of rounds after which the algorithm should stop. (default=no limit)."
465 )
466 (
467 "mode",
468 po::value<PhaserMode>()->value_name("<NAME>")->default_value(PhaserMode::RunAlgorithm),
469 (
470 "Mode of operation. The default is to run the algorithm but you can also tell phaser "
471 "to do something else with its parameters, e.g. just print the optimised programs and exit.\n"
472 "\n"
473 "AVAILABLE MODES:\n"
474 "* " + toString(PhaserMode::RunAlgorithm) + "\n" +
475 "* " + toString(PhaserMode::PrintOptimisedPrograms) + "\n" +
476 "* " + toString(PhaserMode::PrintOptimisedASTs)
477 ).c_str()
478 )
479 ;
480 keywordDescription.add(generalDescription);
481
482 po::options_description algorithmDescription("ALGORITHM", lineLength, minDescriptionLength);
483 algorithmDescription.add_options()
484 (
485 "algorithm",
486 po::value<Algorithm>()->value_name("<NAME>")->default_value(Algorithm::GEWEP),
487 (
488 "Algorithm\n"
489 "\n"
490 "AVAILABLE ALGORITHMS:\n"
491 "* " + toString(Algorithm::GEWEP) + "\n" +
492 "* " + toString(Algorithm::Classic) + "\n" +
493 "* " + toString(Algorithm::Random)
494 ).c_str()
495 )
496 (
497 "no-randomise-duplicates",
498 po::bool_switch(),
499 "By default, after each round of the algorithm duplicate chromosomes are removed from"
500 "the population and replaced with randomly generated ones. "
501 "This option disables this postprocessing."
502 )
503 (
504 "min-chromosome-length",
505 po::value<size_t>()->value_name("<NUM>")->default_value(100),
506 "Minimum length of randomly generated chromosomes."
507 )
508 (
509 "max-chromosome-length",
510 po::value<size_t>()->value_name("<NUM>")->default_value(100),
511 "Maximum length of randomly generated chromosomes."
512 )
513 (
514 "crossover",
515 po::value<CrossoverChoice>()->value_name("<NAME>")->default_value(CrossoverChoice::Uniform),
516 (
517 "Type of the crossover operator to use.\n"
518 "\n"
519 "AVAILABLE CROSSOVER OPERATORS:\n"
520 "* " + toString(CrossoverChoice::SinglePoint) + "\n" +
521 "* " + toString(CrossoverChoice::TwoPoint) + "\n" +
522 "* " + toString(CrossoverChoice::Uniform)
523 ).c_str()
524 )
525 (
526 "uniform-crossover-swap-chance",
527 po::value<double>()->value_name("<PROBABILITY>")->default_value(0.5),
528 "Chance of two genes being swapped between chromosomes in uniform crossover."
529 )
530 ;
531 keywordDescription.add(algorithmDescription);
532
533 po::options_description gewepAlgorithmDescription("GEWEP ALGORITHM", lineLength, minDescriptionLength);
534 gewepAlgorithmDescription.add_options()
535 (
536 "gewep-mutation-pool-size",
537 po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
538 "Percentage of population to regenerate using mutations in each round."
539 )
540 (
541 "gewep-crossover-pool-size",
542 po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
543 "Percentage of population to regenerate using crossover in each round."
544 )
545 (
546 "gewep-randomisation-chance",
547 po::value<double>()->value_name("<PROBABILITY>")->default_value(0.9),
548 "The chance of choosing gene randomisation as the mutation to perform."
549 )
550 (
551 "gewep-deletion-vs-addition-chance",
552 po::value<double>()->value_name("<PROBABILITY>")->default_value(0.5),
553 "The chance of choosing gene deletion as the mutation if randomisation was not chosen."
554 )
555 (
556 "gewep-genes-to-randomise",
557 po::value<double>()->value_name("<PROBABILITY>"),
558 "The chance of any given gene being mutated in gene randomisation. "
559 "(default=1/max-chromosome-length)"
560 )
561 (
562 "gewep-genes-to-add-or-delete",
563 po::value<double>()->value_name("<PROBABILITY>"),
564 "The chance of a gene being added (or deleted) in gene addition (or deletion). "
565 "(default=1/max-chromosome-length)"
566 )
567 ;
568 keywordDescription.add(gewepAlgorithmDescription);
569
570 po::options_description classicGeneticAlgorithmDescription("CLASSIC GENETIC ALGORITHM", lineLength, minDescriptionLength);
571 classicGeneticAlgorithmDescription.add_options()
572 (
573 "classic-elite-pool-size",
574 po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
575 "Percentage of population to regenerate using mutations in each round."
576 )
577 (
578 "classic-crossover-chance",
579 po::value<double>()->value_name("<FRACTION>")->default_value(0.75),
580 "Chance of a chromosome being selected for crossover."
581 )
582 (
583 "classic-mutation-chance",
584 po::value<double>()->value_name("<FRACTION>")->default_value(0.01),
585 "Chance of a gene being mutated."
586 )
587 (
588 "classic-deletion-chance",
589 po::value<double>()->value_name("<PROBABILITY>")->default_value(0.01),
590 "Chance of a gene being deleted."
591 )
592 (
593 "classic-addition-chance",
594 po::value<double>()->value_name("<PROBABILITY>")->default_value(0.01),
595 "Chance of a random gene being added."
596 )
597 ;
598 keywordDescription.add(classicGeneticAlgorithmDescription);
599
600 po::options_description randomAlgorithmDescription("RANDOM ALGORITHM", lineLength, minDescriptionLength);
601 randomAlgorithmDescription.add_options()
602 (
603 "random-elite-pool-size",
604 po::value<double>()->value_name("<FRACTION>"),
605 "Percentage of the population preserved in each round. "
606 "(default=one individual, regardless of population size)"
607 )
608 ;
609 keywordDescription.add(randomAlgorithmDescription);
610
611 po::options_description populationDescription("POPULATION", lineLength, minDescriptionLength);
612 populationDescription.add_options()
613 (
614 "population",
615 po::value<vector<string>>()->multitoken()->value_name("<CHROMOSOMES>"),
616 "List of chromosomes to be included in the initial population. "
617 "You can specify multiple values separated with spaces or invoke the option multiple times "
618 "and all the values will be included."
619 )
620 (
621 "random-population",
622 po::value<vector<size_t>>()->value_name("<SIZE>"),
623 "The number of randomly generated chromosomes to be included in the initial population."
624 )
625 (
626 "population-from-file",
627 po::value<vector<string>>()->value_name("<FILE>"),
628 "A text file with a list of chromosomes (one per line) to be included in the initial population."
629 )
630 (
631 "population-autosave",
632 po::value<string>()->value_name("<FILE>"),
633 "If specified, the population is saved in the specified file after each round. (default=autosave disabled)"
634 )
635 ;
636 keywordDescription.add(populationDescription);
637
638 po::options_description metricsDescription("METRICS", lineLength, minDescriptionLength);
639 metricsDescription.add_options()
640 (
641 "metric",
642 po::value<MetricChoice>()->value_name("<NAME>")->default_value(MetricChoice::RelativeCodeSize),
643 (
644 "Metric used to evaluate the fitness of a chromosome.\n"
645 "\n"
646 "AVAILABLE METRICS:\n"
647 "* " + toString(MetricChoice::CodeSize) + "\n" +
648 "* " + toString(MetricChoice::RelativeCodeSize)
649 ).c_str()
650 )
651 (
652 "metric-aggregator",
653 po::value<MetricAggregatorChoice>()->value_name("<NAME>")->default_value(MetricAggregatorChoice::Average),
654 (
655 "Operator used to combine multiple fitness metric values obtained by evaluating a "
656 "chromosome separately for each input program.\n"
657 "\n"
658 "AVAILABLE METRIC AGGREGATORS:\n"
659 "* " + toString(MetricAggregatorChoice::Average) + "\n" +
660 "* " + toString(MetricAggregatorChoice::Sum) + "\n" +
661 "* " + toString(MetricAggregatorChoice::Maximum) + "\n" +
662 "* " + toString(MetricAggregatorChoice::Minimum)
663 ).c_str()
664 )
665 (
666 "relative-metric-scale",
667 po::value<size_t>()->value_name("<EXPONENT>")->default_value(3),
668 "Scaling factor for values produced by relative fitness metrics. \n"
669 "Since all metrics must produce integer values, the fractional part of the result is discarded. "
670 "To keep the numbers meaningful, a relative metric multiples its values by a scaling factor "
671 "and this option specifies the exponent of this factor. "
672 "For example with value of 3 the factor is 10^3 = 1000 and the metric will return "
673 "500 to represent 0.5, 1000 for 1.0, 2000 for 2.0 and so on. "
674 "Using a bigger factor allows discerning smaller relative differences between chromosomes "
675 "but makes the numbers less readable and may also lose precision if the numbers are very large."
676 )
677 (
678 "chromosome-repetitions",
679 po::value<size_t>()->value_name("<COUNT>")->default_value(1),
680 "Number of times to repeat the sequence optimisation steps represented by a chromosome."
681 )
682 ;
683 keywordDescription.add(metricsDescription);
684
685 po::options_description metricWeightDescription("METRIC WEIGHTS", lineLength, minDescriptionLength);
686 metricWeightDescription.add_options()
687 // TODO: We need to figure out the best set of weights for the phaser.
688 // This one is just a stopgap to make sure no statement or expression has zero cost.
689 ("expression-statement-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
690 ("assignment-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
691 ("variable-declaration-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
692 ("function-definition-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
693 ("if-cost", po::value<size_t>()->value_name("<COST>")->default_value(2))
694 ("switch-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
695 ("case-cost", po::value<size_t>()->value_name("<COST>")->default_value(2))
696 ("for-loop-cost", po::value<size_t>()->value_name("<COST>")->default_value(3))
697 ("break-cost", po::value<size_t>()->value_name("<COST>")->default_value(2))
698 ("continue-cost", po::value<size_t>()->value_name("<COST>")->default_value(2))
699 ("leave-cost", po::value<size_t>()->value_name("<COST>")->default_value(2))
700 ("block-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
701 ("function-call-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
702 ("identifier-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
703 ("literal-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
704 ;
705 keywordDescription.add(metricWeightDescription);
706
707 po::options_description cacheDescription("CACHE", lineLength, minDescriptionLength);
708 cacheDescription.add_options()
709 (
710 "program-cache",
711 po::bool_switch(),
712 "Enables caching of intermediate programs corresponding to chromosome prefixes.\n"
713 "This speeds up fitness evaluation by a lot but eats tons of memory if the chromosomes are long. "
714 "Disabled by default since there's currently no way to set an upper limit on memory usage but "
715 "highly recommended if your computer has enough RAM."
716 )
717 ;
718 keywordDescription.add(cacheDescription);
719
720 po::options_description outputDescription("OUTPUT", lineLength, minDescriptionLength);
721 outputDescription.add_options()
722 (
723 "show-initial-population",
724 po::bool_switch(),
725 "Print the state of the population before the algorithm starts."
726 )
727 (
728 "show-only-top-chromosome",
729 po::bool_switch(),
730 "Print only the best chromosome found in each round rather than the whole population."
731 )
732 (
733 "hide-round",
734 po::bool_switch(),
735 "Hide information about the current round (round number and elapsed time)."
736 )
737 (
738 "show-cache-stats",
739 po::bool_switch(),
740 "Print information about cache size and effectiveness after each round."
741 )
742 (
743 "show-seed",
744 po::bool_switch(),
745 "Print the selected random seed."
746 )
747 ;
748 keywordDescription.add(outputDescription);
749
750 po::positional_options_description positionalDescription;
751 positionalDescription.add("input-files", -1);
752
753 return {keywordDescription, positionalDescription};
754 }
755
parseCommandLine(int _argc,char ** _argv)756 optional<po::variables_map> Phaser::parseCommandLine(int _argc, char** _argv)
757 {
758 auto [keywordDescription, positionalDescription] = buildCommandLineDescription();
759
760 po::variables_map arguments;
761 po::notify(arguments);
762
763 po::command_line_parser parser(_argc, _argv);
764 parser.options(keywordDescription).positional(positionalDescription);
765 po::store(parser.run(), arguments);
766
767 if (arguments.count("help") > 0)
768 {
769 cout << keywordDescription << endl;
770 return nullopt;
771 }
772
773 if (arguments.count("input-files") == 0)
774 assertThrow(false, NoInputFiles, "Missing argument: input-files.");
775
776 return arguments;
777 }
778
initialiseRNG(po::variables_map const & _arguments)779 void Phaser::initialiseRNG(po::variables_map const& _arguments)
780 {
781 uint32_t seed;
782 if (_arguments.count("seed") > 0)
783 seed = _arguments["seed"].as<uint32_t>();
784 else
785 seed = SimulationRNG::generateSeed();
786
787 SimulationRNG::reset(seed);
788 if (_arguments["show-seed"].as<bool>())
789 cout << "Random seed: " << seed << endl;
790 }
791
buildAlgorithmRunnerOptions(po::variables_map const & _arguments)792 AlgorithmRunner::Options Phaser::buildAlgorithmRunnerOptions(po::variables_map const& _arguments)
793 {
794 return {
795 _arguments.count("rounds") > 0 ? static_cast<optional<size_t>>(_arguments["rounds"].as<size_t>()) : nullopt,
796 _arguments.count("population-autosave") > 0 ? static_cast<optional<string>>(_arguments["population-autosave"].as<string>()) : nullopt,
797 !_arguments["no-randomise-duplicates"].as<bool>(),
798 _arguments["min-chromosome-length"].as<size_t>(),
799 _arguments["max-chromosome-length"].as<size_t>(),
800 _arguments["show-initial-population"].as<bool>(),
801 _arguments["show-only-top-chromosome"].as<bool>(),
802 !_arguments["hide-round"].as<bool>(),
803 _arguments["show-cache-stats"].as<bool>(),
804 };
805 }
806
runPhaser(po::variables_map const & _arguments)807 void Phaser::runPhaser(po::variables_map const& _arguments)
808 {
809 auto programOptions = ProgramFactory::Options::fromCommandLine(_arguments);
810 auto cacheOptions = ProgramCacheFactory::Options::fromCommandLine(_arguments);
811 auto metricOptions = FitnessMetricFactory::Options::fromCommandLine(_arguments);
812 auto populationOptions = PopulationFactory::Options::fromCommandLine(_arguments);
813
814 vector<Program> programs = ProgramFactory::build(programOptions);
815 vector<shared_ptr<ProgramCache>> programCaches = ProgramCacheFactory::build(cacheOptions, programs);
816 CodeWeights codeWeights = CodeWeightFactory::buildFromCommandLine(_arguments);
817 unique_ptr<FitnessMetric> fitnessMetric = FitnessMetricFactory::build(
818 metricOptions,
819 programs,
820 programCaches,
821 codeWeights
822 );
823 Population population = PopulationFactory::build(populationOptions, move(fitnessMetric));
824
825 if (_arguments["mode"].as<PhaserMode>() == PhaserMode::RunAlgorithm)
826 runAlgorithm(_arguments, move(population), move(programCaches));
827 else
828 printOptimisedProgramsOrASTs(_arguments, population, move(programs), _arguments["mode"].as<PhaserMode>());
829 }
830
runAlgorithm(po::variables_map const & _arguments,Population _population,vector<shared_ptr<ProgramCache>> _programCaches)831 void Phaser::runAlgorithm(
832 po::variables_map const& _arguments,
833 Population _population,
834 vector<shared_ptr<ProgramCache>> _programCaches
835 )
836 {
837 auto algorithmOptions = GeneticAlgorithmFactory::Options::fromCommandLine(_arguments);
838
839 unique_ptr<GeneticAlgorithm> geneticAlgorithm = GeneticAlgorithmFactory::build(
840 algorithmOptions,
841 _population.individuals().size()
842 );
843
844 AlgorithmRunner algorithmRunner(move(_population), move(_programCaches), buildAlgorithmRunnerOptions(_arguments), cout);
845 algorithmRunner.run(*geneticAlgorithm);
846 }
847
printOptimisedProgramsOrASTs(po::variables_map const & _arguments,Population const & _population,vector<Program> _programs,PhaserMode phaserMode)848 void Phaser::printOptimisedProgramsOrASTs(
849 po::variables_map const& _arguments,
850 Population const& _population,
851 vector<Program> _programs,
852 PhaserMode phaserMode
853 )
854 {
855 assert(phaserMode == PhaserMode::PrintOptimisedPrograms || phaserMode == PhaserMode::PrintOptimisedASTs);
856 assert(_programs.size() == _arguments["input-files"].as<vector<string>>().size());
857
858 if (_population.individuals().size() == 0)
859 {
860 cout << "<EMPTY POPULATION>" << endl;
861 return;
862 }
863
864 vector<string> const& paths = _arguments["input-files"].as<vector<string>>();
865 for (auto& individual: _population.individuals())
866 {
867 cout << "Chromosome: " << individual.chromosome << endl;
868
869 for (size_t i = 0; i < _programs.size(); ++i)
870 {
871 for (size_t j = 0; j < _arguments["chromosome-repetitions"].as<size_t>(); ++j)
872 _programs[i].optimise(individual.chromosome.optimisationSteps());
873
874 cout << "Program: " << paths[i] << endl;
875 if (phaserMode == PhaserMode::PrintOptimisedPrograms)
876 cout << _programs[i] << endl;
877 else
878 cout << _programs[i].toJson() << endl;
879 }
880 }
881 }
882