1 /*
2 	This file is part of solidity.
3 
4 	solidity is free software: you can redistribute it and/or modify
5 	it under the terms of the GNU General Public License as published by
6 	the Free Software Foundation, either version 3 of the License, or
7 	(at your option) any later version.
8 
9 	solidity is distributed in the hope that it will be useful,
10 	but WITHOUT ANY WARRANTY; without even the implied warranty of
11 	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 	GNU General Public License for more details.
13 
14 	You should have received a copy of the GNU General Public License
15 	along with solidity.  If not, see <http://www.gnu.org/licenses/>.
16 */
17 // SPDX-License-Identifier: GPL-3.0
18 
19 #include <tools/yulPhaser/Phaser.h>
20 
21 #include <tools/yulPhaser/AlgorithmRunner.h>
22 #include <tools/yulPhaser/Common.h>
23 #include <tools/yulPhaser/Exceptions.h>
24 #include <tools/yulPhaser/FitnessMetrics.h>
25 #include <tools/yulPhaser/GeneticAlgorithms.h>
26 #include <tools/yulPhaser/Program.h>
27 #include <tools/yulPhaser/SimulationRNG.h>
28 
29 #include <liblangutil/CharStream.h>
30 #include <liblangutil/CharStreamProvider.h>
31 #include <liblangutil/SourceReferenceFormatter.h>
32 #include <liblangutil/Scanner.h>
33 
34 #include <libsolutil/Assertions.h>
35 #include <libsolutil/CommonData.h>
36 #include <libsolutil/CommonIO.h>
37 
38 #include <iostream>
39 
40 using namespace std;
41 using namespace solidity;
42 using namespace solidity::langutil;
43 using namespace solidity::util;
44 using namespace solidity::yul;
45 using namespace solidity::phaser;
46 
47 namespace po = boost::program_options;
48 
49 namespace
50 {
51 
52 map<PhaserMode, string> const PhaserModeToStringMap =
53 {
54 	{PhaserMode::RunAlgorithm, "run-algorithm"},
55 	{PhaserMode::PrintOptimisedPrograms, "print-optimised-programs"},
56 	{PhaserMode::PrintOptimisedASTs, "print-optimised-asts"},
57 };
58 map<string, PhaserMode> const StringToPhaserModeMap = invertMap(PhaserModeToStringMap);
59 
60 map<Algorithm, string> const AlgorithmToStringMap =
61 {
62 	{Algorithm::Random, "random"},
63 	{Algorithm::GEWEP, "GEWEP"},
64 	{Algorithm::Classic, "classic"},
65 };
66 map<string, Algorithm> const StringToAlgorithmMap = invertMap(AlgorithmToStringMap);
67 
68 map<MetricChoice, string> MetricChoiceToStringMap =
69 {
70 	{MetricChoice::CodeSize, "code-size"},
71 	{MetricChoice::RelativeCodeSize, "relative-code-size"},
72 };
73 map<string, MetricChoice> const StringToMetricChoiceMap = invertMap(MetricChoiceToStringMap);
74 
75 map<MetricAggregatorChoice, string> const MetricAggregatorChoiceToStringMap =
76 {
77 	{MetricAggregatorChoice::Average, "average"},
78 	{MetricAggregatorChoice::Sum, "sum"},
79 	{MetricAggregatorChoice::Maximum, "maximum"},
80 	{MetricAggregatorChoice::Minimum, "minimum"},
81 };
82 map<string, MetricAggregatorChoice> const StringToMetricAggregatorChoiceMap = invertMap(MetricAggregatorChoiceToStringMap);
83 
84 map<CrossoverChoice, string> const CrossoverChoiceToStringMap =
85 {
86 	{CrossoverChoice::SinglePoint, "single-point"},
87 	{CrossoverChoice::TwoPoint, "two-point"},
88 	{CrossoverChoice::Uniform, "uniform"},
89 };
90 map<string, CrossoverChoice> const StringToCrossoverChoiceMap = invertMap(CrossoverChoiceToStringMap);
91 
92 }
93 
operator >>(istream & _inputStream,PhaserMode & _phaserMode)94 istream& phaser::operator>>(istream& _inputStream, PhaserMode& _phaserMode) { return deserializeChoice(_inputStream, _phaserMode, StringToPhaserModeMap); }
operator <<(ostream & _outputStream,PhaserMode _phaserMode)95 ostream& phaser::operator<<(ostream& _outputStream, PhaserMode _phaserMode) { return serializeChoice(_outputStream, _phaserMode, PhaserModeToStringMap); }
operator >>(istream & _inputStream,Algorithm & _algorithm)96 istream& phaser::operator>>(istream& _inputStream, Algorithm& _algorithm) { return deserializeChoice(_inputStream, _algorithm, StringToAlgorithmMap); }
operator <<(ostream & _outputStream,Algorithm _algorithm)97 ostream& phaser::operator<<(ostream& _outputStream, Algorithm _algorithm) { return serializeChoice(_outputStream, _algorithm, AlgorithmToStringMap); }
operator >>(istream & _inputStream,MetricChoice & _metric)98 istream& phaser::operator>>(istream& _inputStream, MetricChoice& _metric) { return deserializeChoice(_inputStream, _metric, StringToMetricChoiceMap); }
operator <<(ostream & _outputStream,MetricChoice _metric)99 ostream& phaser::operator<<(ostream& _outputStream, MetricChoice _metric) { return serializeChoice(_outputStream, _metric, MetricChoiceToStringMap); }
operator >>(istream & _inputStream,MetricAggregatorChoice & _aggregator)100 istream& phaser::operator>>(istream& _inputStream, MetricAggregatorChoice& _aggregator) { return deserializeChoice(_inputStream, _aggregator, StringToMetricAggregatorChoiceMap); }
operator <<(ostream & _outputStream,MetricAggregatorChoice _aggregator)101 ostream& phaser::operator<<(ostream& _outputStream, MetricAggregatorChoice _aggregator) { return serializeChoice(_outputStream, _aggregator, MetricAggregatorChoiceToStringMap); }
operator >>(istream & _inputStream,CrossoverChoice & _crossover)102 istream& phaser::operator>>(istream& _inputStream, CrossoverChoice& _crossover) { return deserializeChoice(_inputStream, _crossover, StringToCrossoverChoiceMap); }
operator <<(ostream & _outputStream,CrossoverChoice _crossover)103 ostream& phaser::operator<<(ostream& _outputStream, CrossoverChoice _crossover) { return serializeChoice(_outputStream, _crossover, CrossoverChoiceToStringMap); }
104 
fromCommandLine(po::variables_map const & _arguments)105 GeneticAlgorithmFactory::Options GeneticAlgorithmFactory::Options::fromCommandLine(po::variables_map const& _arguments)
106 {
107 	return {
108 		_arguments["algorithm"].as<Algorithm>(),
109 		_arguments["min-chromosome-length"].as<size_t>(),
110 		_arguments["max-chromosome-length"].as<size_t>(),
111 		_arguments["crossover"].as<CrossoverChoice>(),
112 		_arguments["uniform-crossover-swap-chance"].as<double>(),
113 		_arguments.count("random-elite-pool-size") > 0 ?
114 			_arguments["random-elite-pool-size"].as<double>() :
115 			optional<double>{},
116 		_arguments["gewep-mutation-pool-size"].as<double>(),
117 		_arguments["gewep-crossover-pool-size"].as<double>(),
118 		_arguments["gewep-randomisation-chance"].as<double>(),
119 		_arguments["gewep-deletion-vs-addition-chance"].as<double>(),
120 		_arguments.count("gewep-genes-to-randomise") > 0 ?
121 			_arguments["gewep-genes-to-randomise"].as<double>() :
122 			optional<double>{},
123 		_arguments.count("gewep-genes-to-add-or-delete") > 0 ?
124 			_arguments["gewep-genes-to-add-or-delete"].as<double>() :
125 			optional<double>{},
126 		_arguments["classic-elite-pool-size"].as<double>(),
127 		_arguments["classic-crossover-chance"].as<double>(),
128 		_arguments["classic-mutation-chance"].as<double>(),
129 		_arguments["classic-deletion-chance"].as<double>(),
130 		_arguments["classic-addition-chance"].as<double>(),
131 	};
132 }
133 
build(Options const & _options,size_t _populationSize)134 unique_ptr<GeneticAlgorithm> GeneticAlgorithmFactory::build(
135 	Options const& _options,
136 	size_t _populationSize
137 )
138 {
139 	assert(_populationSize > 0);
140 
141 	switch (_options.algorithm)
142 	{
143 		case Algorithm::Random:
144 		{
145 			double elitePoolSize = 1.0 / double(_populationSize);
146 
147 			if (_options.randomElitePoolSize.has_value())
148 				elitePoolSize = _options.randomElitePoolSize.value();
149 
150 			return make_unique<RandomAlgorithm>(RandomAlgorithm::Options{
151 				/* elitePoolSize = */ elitePoolSize,
152 				/* minChromosomeLength = */ _options.minChromosomeLength,
153 				/* maxChromosomeLength = */ _options.maxChromosomeLength,
154 			});
155 		}
156 		case Algorithm::GEWEP:
157 		{
158 			double percentGenesToRandomise = 1.0 / double(_options.maxChromosomeLength);
159 			double percentGenesToAddOrDelete = percentGenesToRandomise;
160 
161 			if (_options.gewepGenesToRandomise.has_value())
162 				percentGenesToRandomise = _options.gewepGenesToRandomise.value();
163 			if (_options.gewepGenesToAddOrDelete.has_value())
164 				percentGenesToAddOrDelete = _options.gewepGenesToAddOrDelete.value();
165 
166 			return make_unique<GenerationalElitistWithExclusivePools>(GenerationalElitistWithExclusivePools::Options{
167 				/* mutationPoolSize = */ _options.gewepMutationPoolSize,
168 				/* crossoverPoolSize = */ _options.gewepCrossoverPoolSize,
169 				/* randomisationChance = */ _options.gewepRandomisationChance,
170 				/* deletionVsAdditionChance = */ _options.gewepDeletionVsAdditionChance,
171 				/* percentGenesToRandomise = */ percentGenesToRandomise,
172 				/* percentGenesToAddOrDelete = */ percentGenesToAddOrDelete,
173 				/* crossover = */ _options.crossover,
174 				/* uniformCrossoverSwapChance = */ _options.uniformCrossoverSwapChance,
175 			});
176 		}
177 		case Algorithm::Classic:
178 		{
179 			return make_unique<ClassicGeneticAlgorithm>(ClassicGeneticAlgorithm::Options{
180 				/* elitePoolSize = */ _options.classicElitePoolSize,
181 				/* crossoverChance = */ _options.classicCrossoverChance,
182 				/* mutationChance = */ _options.classicMutationChance,
183 				/* deletionChance = */ _options.classicDeletionChance,
184 				/* additionChance = */ _options.classicAdditionChance,
185 				/* crossover = */ _options.crossover,
186 				/* uniformCrossoverSwapChance = */ _options.uniformCrossoverSwapChance,
187 			});
188 		}
189 		default:
190 			assertThrow(false, solidity::util::Exception, "Invalid Algorithm value.");
191 	}
192 }
193 
buildFromCommandLine(po::variables_map const & _arguments)194 CodeWeights CodeWeightFactory::buildFromCommandLine(po::variables_map const& _arguments)
195 {
196 	return {
197 		_arguments["expression-statement-cost"].as<size_t>(),
198 		_arguments["assignment-cost"].as<size_t>(),
199 		_arguments["variable-declaration-cost"].as<size_t>(),
200 		_arguments["function-definition-cost"].as<size_t>(),
201 		_arguments["if-cost"].as<size_t>(),
202 		_arguments["switch-cost"].as<size_t>(),
203 		_arguments["case-cost"].as<size_t>(),
204 		_arguments["for-loop-cost"].as<size_t>(),
205 		_arguments["break-cost"].as<size_t>(),
206 		_arguments["continue-cost"].as<size_t>(),
207 		_arguments["leave-cost"].as<size_t>(),
208 		_arguments["block-cost"].as<size_t>(),
209 		_arguments["function-call-cost"].as<size_t>(),
210 		_arguments["identifier-cost"].as<size_t>(),
211 		_arguments["literal-cost"].as<size_t>(),
212 	};
213 }
214 
fromCommandLine(po::variables_map const & _arguments)215 FitnessMetricFactory::Options FitnessMetricFactory::Options::fromCommandLine(po::variables_map const& _arguments)
216 {
217 	return {
218 		_arguments["metric"].as<MetricChoice>(),
219 		_arguments["metric-aggregator"].as<MetricAggregatorChoice>(),
220 		_arguments["relative-metric-scale"].as<size_t>(),
221 		_arguments["chromosome-repetitions"].as<size_t>(),
222 	};
223 }
224 
build(Options const & _options,vector<Program> _programs,vector<shared_ptr<ProgramCache>> _programCaches,CodeWeights const & _weights)225 unique_ptr<FitnessMetric> FitnessMetricFactory::build(
226 	Options const& _options,
227 	vector<Program> _programs,
228 	vector<shared_ptr<ProgramCache>> _programCaches,
229 	CodeWeights const& _weights
230 )
231 {
232 	assert(_programCaches.size() == _programs.size());
233 	assert(_programs.size() > 0 && "Validations should prevent this from being executed with zero files.");
234 
235 	vector<shared_ptr<FitnessMetric>> metrics;
236 	switch (_options.metric)
237 	{
238 		case MetricChoice::CodeSize:
239 		{
240 			for (size_t i = 0; i < _programs.size(); ++i)
241 				metrics.push_back(make_unique<ProgramSize>(
242 					_programCaches[i] != nullptr ? optional<Program>{} : move(_programs[i]),
243 					move(_programCaches[i]),
244 					_weights,
245 					_options.chromosomeRepetitions
246 				));
247 
248 			break;
249 		}
250 		case MetricChoice::RelativeCodeSize:
251 		{
252 			for (size_t i = 0; i < _programs.size(); ++i)
253 				metrics.push_back(make_unique<RelativeProgramSize>(
254 					_programCaches[i] != nullptr ? optional<Program>{} : move(_programs[i]),
255 					move(_programCaches[i]),
256 					_options.relativeMetricScale,
257 					_weights,
258 					_options.chromosomeRepetitions
259 				));
260 			break;
261 		}
262 		default:
263 			assertThrow(false, solidity::util::Exception, "Invalid MetricChoice value.");
264 	}
265 
266 	switch (_options.metricAggregator)
267 	{
268 		case MetricAggregatorChoice::Average:
269 			return make_unique<FitnessMetricAverage>(move(metrics));
270 		case MetricAggregatorChoice::Sum:
271 			return make_unique<FitnessMetricSum>(move(metrics));
272 		case MetricAggregatorChoice::Maximum:
273 			return make_unique<FitnessMetricMaximum>(move(metrics));
274 		case MetricAggregatorChoice::Minimum:
275 			return make_unique<FitnessMetricMinimum>(move(metrics));
276 		default:
277 			assertThrow(false, solidity::util::Exception, "Invalid MetricAggregatorChoice value.");
278 	}
279 }
280 
fromCommandLine(po::variables_map const & _arguments)281 PopulationFactory::Options PopulationFactory::Options::fromCommandLine(po::variables_map const& _arguments)
282 {
283 	return {
284 		_arguments["min-chromosome-length"].as<size_t>(),
285 		_arguments["max-chromosome-length"].as<size_t>(),
286 		_arguments.count("population") > 0 ?
287 			_arguments["population"].as<vector<string>>() :
288 			vector<string>{},
289 		_arguments.count("random-population") > 0 ?
290 			_arguments["random-population"].as<vector<size_t>>() :
291 			vector<size_t>{},
292 		_arguments.count("population-from-file") > 0 ?
293 			_arguments["population-from-file"].as<vector<string>>() :
294 			vector<string>{},
295 	};
296 }
297 
build(Options const & _options,shared_ptr<FitnessMetric> _fitnessMetric)298 Population PopulationFactory::build(
299 	Options const& _options,
300 	shared_ptr<FitnessMetric> _fitnessMetric
301 )
302 {
303 	Population population = buildFromStrings(_options.population, _fitnessMetric);
304 
305 	size_t combinedSize = 0;
306 	for (size_t populationSize: _options.randomPopulation)
307 		combinedSize += populationSize;
308 
309 	population = move(population) + buildRandom(
310 		combinedSize,
311 		_options.minChromosomeLength,
312 		_options.maxChromosomeLength,
313 		_fitnessMetric
314 	);
315 
316 	for (string const& populationFilePath: _options.populationFromFile)
317 		population = move(population) + buildFromFile(populationFilePath, _fitnessMetric);
318 
319 	return population;
320 }
321 
buildFromStrings(vector<string> const & _geneSequences,shared_ptr<FitnessMetric> _fitnessMetric)322 Population PopulationFactory::buildFromStrings(
323 	vector<string> const& _geneSequences,
324 	shared_ptr<FitnessMetric> _fitnessMetric
325 )
326 {
327 	vector<Chromosome> chromosomes;
328 	for (string const& geneSequence: _geneSequences)
329 		chromosomes.emplace_back(geneSequence);
330 
331 	return Population(move(_fitnessMetric), move(chromosomes));
332 }
333 
buildRandom(size_t _populationSize,size_t _minChromosomeLength,size_t _maxChromosomeLength,shared_ptr<FitnessMetric> _fitnessMetric)334 Population PopulationFactory::buildRandom(
335 	size_t _populationSize,
336 	size_t _minChromosomeLength,
337 	size_t _maxChromosomeLength,
338 	shared_ptr<FitnessMetric> _fitnessMetric
339 )
340 {
341 	return Population::makeRandom(
342 		move(_fitnessMetric),
343 		_populationSize,
344 		_minChromosomeLength,
345 		_maxChromosomeLength
346 	);
347 }
348 
buildFromFile(string const & _filePath,shared_ptr<FitnessMetric> _fitnessMetric)349 Population PopulationFactory::buildFromFile(
350 	string const& _filePath,
351 	shared_ptr<FitnessMetric> _fitnessMetric
352 )
353 {
354 	return buildFromStrings(readLinesFromFile(_filePath), move(_fitnessMetric));
355 }
356 
fromCommandLine(po::variables_map const & _arguments)357 ProgramCacheFactory::Options ProgramCacheFactory::Options::fromCommandLine(po::variables_map const& _arguments)
358 {
359 	return {
360 		_arguments["program-cache"].as<bool>(),
361 	};
362 }
363 
build(Options const & _options,vector<Program> _programs)364 vector<shared_ptr<ProgramCache>> ProgramCacheFactory::build(
365 	Options const& _options,
366 	vector<Program> _programs
367 )
368 {
369 	vector<shared_ptr<ProgramCache>> programCaches;
370 	for (Program& program: _programs)
371 		programCaches.push_back(_options.programCacheEnabled ? make_shared<ProgramCache>(move(program)) : nullptr);
372 
373 	return programCaches;
374 }
375 
fromCommandLine(po::variables_map const & _arguments)376 ProgramFactory::Options ProgramFactory::Options::fromCommandLine(po::variables_map const& _arguments)
377 {
378 	return {
379 		_arguments["input-files"].as<vector<string>>(),
380 		_arguments["prefix"].as<string>(),
381 	};
382 }
383 
build(Options const & _options)384 vector<Program> ProgramFactory::build(Options const& _options)
385 {
386 	vector<Program> inputPrograms;
387 	for (auto& path: _options.inputFiles)
388 	{
389 		CharStream sourceCode = loadSource(path);
390 		variant<Program, ErrorList> programOrErrors = Program::load(sourceCode);
391 		if (holds_alternative<ErrorList>(programOrErrors))
392 		{
393 			SourceReferenceFormatter{cerr, SingletonCharStreamProvider(sourceCode), true, false}
394 				.printErrorInformation(get<ErrorList>(programOrErrors));
395 			cerr << endl;
396 			assertThrow(false, InvalidProgram, "Failed to load program " + path);
397 		}
398 
399 		get<Program>(programOrErrors).optimise(Chromosome(_options.prefix).optimisationSteps());
400 		inputPrograms.push_back(move(get<Program>(programOrErrors)));
401 	}
402 
403 	return inputPrograms;
404 }
405 
loadSource(boost::filesystem::path const & _sourcePath)406 CharStream ProgramFactory::loadSource(boost::filesystem::path const& _sourcePath)
407 {
408 	assertThrow(boost::filesystem::exists(_sourcePath), MissingFile, "Source file does not exist: " + _sourcePath.string());
409 
410 	string sourceCode = readFileAsString(_sourcePath);
411 	return CharStream(sourceCode, _sourcePath.string());
412 }
413 
main(int _argc,char ** _argv)414 void Phaser::main(int _argc, char** _argv)
415 {
416 	optional<po::variables_map> arguments = parseCommandLine(_argc, _argv);
417 	if (!arguments.has_value())
418 		return;
419 
420 	initialiseRNG(arguments.value());
421 
422 	runPhaser(arguments.value());
423 }
424 
buildCommandLineDescription()425 Phaser::CommandLineDescription Phaser::buildCommandLineDescription()
426 {
427 	unsigned const lineLength = po::options_description::m_default_line_length;
428 	unsigned const minDescriptionLength = lineLength - 23;
429 
430 	po::options_description keywordDescription(
431 		"yul-phaser, a tool for finding the best sequence of Yul optimisation phases.\n"
432 		"\n"
433 		"Usage: yul-phaser [options] <file>\n"
434 		"Reads <file> as Yul code and tries to find the best order in which to run optimisation"
435 		" phases using a genetic algorithm.\n"
436 		"Example:\n"
437 		"yul-phaser program.yul\n"
438 		"\n"
439 		"Allowed options",
440 		lineLength,
441 		minDescriptionLength
442 	);
443 
444 	po::options_description generalDescription("GENERAL", lineLength, minDescriptionLength);
445 	generalDescription.add_options()
446 		("help", "Show help message and exit.")
447 		("input-files", po::value<vector<string>>()->required()->value_name("<PATH>"), "Input files.")
448 		(
449 			"prefix",
450 			po::value<string>()->value_name("<CHROMOSOME>")->default_value(""),
451 			"Initial optimisation steps automatically applied to every input program.\n"
452 			"The result is treated as if it was the actual input, i.e. the steps are not considered "
453 			"a part of the chromosomes and cannot be mutated. The values of relative metric values "
454 			"are also relative to the fitness of a program with these steps applied rather than the "
455 			"fitness of the original program.\n"
456 			"Note that phaser always adds a 'hgo' prefix to ensure that chromosomes can "
457 			"contain arbitrary optimisation steps. This implicit prefix cannot be changed or "
458 			"or removed using this option. The value given here is applied after it."
459 		)
460 		("seed", po::value<uint32_t>()->value_name("<NUM>"), "Seed for the random number generator.")
461 		(
462 			"rounds",
463 			po::value<size_t>()->value_name("<NUM>"),
464 			"The number of rounds after which the algorithm should stop. (default=no limit)."
465 		)
466 		(
467 			"mode",
468 			po::value<PhaserMode>()->value_name("<NAME>")->default_value(PhaserMode::RunAlgorithm),
469 			(
470 				"Mode of operation. The default is to run the algorithm but you can also tell phaser "
471 				"to do something else with its parameters, e.g. just print the optimised programs and exit.\n"
472 				"\n"
473 				"AVAILABLE MODES:\n"
474 				"* " + toString(PhaserMode::RunAlgorithm) + "\n" +
475 				"* " + toString(PhaserMode::PrintOptimisedPrograms) + "\n" +
476 				"* " + toString(PhaserMode::PrintOptimisedASTs)
477 			).c_str()
478 		)
479 	;
480 	keywordDescription.add(generalDescription);
481 
482 	po::options_description algorithmDescription("ALGORITHM", lineLength, minDescriptionLength);
483 	algorithmDescription.add_options()
484 		(
485 			"algorithm",
486 			po::value<Algorithm>()->value_name("<NAME>")->default_value(Algorithm::GEWEP),
487 			(
488 				"Algorithm\n"
489 				"\n"
490 				"AVAILABLE ALGORITHMS:\n"
491 				"* " + toString(Algorithm::GEWEP) + "\n" +
492 				"* " + toString(Algorithm::Classic) + "\n" +
493 				"* " + toString(Algorithm::Random)
494 			).c_str()
495 		)
496 		(
497 			"no-randomise-duplicates",
498 			po::bool_switch(),
499 			"By default, after each round of the algorithm duplicate chromosomes are removed from"
500 			"the population and replaced with randomly generated ones. "
501 			"This option disables this postprocessing."
502 		)
503 		(
504 			"min-chromosome-length",
505 			po::value<size_t>()->value_name("<NUM>")->default_value(100),
506 			"Minimum length of randomly generated chromosomes."
507 		)
508 		(
509 			"max-chromosome-length",
510 			po::value<size_t>()->value_name("<NUM>")->default_value(100),
511 			"Maximum length of randomly generated chromosomes."
512 		)
513 		(
514 			"crossover",
515 			po::value<CrossoverChoice>()->value_name("<NAME>")->default_value(CrossoverChoice::Uniform),
516 			(
517 				"Type of the crossover operator to use.\n"
518 				"\n"
519 				"AVAILABLE CROSSOVER OPERATORS:\n"
520 				"* " + toString(CrossoverChoice::SinglePoint) + "\n" +
521 				"* " + toString(CrossoverChoice::TwoPoint) + "\n" +
522 				"* " + toString(CrossoverChoice::Uniform)
523 			).c_str()
524 		)
525 		(
526 			"uniform-crossover-swap-chance",
527 			po::value<double>()->value_name("<PROBABILITY>")->default_value(0.5),
528 			"Chance of two genes being swapped between chromosomes in uniform crossover."
529 		)
530 	;
531 	keywordDescription.add(algorithmDescription);
532 
533 	po::options_description gewepAlgorithmDescription("GEWEP ALGORITHM", lineLength, minDescriptionLength);
534 	gewepAlgorithmDescription.add_options()
535 		(
536 			"gewep-mutation-pool-size",
537 			po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
538 			"Percentage of population to regenerate using mutations in each round."
539 		)
540 		(
541 			"gewep-crossover-pool-size",
542 			po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
543 			"Percentage of population to regenerate using crossover in each round."
544 		)
545 		(
546 			"gewep-randomisation-chance",
547 			po::value<double>()->value_name("<PROBABILITY>")->default_value(0.9),
548 			"The chance of choosing gene randomisation as the mutation to perform."
549 		)
550 		(
551 			"gewep-deletion-vs-addition-chance",
552 			po::value<double>()->value_name("<PROBABILITY>")->default_value(0.5),
553 			"The chance of choosing gene deletion as the mutation if randomisation was not chosen."
554 		)
555 		(
556 			"gewep-genes-to-randomise",
557 			po::value<double>()->value_name("<PROBABILITY>"),
558 			"The chance of any given gene being mutated in gene randomisation. "
559 			"(default=1/max-chromosome-length)"
560 		)
561 		(
562 			"gewep-genes-to-add-or-delete",
563 			po::value<double>()->value_name("<PROBABILITY>"),
564 			"The chance of a gene being added (or deleted) in gene addition (or deletion). "
565 			"(default=1/max-chromosome-length)"
566 		)
567 	;
568 	keywordDescription.add(gewepAlgorithmDescription);
569 
570 	po::options_description classicGeneticAlgorithmDescription("CLASSIC GENETIC ALGORITHM", lineLength, minDescriptionLength);
571 	classicGeneticAlgorithmDescription.add_options()
572 		(
573 			"classic-elite-pool-size",
574 			po::value<double>()->value_name("<FRACTION>")->default_value(0.25),
575 			"Percentage of population to regenerate using mutations in each round."
576 		)
577 		(
578 			"classic-crossover-chance",
579 			po::value<double>()->value_name("<FRACTION>")->default_value(0.75),
580 			"Chance of a chromosome being selected for crossover."
581 		)
582 		(
583 			"classic-mutation-chance",
584 			po::value<double>()->value_name("<FRACTION>")->default_value(0.01),
585 			"Chance of a gene being mutated."
586 		)
587 		(
588 			"classic-deletion-chance",
589 			po::value<double>()->value_name("<PROBABILITY>")->default_value(0.01),
590 			"Chance of a gene being deleted."
591 		)
592 		(
593 			"classic-addition-chance",
594 			po::value<double>()->value_name("<PROBABILITY>")->default_value(0.01),
595 			"Chance of a random gene being added."
596 		)
597 	;
598 	keywordDescription.add(classicGeneticAlgorithmDescription);
599 
600 	po::options_description randomAlgorithmDescription("RANDOM ALGORITHM", lineLength, minDescriptionLength);
601 	randomAlgorithmDescription.add_options()
602 		(
603 			"random-elite-pool-size",
604 			po::value<double>()->value_name("<FRACTION>"),
605 			"Percentage of the population preserved in each round. "
606 			"(default=one individual, regardless of population size)"
607 		)
608 	;
609 	keywordDescription.add(randomAlgorithmDescription);
610 
611 	po::options_description populationDescription("POPULATION", lineLength, minDescriptionLength);
612 	populationDescription.add_options()
613 		(
614 			"population",
615 			po::value<vector<string>>()->multitoken()->value_name("<CHROMOSOMES>"),
616 			"List of chromosomes to be included in the initial population. "
617 			"You can specify multiple values separated with spaces or invoke the option multiple times "
618 			"and all the values will be included."
619 		)
620 		(
621 			"random-population",
622 			po::value<vector<size_t>>()->value_name("<SIZE>"),
623 			"The number of randomly generated chromosomes to be included in the initial population."
624 		)
625 		(
626 			"population-from-file",
627 			po::value<vector<string>>()->value_name("<FILE>"),
628 			"A text file with a list of chromosomes (one per line) to be included in the initial population."
629 		)
630 		(
631 			"population-autosave",
632 			po::value<string>()->value_name("<FILE>"),
633 			"If specified, the population is saved in the specified file after each round. (default=autosave disabled)"
634 		)
635 	;
636 	keywordDescription.add(populationDescription);
637 
638 	po::options_description metricsDescription("METRICS", lineLength, minDescriptionLength);
639 	metricsDescription.add_options()
640 		(
641 			"metric",
642 			po::value<MetricChoice>()->value_name("<NAME>")->default_value(MetricChoice::RelativeCodeSize),
643 			(
644 				"Metric used to evaluate the fitness of a chromosome.\n"
645 				"\n"
646 				"AVAILABLE METRICS:\n"
647 				"* " + toString(MetricChoice::CodeSize) + "\n" +
648 				"* " + toString(MetricChoice::RelativeCodeSize)
649 			).c_str()
650 		)
651 		(
652 			"metric-aggregator",
653 			po::value<MetricAggregatorChoice>()->value_name("<NAME>")->default_value(MetricAggregatorChoice::Average),
654 			(
655 				"Operator used to combine multiple fitness metric values obtained by evaluating a "
656 				"chromosome separately for each input program.\n"
657 				"\n"
658 				"AVAILABLE METRIC AGGREGATORS:\n"
659 				"* " + toString(MetricAggregatorChoice::Average) + "\n" +
660 				"* " + toString(MetricAggregatorChoice::Sum) + "\n" +
661 				"* " + toString(MetricAggregatorChoice::Maximum) + "\n" +
662 				"* " + toString(MetricAggregatorChoice::Minimum)
663 			).c_str()
664 		)
665 		(
666 			"relative-metric-scale",
667 			po::value<size_t>()->value_name("<EXPONENT>")->default_value(3),
668 			"Scaling factor for values produced by relative fitness metrics. \n"
669 			"Since all metrics must produce integer values, the fractional part of the result is discarded. "
670 			"To keep the numbers meaningful, a relative metric multiples its values by a scaling factor "
671 			"and this option specifies the exponent of this factor. "
672 			"For example with value of 3 the factor is 10^3 = 1000 and the metric will return "
673 			"500 to represent 0.5, 1000 for 1.0, 2000 for 2.0 and so on. "
674 			"Using a bigger factor allows discerning smaller relative differences between chromosomes "
675 			"but makes the numbers less readable and may also lose precision if the numbers are very large."
676 		)
677 		(
678 			"chromosome-repetitions",
679 			po::value<size_t>()->value_name("<COUNT>")->default_value(1),
680 			"Number of times to repeat the sequence optimisation steps represented by a chromosome."
681 		)
682 	;
683 	keywordDescription.add(metricsDescription);
684 
685 	po::options_description metricWeightDescription("METRIC WEIGHTS", lineLength, minDescriptionLength);
686 	metricWeightDescription.add_options()
687 		// TODO: We need to figure out the best set of weights for the phaser.
688 		// This one is just a stopgap to make sure no statement or expression has zero cost.
689 		("expression-statement-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
690 		("assignment-cost",           po::value<size_t>()->value_name("<COST>")->default_value(1))
691 		("variable-declaration-cost", po::value<size_t>()->value_name("<COST>")->default_value(1))
692 		("function-definition-cost",  po::value<size_t>()->value_name("<COST>")->default_value(1))
693 		("if-cost",                   po::value<size_t>()->value_name("<COST>")->default_value(2))
694 		("switch-cost",               po::value<size_t>()->value_name("<COST>")->default_value(1))
695 		("case-cost",                 po::value<size_t>()->value_name("<COST>")->default_value(2))
696 		("for-loop-cost",             po::value<size_t>()->value_name("<COST>")->default_value(3))
697 		("break-cost",                po::value<size_t>()->value_name("<COST>")->default_value(2))
698 		("continue-cost",             po::value<size_t>()->value_name("<COST>")->default_value(2))
699 		("leave-cost",                po::value<size_t>()->value_name("<COST>")->default_value(2))
700 		("block-cost",                po::value<size_t>()->value_name("<COST>")->default_value(1))
701 		("function-call-cost",        po::value<size_t>()->value_name("<COST>")->default_value(1))
702 		("identifier-cost",           po::value<size_t>()->value_name("<COST>")->default_value(1))
703 		("literal-cost",              po::value<size_t>()->value_name("<COST>")->default_value(1))
704 	;
705 	keywordDescription.add(metricWeightDescription);
706 
707 	po::options_description cacheDescription("CACHE", lineLength, minDescriptionLength);
708 	cacheDescription.add_options()
709 		(
710 			"program-cache",
711 			po::bool_switch(),
712 			"Enables caching of intermediate programs corresponding to chromosome prefixes.\n"
713 			"This speeds up fitness evaluation by a lot but eats tons of memory if the chromosomes are long. "
714 			"Disabled by default since there's currently no way to set an upper limit on memory usage but "
715 			"highly recommended if your computer has enough RAM."
716 		)
717 	;
718 	keywordDescription.add(cacheDescription);
719 
720 	po::options_description outputDescription("OUTPUT", lineLength, minDescriptionLength);
721 	outputDescription.add_options()
722 		(
723 			"show-initial-population",
724 			po::bool_switch(),
725 			"Print the state of the population before the algorithm starts."
726 		)
727 		(
728 			"show-only-top-chromosome",
729 			po::bool_switch(),
730 			"Print only the best chromosome found in each round rather than the whole population."
731 		)
732 		(
733 			"hide-round",
734 			po::bool_switch(),
735 			"Hide information about the current round (round number and elapsed time)."
736 		)
737 		(
738 			"show-cache-stats",
739 			po::bool_switch(),
740 			"Print information about cache size and effectiveness after each round."
741 		)
742 		(
743 			"show-seed",
744 			po::bool_switch(),
745 			"Print the selected random seed."
746 		)
747 	;
748 	keywordDescription.add(outputDescription);
749 
750 	po::positional_options_description positionalDescription;
751 	positionalDescription.add("input-files", -1);
752 
753 	return {keywordDescription, positionalDescription};
754 }
755 
parseCommandLine(int _argc,char ** _argv)756 optional<po::variables_map> Phaser::parseCommandLine(int _argc, char** _argv)
757 {
758 	auto [keywordDescription, positionalDescription] = buildCommandLineDescription();
759 
760 	po::variables_map arguments;
761 	po::notify(arguments);
762 
763 	po::command_line_parser parser(_argc, _argv);
764 	parser.options(keywordDescription).positional(positionalDescription);
765 	po::store(parser.run(), arguments);
766 
767 	if (arguments.count("help") > 0)
768 	{
769 		cout << keywordDescription << endl;
770 		return nullopt;
771 	}
772 
773 	if (arguments.count("input-files") == 0)
774 		assertThrow(false, NoInputFiles, "Missing argument: input-files.");
775 
776 	return arguments;
777 }
778 
initialiseRNG(po::variables_map const & _arguments)779 void Phaser::initialiseRNG(po::variables_map const& _arguments)
780 {
781 	uint32_t seed;
782 	if (_arguments.count("seed") > 0)
783 		seed = _arguments["seed"].as<uint32_t>();
784 	else
785 		seed = SimulationRNG::generateSeed();
786 
787 	SimulationRNG::reset(seed);
788 	if (_arguments["show-seed"].as<bool>())
789 		cout << "Random seed: " << seed << endl;
790 }
791 
buildAlgorithmRunnerOptions(po::variables_map const & _arguments)792 AlgorithmRunner::Options Phaser::buildAlgorithmRunnerOptions(po::variables_map const& _arguments)
793 {
794 	return {
795 		_arguments.count("rounds") > 0 ? static_cast<optional<size_t>>(_arguments["rounds"].as<size_t>()) : nullopt,
796 		_arguments.count("population-autosave") > 0 ? static_cast<optional<string>>(_arguments["population-autosave"].as<string>()) : nullopt,
797 		!_arguments["no-randomise-duplicates"].as<bool>(),
798 		_arguments["min-chromosome-length"].as<size_t>(),
799 		_arguments["max-chromosome-length"].as<size_t>(),
800 		_arguments["show-initial-population"].as<bool>(),
801 		_arguments["show-only-top-chromosome"].as<bool>(),
802 		!_arguments["hide-round"].as<bool>(),
803 		_arguments["show-cache-stats"].as<bool>(),
804 	};
805 }
806 
runPhaser(po::variables_map const & _arguments)807 void Phaser::runPhaser(po::variables_map const& _arguments)
808 {
809 	auto programOptions = ProgramFactory::Options::fromCommandLine(_arguments);
810 	auto cacheOptions = ProgramCacheFactory::Options::fromCommandLine(_arguments);
811 	auto metricOptions = FitnessMetricFactory::Options::fromCommandLine(_arguments);
812 	auto populationOptions = PopulationFactory::Options::fromCommandLine(_arguments);
813 
814 	vector<Program> programs = ProgramFactory::build(programOptions);
815 	vector<shared_ptr<ProgramCache>> programCaches = ProgramCacheFactory::build(cacheOptions, programs);
816 	CodeWeights codeWeights = CodeWeightFactory::buildFromCommandLine(_arguments);
817 	unique_ptr<FitnessMetric> fitnessMetric = FitnessMetricFactory::build(
818 		metricOptions,
819 		programs,
820 		programCaches,
821 		codeWeights
822 	);
823 	Population population = PopulationFactory::build(populationOptions, move(fitnessMetric));
824 
825 	if (_arguments["mode"].as<PhaserMode>() == PhaserMode::RunAlgorithm)
826 		runAlgorithm(_arguments, move(population), move(programCaches));
827 	else
828 		printOptimisedProgramsOrASTs(_arguments, population, move(programs), _arguments["mode"].as<PhaserMode>());
829 }
830 
runAlgorithm(po::variables_map const & _arguments,Population _population,vector<shared_ptr<ProgramCache>> _programCaches)831 void Phaser::runAlgorithm(
832 	po::variables_map const& _arguments,
833 	Population _population,
834 	vector<shared_ptr<ProgramCache>> _programCaches
835 )
836 {
837 	auto algorithmOptions = GeneticAlgorithmFactory::Options::fromCommandLine(_arguments);
838 
839 	unique_ptr<GeneticAlgorithm> geneticAlgorithm = GeneticAlgorithmFactory::build(
840 		algorithmOptions,
841 		_population.individuals().size()
842 	);
843 
844 	AlgorithmRunner algorithmRunner(move(_population), move(_programCaches), buildAlgorithmRunnerOptions(_arguments), cout);
845 	algorithmRunner.run(*geneticAlgorithm);
846 }
847 
printOptimisedProgramsOrASTs(po::variables_map const & _arguments,Population const & _population,vector<Program> _programs,PhaserMode phaserMode)848 void Phaser::printOptimisedProgramsOrASTs(
849 	po::variables_map const& _arguments,
850 	Population const& _population,
851 	vector<Program> _programs,
852 	PhaserMode phaserMode
853 )
854 {
855 	assert(phaserMode == PhaserMode::PrintOptimisedPrograms || phaserMode == PhaserMode::PrintOptimisedASTs);
856 	assert(_programs.size() == _arguments["input-files"].as<vector<string>>().size());
857 
858 	if (_population.individuals().size() == 0)
859 	{
860 		cout << "<EMPTY POPULATION>" << endl;
861 		return;
862 	}
863 
864 	vector<string> const& paths = _arguments["input-files"].as<vector<string>>();
865 	for (auto& individual: _population.individuals())
866 	{
867 		cout << "Chromosome: " << individual.chromosome << endl;
868 
869 		for (size_t i = 0; i < _programs.size(); ++i)
870 		{
871 			for (size_t j = 0; j < _arguments["chromosome-repetitions"].as<size_t>(); ++j)
872 				_programs[i].optimise(individual.chromosome.optimisationSteps());
873 
874 			cout << "Program: " << paths[i] << endl;
875 			if (phaserMode == PhaserMode::PrintOptimisedPrograms)
876 				cout << _programs[i] << endl;
877 			else
878 				cout << _programs[i].toJson() << endl;
879 		}
880 	}
881 }
882