1 
2 /* "Species" - a CoreWars evolver.  Copyright (C) 2003 'Varfar'
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of the GNU General Public License as published by the Free
6  * Software Foundation; either version 1, or (at your option) any later
7  * version.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with this program; if not, write to the Free Software Foundation, Inc.,
16  * 675 Mass Ave, Cambridge, MA 02139, USA.
17  */
18 
19 #include "inst_gen_markov.hpp"
20 #include "species.hpp"
21 #include "exhaust.hpp"
22 
23 #include "opcode_branch_lookup.hpp"
24 #include "rand.hpp"
25 
26 using namespace std;
27 
28 /******* CInstGeneratorMarkov implementation *********************/
29 
30 const float CInstGeneratorMarkov::DEFAULT_DAT = 0.1F; // 10%
31 
CInstGeneratorMarkov()32 CInstGeneratorMarkov::CInstGeneratorMarkov() {
33 	_type = MARKOV; // important line!
34 	CORESIZE = 0;
35 	opcodes = 0;
36 	opcode = 0;
37 	opcode_modifiers = 0;
38 	readonly = true;
39 	_filename = "<none>";
40 }
41 
~CInstGeneratorMarkov()42 CInstGeneratorMarkov::~CInstGeneratorMarkov() {
43 	clear();
44 	readonly = true;
45 }
46 
write_ini(ostream & os)47 void CInstGeneratorMarkov::write_ini(ostream &os) {
48 	os << "inst_gen=" << impl_desc(type()) << endl;
49 	os << "markov_file=" << _filename << endl;
50 	if(DEFAULT_DAT != _dat)
51 		os << "markov_dats=" << _dat << endl;
52 }
53 
write_override_ini(ostream & os,const CInstGenerator * parent)54 void CInstGeneratorMarkov::write_override_ini(ostream &os,const CInstGenerator *parent) {
55 	if(parent == this) // nothing to do?
56 		return;
57 	os << "inst_gen=" << impl_desc(type()) << endl;
58 	if(same_type(this,parent)) {
59 		if(((CInstGeneratorMarkov*)parent)->_filename != _filename) {
60 			os << "markov_file=" << _filename << endl;
61 			os << "markov_dats=" << _dat << endl;
62 		} else
63 			if(((CInstGeneratorMarkov*)parent)->_dat != _dat) // overriden?
64 				os << "markov_dats=" << _dat << endl;
65 	} else {
66 		if(DEFAULT_DAT != _dat)
67 			os << "markov_dats=" << _dat << endl;
68 	}
69 }
70 
read_ini_impl(INIFile & ini)71 void CInstGeneratorMarkov::read_ini_impl(INIFile &ini) {
72 	KeyValuePair *kvp;
73 	kvp = ini.get("markov_file"); if(0 == kvp) PANIC(MISC,"markov_file key expected",NULL); // null?!
74 	kvp->getValueAsString(_filename);
75 	load(_filename.c_str());
76 	kvp = ini.get("markov_dats");
77 	if(0 != kvp) {
78 		_dat = kvp->getValueAsFloat(DEFAULT_DAT);
79 		if((0.0F > _dat) || (1.0F <= _dat)) PANIC(MISC,"markov_dat must be between 0.0 and 1.0",NULL);
80 	} else
81 		_dat = DEFAULT_DAT;
82 }
83 
read_override_ini_impl(INIFile & ini)84 CInstGenerator *CInstGeneratorMarkov::read_override_ini_impl(INIFile &ini) {
85 	string filename;
86 	CInstGeneratorMarkov *ret = this;
87 	KeyValuePair *kvp;
88 	kvp = ini.get("markov_file");
89 	if(0 != kvp) { // specified?
90 		kvp->getValueAsString(filename);
91 		if(_filename != filename) {
92 			ret = new CInstGeneratorMarkov();
93 			ret->read_ini_impl(ini);
94 		}
95 	}
96 	kvp = ini.get("markov_dats");
97 	if(0 != kvp) {
98 		float dat = kvp->getValueAsFloat(DEFAULT_DAT);
99 		if(dat != ret->_dat) {
100 			ret = new CInstGeneratorMarkov(*this);
101 			ret->_dat = dat;
102 			if((0.0F > ret->_dat) || (1.0F <= ret->_dat)) PANIC(MISC,"markov_dat must be between 0.0 and 1.0",NULL);
103 		}
104 	}
105 	return ret;
106 }
107 
opcode_modifier_2_index(const unsigned opcode_modifier) const108 unsigned CInstGeneratorMarkov::opcode_modifier_2_index(const unsigned opcode_modifier) const {
109 	//TODO: binary search?  this is sorted btw..
110 	for(unsigned i=0; i<opcodes; i++) {
111 		if(opcode_modifier == opcode[i].key.get_opcode_modifier())
112 			return i;
113 	}
114 	return INVALID_INDEX;
115 }
116 
suggest_opcode_modifier() const117 unsigned CInstGeneratorMarkov::suggest_opcode_modifier() const { // returns an index
118 	const unsigned TARGET = CRand::irand(opcode_modifiers);
119 	unsigned y = 0;
120 	for(unsigned x = 0; x < opcodes; x++) {
121 		y += opcode[x].key.get_count();
122 		if(TARGET < y) {
123 			return x;
124 		}
125 	}
126 	PANIC(MISC,"roulette off end of markov opcodes",NULL);
127 }
128 
suggest_opcode_modifier() const129 unsigned CInstGeneratorMarkov::CFreq::suggest_opcode_modifier() const {
130 	if(0 == opcode_modifiers) return INVALID_INDEX;
131 	const unsigned TARGET = CRand::irand(opcode_modifiers);
132 	unsigned y = 0;
133 	for(unsigned x = 0; x < markovs; x++) {
134 		y += markov[x].get_count();
135 		if(TARGET < y) {
136 			return x;
137 		}
138 	}
139 	cout << MNEMONIC_OPCODE(key.get_opcode()) << '.' << MNEMONIC_MODIFIER(key.get_modifier()) << ": target = " << TARGET << ", opcode_modififers = " << opcode_modifiers << ", y = " << y << ", markovs = " << markovs << endl;
140 	PANIC(MISC,"roulette off end of markovs",NULL);
141 }
142 
dat() const143 bool CInstGeneratorMarkov::dat() const { // test that this can be a dat
144 	return (index() > 0) && (_dat >= CRand::frand(1.0F));
145 }
146 
suggest_instruction(insn_t & instruction,const MODE)147 void CInstGeneratorMarkov::suggest_instruction(insn_t &instruction,const MODE /*mode*/) {
148 	instruction.in = 0;
149 	unsigned prev = INVALID_INDEX, curr = INVALID_INDEX, opmod = INVALID_INDEX;
150 	if((index() > 0) && (index() < chromosome()->len()))
151 		prev = opcode_modifier_2_index(chromosome()->code(index()-1).opcode_modifier());
152 	if(INVALID_INDEX != prev) {
153 		opmod = opcode[prev].suggest_opcode_modifier(); // is index
154 		if(INVALID_INDEX != opmod)
155 			opmod = opcode[prev].markov[opmod].get_opcode_modifier(); // to actual opcode/modifier
156 	}
157 	if(INVALID_INDEX == opmod) {
158 		opmod = opcode[suggest_opcode_modifier()].key.get_opcode_modifier();
159 	}
160 	instruction.set_opcode_modifier(opmod);
161 	curr = opcode_modifier_2_index(opmod);
162 	if(INVALID_INDEX != curr) {
163 		//cout << MNEMONIC_OPCODE(instruction.opcode()] << '.' << MNEMONIC_MODIFIER[instruction.modifier()] << " is known" << endl;
164 		// set appropriate values
165 		instruction.set_addrmode_a((ADDRMODE)opcode[curr].addrmode_a.suggest());
166 		instruction.set_addrmode_b((ADDRMODE)opcode[curr].addrmode_b.suggest());
167 		chromosome()->type()->operands()->rnd(instruction,COperand::A);
168 		chromosome()->type()->operands()->rnd(instruction,COperand::B);
169 	} else {
170 		//cout << MNEMONIC_OPCODE(instruction.opcode()] << '.' << MNEMONIC_MODIFIER[instruction.modifier()] << " is unknown" << endl;
171 		// make stuff up
172 		instruction.set_addrmode_a((ADDRMODE)CRand::irand(ADDRMODE_LAST));
173 		instruction.set_addrmode_b((ADDRMODE)CRand::irand(ADDRMODE_LAST));
174 		chromosome()->type()->operands()->rnd(instruction,COperand::A);
175 		chromosome()->type()->operands()->rnd(instruction,COperand::B);
176 	}
177 	if(safety_checks) {
178 		if(!instruction.valid(chromosome()->warrior()->species()->kingdom()->coresize())) {
179 			cerr << "bad instruction in chromosome()[" << index() << ']' << endl;
180 			PANIC(MISC,"corrupt warrior from markov",NULL);
181 		}
182 	}
183 }
184 
clear()185 void CInstGeneratorMarkov::clear() {
186 	for(int i=0; i<opcodes; i++) {
187 		delete [] opcode[i].markov;
188 		//TODO: check that CHighLowArrays autodestruct
189 	}
190 	delete [] opcode;
191 	opcodes = 0;
192 	opcode = 0;
193 	readonly = true;
194 }
195 
init(const unsigned coresize)196 void CInstGeneratorMarkov::init(const unsigned coresize) {
197 	clear();
198 	CORESIZE = coresize;
199 	if(CORESIZE <= 0 || CORESIZE >= 0xFFFF)
200 		PANIC(MISC,"coresize isn\'t valid",NULL);
201 	opcodes = OPCODE_MODIFIER_LAST;
202 	opcode = new CFreq[opcodes];
203 	for(unsigned i=0; i<OPCODE_MODIFIER_LAST; i++) {
204 		opcode[i].key.set(i);
205 		opcode[i].markovs = OPCODE_MODIFIER_LAST;
206 		opcode[i].markov = new COpcodeModifier[opcode[i].markovs];
207 		for(unsigned j=0; j<opcode[i].markovs; j++)
208 			opcode[i].markov[j].set(j);
209 		opcode[i].addrmode_a.init(ADDRMODE_LAST);
210 		opcode[i].addrmode_b.init(ADDRMODE_LAST);
211 		for(unsigned j=0; j<ADDRMODE_LAST; j++) {
212 			opcode[i].addrmode_a[j].set(j);
213 			opcode[i].addrmode_b[j].set(j);
214 		}
215 		opcode[i].a.init(CORESIZE);
216 		opcode[i].b.init(CORESIZE);
217 		for(unsigned j=0; j<CORESIZE; j++) {
218 			opcode[i].a[j].set(j);
219 			opcode[i].a[j].set(j);
220 		}
221 	}
222 	readonly = false;
223 }
224 
record(const OPCODE opcode_prev,const MODIFIER mod_prev,const insn_st & code)225 void CInstGeneratorMarkov::record(const OPCODE opcode_prev,const MODIFIER mod_prev,const insn_st &code) {
226 	const unsigned
227 		curr = _OP(code.opcode(),code.modifier()),
228 		prev = _OP(opcode_prev,mod_prev);
229 	opcode[curr].key.inc_count();
230 	if(opcode_prev < OPCODE_LAST)
231 		opcode[prev].markov[curr].inc_count();
232 	opcode[curr].addrmode_a[code.addrmode_a()].inc_low();
233 	opcode[curr].addrmode_b[code.addrmode_b()].inc_low();
234 	opcode[curr].a[code.a].inc_low();
235 	opcode[curr].b[code.b].inc_low();
236 }
237 
dump(std::ostream & out) const238 void CInstGeneratorMarkov::dump(std::ostream &out) const {
239 	using namespace std;
240 	out << used_opcodes() << " used opcode/modifier combos" << endl;
241 	for(unsigned i=0; i<opcodes; i++) {
242 		if(opcode[i].key.used()) {
243 			out << MNEMONIC_OPCODE(opcode[i].key.get_opcode()) << '.' << MNEMONIC_MODIFIER(opcode[i].key.get_modifier()) << " = " << opcode[i].key.get_count() << endl;
244 			for(unsigned j=0; j<opcode[i].markovs; j++) {
245 				if(opcode[i].markov[j].used())
246 					out << '\t' << MNEMONIC_OPCODE(opcode[i].markov[j].get_opcode()) << '.' << MNEMONIC_MODIFIER(opcode[i].markov[j].get_modifier()) << " = " << opcode[i].markov[j].get_count() << endl;
247 			}
248 			for(unsigned j=0; j<opcode[i].addrmode_a.count(); j++) {
249 				if(opcode[i].addrmode_a[j].used())
250 					out << "\ta." << MNEMONIC_ADDRMODE((ADDRMODE)opcode[i].addrmode_a[j].get_high()) << " = " << opcode[i].addrmode_a[j].get_low() << endl;
251 			}
252 			for(unsigned j=0; j<opcode[i].addrmode_b.count(); j++) {
253 				if(opcode[i].addrmode_b[j].used())
254 					out << "\tb." << MNEMONIC_ADDRMODE((ADDRMODE)opcode[i].addrmode_b[j].get_high()) << " = " << opcode[i].addrmode_b[j].get_low() << endl;
255 			}
256 			for(unsigned j=0; j<opcode[i].a.count(); j++) {
257 				if(opcode[i].a[j].used())
258 					out << "\ta[" << opcode[i].a[j].get_high() << "] = " << opcode[i].a[j].get_low() << endl;
259 			}
260 			for(unsigned j=0; j<opcode[i].b.count(); j++) {
261 				if(opcode[i].b[j].used())
262 					out << "\tb[" << opcode[i].b[j].get_high() << "] = " << opcode[i].b[j].get_low() << endl;
263 			}
264 		}
265 	}
266 }
267 
save(const char * binfilename) const268 void CInstGeneratorMarkov::save(const char *binfilename) const {
269 	// open file
270 	FILE *f = fopen(binfilename,"wb");
271 	if(!f)
272 		PANIC(MISC,"couldn\'t create lookup file ",binfilename);
273 	write(CORESIZE,f);
274 	write(used_opcodes(),f);
275 	for(unsigned i=0; i<opcodes; i++) {
276 		if(opcode[i].key.used()) {
277 			write(opcode[i].key.get_literal(),f);
278 			write(opcode[i].used_markovs(),f);
279 			for(unsigned j=0; j<opcode[i].markovs; j++) {
280 				if(opcode[i].markov[j].used())
281 					write(opcode[i].markov[j].get_literal(),f);
282 			}
283 			write(opcode[i].addrmode_a.used(),f);
284 			for(unsigned j=0; j<opcode[i].addrmode_a.count(); j++) {
285 				if(opcode[i].addrmode_a[j].used())
286 					write(opcode[i].addrmode_a[j].get_data(),f);
287 			}
288 			if(opcode[i].addrmode_a.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of addrmode_a entries in ",binfilename);
289 			write(opcode[i].addrmode_b.used(),f);
290 			for(unsigned j=0; j<opcode[i].addrmode_b.count(); j++) {
291 				if(opcode[i].addrmode_b[j].used())
292 					write(opcode[i].addrmode_b[j].get_data(),f);
293 			}
294 			if(opcode[i].addrmode_b.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of addrmode_b entries in ",binfilename);
295 			write(opcode[i].a.used(),f);
296 			for(unsigned j=0; j<opcode[i].a.count(); j++) {
297 				if(opcode[i].a[j].used())
298 					write(opcode[i].a[j].get_data(),f);
299 			}
300 			if(opcode[i].a.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of a entries in ",binfilename);
301 			write(opcode[i].b.used(),f);
302 			for(unsigned j=0; j<opcode[i].b.count(); j++) {
303 				if(opcode[i].b[j].used())
304 					write(opcode[i].b[j].get_data(),f);
305 			}
306 			if(opcode[i].b.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of b entries in ",binfilename);
307 		}
308 	}
309 	// done
310 	fclose(f);
311 }
312 
load(const char * binfilename)313 void CInstGeneratorMarkov::load(const char *binfilename) {
314 	unsigned x;
315 	FILE *f = fopen(binfilename,"rb");
316 	if(!f)
317 		PANIC(MISC,"couldn\'t open lookup file ",binfilename);
318 	clear();
319 	read(x,f); CORESIZE = x;
320 	read(x,f); opcodes = x;
321 	opcode_modifiers = 0;
322 	opcode = new CFreq[opcodes];
323 	for(unsigned i=0; i<opcodes; i++) {
324 		read(x,f); opcode[i].key.set_literal(x); opcode_modifiers += opcode[i].key.get_count();
325 		read(x,f); opcode[i].markovs = x;
326 		opcode[i].markov = new COpcodeModifier[opcode[i].markovs];
327 		opcode[i].opcode_modifiers = 0;
328 		for(unsigned j=0; j<opcode[i].markovs; j++) {
329 			read(x,f); opcode[i].markov[j].set_literal(x);
330 			opcode[i].opcode_modifiers += opcode[i].markov[j].get_count();
331 		}
332 		read(x,f); opcode[i].addrmode_a.init(x);
333 		for(unsigned j=0; j<opcode[i].addrmode_a.count(); j++) {
334 			read(x,f); opcode[i].addrmode_a[j].set_data(x);
335 		}
336 		if(opcode[i].addrmode_a.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of addrmode_a entries in ",binfilename);
337 		read(x,f); opcode[i].addrmode_b.init(x);
338 		for(unsigned j=0; j<opcode[i].addrmode_b.count(); j++) {
339 			read(x,f); opcode[i].addrmode_b[j].set_data(x);
340 		}
341 		if(opcode[i].addrmode_b.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of addrmode_b entries in ",binfilename);
342 		read(x,f); opcode[i].a.init(x);
343 		for(unsigned j=0; j<opcode[i].a.count(); j++) {
344 			read(x,f); opcode[i].a[j].set_data(x);
345 		}
346 		if(opcode[i].a.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of a entries in ",binfilename);
347 		read(x,f); opcode[i].b.init(x);
348 		for(unsigned j=0; j<opcode[i].b.count(); j++) {
349 			read(x,f); opcode[i].b[j].set_data(x);
350 		}
351 		if(opcode[i].b.calc_sum() != opcode[i].key.get_count()) PANIC(MISC,"bad number of b entries in ",binfilename);
352 	}
353 	if(0 != fread(&x,1,1,f)) PANIC(MISC,"unexpected data after file",binfilename);
354 	fclose(f);
355 	readonly = true;
356 }
357 
append(const char * rcfilename)358 void CInstGeneratorMarkov::append(const char *rcfilename) {
359 	if(readonly)
360 		PANIC(MISC,"OpcodeFreqLookup is readonly",NULL);
361 	warrior_t warrior;
362 	// load it
363 	printf("loading %s\n",rcfilename);
364 	asm_fname(rcfilename,&warrior,CORESIZE); // will panic fatally if can't
365 	// and add to markov chaining
366 	OPCODE opcode_prev = OPCODE_LAST;
367 	MODIFIER mod_prev = MODIFIER_LAST;
368 	for(int i=0; i<warrior.len; i++) {
369 		if(warrior.code[i].opcode() == DAT) {
370 			opcode_prev = OPCODE_LAST;
371 			mod_prev = MODIFIER_LAST;
372 			continue; // skip it
373 		}
374 		record(opcode_prev,mod_prev,warrior.code[i]);
375 		opcode_prev = warrior.code[i].opcode();
376 		mod_prev = warrior.code[i].modifier();
377 	}
378 }
379 
380