1 // license:BSD-3-Clause
2 // copyright-holders:Raphael Nabet
3 /*
4     cpu/apexc/apexc.c: APE(X)C CPU emulation
5 
6     By Raphael Nabet
7 
    APE(X)C (All Purpose Electronic X-ray Computer) was a computer built by Andrew D. Booth
    and others for the Birkbeck College, in London, which was used to compute crystal
    structures using X-ray diffraction.
11 
    It was one of the APEC series of computers, which were simple electronic computers
    built in the early 1950s for various British Universities.  Known members of this series
    are:
15     * APE(X)C: Birkbeck College, London (before 1953 (1951?))
16     * APE(N)C: Board of Mathematical Machines, Oslo
17     * APE(H)C: British Tabulating Machine Company
18     * APE(R)C: British Rayon Research Association
19     * UCC: University College, London (circa january 1956)
20     * MAC (Magnetic Automatic Calculator): "built by Wharf Engineering Laboratories"
21     (february 1955), which used some germanium diodes
22     * The HEC (built by the British Tabulating Machine Company), a commercial machine sold
23     in two models at least (HEC 2M and HEC 4) (before 1955)
24 
25     References:
26     * Andrew D. Booth & Kathleen H. V. Booth: Automatic Digital Calculators, 2nd edition
27     (Buttersworth Scientific Publications, 1956)  (referred to as 'Booth&Booth')
28     * Kathleen H. V. Booth: Programming for an Automatic Digital Calculator
29     (Buttersworth Scientific Publications, 1958)  (referred to as 'Booth')
30     * Digital Engineering Newsletter vol 7 nb 1 p 60 and vol 8 nb 1 p 60-61 provided some
31     dates
32 */
33 
34 /*
    General specs:
36     * 32-bit data word size (10-bit addresses): uses fixed-point, 2's complement arithmetic
37     * CPU has one accumulator (A) and one register (R), plus a Control Register (this is
38       what we would call an "instruction register" nowadays).  No Program Counter, each
39       instruction contains the address of the next instruction (!).
40     * memory is composed of 256 (maximal value only found on the UCC - APE(X)C only has
41       32 tracks) circular magnetic tracks of 32 words: only 32 tracks can
42       be accessed at a time (the 16 first ones, plus 16 others chosen by the programmer),
43       and the rotation rate is 3750rpm (62.5 rotations per second).
44     * two I/O units: tape reader and tape puncher.  A teletyper was designed to read
45       specially-encoded punched tapes and print decoded text.  (See /systems/apexc.c)
    * machine code has 15 instructions (!), including add, subtract, shift, multiply (!),
      test and branch, input and punch.  A so-called vector mode allows repeating the same
      operation 32 times with 32 successive memory locations.  Note the lack of bitwise
      and/or/xor (!) .
    * 1 kIPS, although memory access times make this figure fairly theoretical (drum rotation
      time: 16ms, which would allow about 60IPS when no optimization is made)
    * there is no indirect addressing whatsoever, although dynamic modification of opcodes (!)
      makes it possible to simulate it...
54     * a control panel allows operation and debugging of the machine.  (See /systems/apexc.c)
55 
56     Conventions:
57     Bits are numbered in big-endian order, starting with 1: bit #1 is the
58     MSBit, and bit #32 is the LSBit.
59 
60     References:
61     * Andrew D. Booth & Kathleen H. V. Booth: Automatic Digital Calculators, 2nd edition
62     (Buttersworth Scientific Publications, 1956)
63     * Kathleen H. V. Booth: Programming for an Automatic Digital Calculator
64     (Buttersworth Scientific Publications, 1958)
65 */
66 
67 /*
68     Machine code (reference: Booth):
69 
70     Format of a machine instruction:
71 bits:       1-5         6-10        11-15       16-20       21-25       26-31       32
72 field:      X address   X address   Y address   Y address   Function    C6          Vector
73             (track)     (location)  (track)     (location)
74 
75     Meaning of fields:
76     X: address of an operand, or immediate, or meaningless, depending on Function
77         (When X is meaningless, it should be a duplicate of Y.  Maybe this is because
78         X is unintentionnally loaded into the memory address register, and if track # is
79         different, we add unneeded track switch delays (this theory is either wrong or
80         incomplete, since it cannot be true for B or X))
81     Y: address of the next instruction
82     Function: code for the actual instruction executed
83     C6: immediate value used by shift, multiply and store operations
84     Vector: repeat operation 32 times (on all 32 consecutive locations of a track,
85         starting with the location given by the X field)
86 
87     Function code:
88     #   Mnemonic    C6      Description
89 
90     0   Stop
91 
92     2   I(y)                Input.  A 5-bit word is read from tape and loaded
93                             into the 5 MSBits of R.  (These bits of R must be
94                             cleared initially.)
95 
96     4   P(y)                Punch.  The 5 MSBits of R are punched onto the
97                             output tape.
98 
99     6   B<(x)>=(y)          Branch.  If A<0, next instruction is fetched from @x, whereas
100                             if A>=0, next instruction is fetched from @y
101 
102     8   l (y)       n       Shift left: the 64 bits of A and R are rotated left n times.
103          n
104 
105     10  r (y)       64-n    Shift right: the 64 bits of A and R are shifted right n times.
106          n                  The sign bit of A is duplicated.
107 
108     14  X (x)(y)    33-n    Multiply the contents of *track* x by the last n digits of the
109          n                  number in R, sending the 32 MSBs to A and 31 LSBs to R
110 
111     16  +c(x)(y)            A <- (x)
112 
113     18  -c(x)(y)            A <- -(x)
114 
115     20  +(x)(y)             A <- A+(x)
116 
117     22  -(x)(y)             A <- A-(x)
118 
119     24  T(x)(y)             R <- (x)
120 
121     26  R   (x)(y)  32+n    Store first or last bits of R into (x).  The remaining bits of (x)
122          1-n                are unaffected.  "The contents of R are filled with 0s or 1s
123                             according as the original contents were positive or negative".
124         R    (x)(y) n-1
125          n-32
126 
127     28  A   (x)(y)  32+n    Same as 26, except that source is A, and the contents of A are
128          1-n                not modified.
129 
130         A    (x)(y) n-1
131          n-32
132 
133     30  S(x)(y)             Block Head switch.  This enables the block of heads specified
134                             in x to be loaded into the working store.
135 
    Note: Mnemonics use subscripts (!), which I tried to render the best I could.  Also,
      ">=" is actually one single character.  Last, "1-n" and "n-32" in store mnemonics
      are the actual sequences "1 *DASH* <number n>" and "<number n> *DASH* 32"
      (these are NOT formulas with subtraction signs).
140 
141     Note2: Short-hand notations: X stands for X  , A for A    , and R for R    .
142                                                32         1-32             1-32
143 
144     Note3: Vectors instruction are notated with a subscript 'v' following the basic
145       mnemonic.  For instance:
146 
147         A (x)(y), + (x)(y)
148          v         v
149 
150       are the vector counterparts of A(x)(y) and +(x)(y).
151 
152 
153 
154 
155     Note that the code has been presented so far as it was in 1957.  It appears that
156     it was somewhat different in 1953 (Booth&Booth):
157 
158     Format of a machine instruction:
159     Format for r, l, A:
160 bits:       1-9         10-15       16-17   18-21       22-30       31-32
161 field:      X address   C6          spare   Function    Y address   spare
162     Format for other instructions:
163 bits:       1-9         10-17       18-21       22-30       31-32
164 field:      X address   D           Function    Y address   D (part 2)
165 
166     Meaning of fields:
167     D (i.e. drum #): MSBs for the address of the X operand.  I don't know whether this feature
168         was actually implemented, since it is said in Booth&Booth that the APE(X)C does
169         not use this feature (it had only one drum of 16 tracks at the time, hence the 9
170         address bits).
171 
172     Function code:
173     #   Mnemonic    C6      Description
174 
175     1   A   (x)(y)  32+n(?) record first bits of A in (x).  The remaining bits of x
176          1-n                are unaffected.
177 
178     2   +c(x)(y)            A <- (x)
179 
180     3   -c(x)(y)            A <- -(x)
181 
182     4   +(x)(y)             A <- A+(x)
183 
184     5   -(x)(y)             A <- A-(x)
185 
186     6   T(x)(y)             R <- (x)
187 
188     7   X (x)(y)            Multiply the contents of (x) by the number in R,
189                             sending the 32 MSBs to A and 31 LSBs to R
190 
191     8   r (y)       64-n(?) Shift right: the 64 bits of A and R are shifted right n times.
192          n                  The sign bit of A is duplicated.
193 
194     9   l (y)       n(?)    Shift left: the 64 bits of A and R are rotated left n times.
195          n
196 
197     10  R   (x)(y)  32+n    record R into (x).
198          1-n                "the contents of R are filled with 0s or 1s
199                             according as the original contents were positive or negative".
200 
201     11  B<(x)>=(y)          Branch.  If A<0, next instruction is read from @x, whereas
202                             if A>=0, next instruction is read from @y
203 
204     12  Print(y)            Punch.  Contents of A are printed.
205 
206     13  C(d+x)              branch ("switch Control") to instruction located in position
207                             (D:X)
208 
209     14  Stop
210 
211     You will notice the absence of input instruction.  It seems that program and data were
212     meant to be entered with a teletyper or a card reader located on the control panel.
213 
214     I don't know whether this computer really was in operation with this code.  Handle
215     these info with caution.
216 */
217 
218 /*
219     memory interface:
220 
221     Data is exchanged on a 1-bit (!) data bus, 10-bit address bus.
222 
    While the bus is 1-bit wide, read/write operations can only take place on word
    (i.e. 32 bit) boundaries.  However, it is possible to store only the n first bits or
    n last bits of a word, leaving other bits in memory unaffected.
226 
    The LSBits are transferred first, since this makes it possible to perform bit-per-bit add
    and subtract.  Otherwise, the CPU would need an additional register to store the second
    operand, and it would be probably slower, since the operation could only
    take place after all the data has been transferred.
231 
232     Memory operations are synchronous with 2 clocks found on the memory controller:
233     * word clock: a pulse on each word boundary (3750rpm*32 -> 2kHz)
234     * bit clock: a pulse when a bit is present on the bus (word clock * 32 -> 64kHz)
235 
    CPU operation is synchronous with these clocks, too.  For instance, the AU does bit-per-bit
    addition and subtraction with a memory operand, synchronously with bit clock,
    starting and stopping on word clock boundaries.  Similar thing with a Fetch operation.
239 
    There is a 10-bit memory location (i.e. address) register on the memory controller.
    It is loaded with the contents of X when instruction fetch is complete, and
    with the contents of Y when instruction execution is complete, so that the next fetch
    can be executed correctly.
244 */
245 
246 /*
247     Instruction timings:
248 
249 
250     References: Booth p. 14 for the table below
251 
252 
253     Mnemonic                    delay in word clock cycles
254 
255     I                           32
256 
257     P                           32
258 
259     B                           0
260 
261     l                           1 if n>=32 (i.e. C6>=32) (see 4.)
262      n                          2 if n<32  (i.e. C6<32)
263 
264     r                           1 if n<=32 (i.e. C6>=32) (see 4.)
265      n                          2 if n>32  (i.e. C6<32)
266 
267     X                           32
268 
269     +c, -c, +, -, T             0
270 
271     R    , R    , A   , A       1 (see 1. & 4.)
272       1-n   n-32   1-n   n-32
273 
274     track switch                6 (see 2.)
275 
276     vector                      12 (see 3.)
277 
278 
279     (S and stop are missing in the table)
280 
281 
282     Note that you must add the fetch delay (at least 1 cycle), and, when applicable, the
283     operand read/write delay (at least 1 cycle).
284 
285 
286     Notes:
287 
288     1.  The delay is applied after the store is done (from the analysis of the example
289      in Booth p.52)
290 
291     2.  I guess that the memory controller needs 6 cycles to stabilize whenever track
292       switching occurs, i.e. when X does not refer to the current track, and then when Y
293       does not refer to the same track as X.  This matches various examples in Booth,
294       although it appears that this delay is not applied when X is not read (cf cross-track
295       B in Booth p. 49).
296         However, and here comes the wacky part, analysis of Booth p. 55 shows that
297       no additional delay is caused by an X instruction having its X operand
298       on another track.  Maybe, just maybe, this is related to the fact that X does not
299       need to take the word count into account, any word in track is as good as any (yet,
300       this leaves the question of why this optimization could not be applied to vector
301       operations unanswered).
302 
303     3.  This is an ambiguous statement.  Analysis of Booth p. 55 shows that
304       an instance of an Av instruction with its destination on another track takes no more
305       than 45 cycles, as follow:
306         * 1 cycle for fetch
307         * 6-cycle delay (at most) before write starts (-> track switch)
308         * 32 memory cycles
309         * 6-cycle delay (at most) after write completion (-> track switch)
310       It appears that the delay associated with the vector mode is not distinguishable from
311       the delay caused by track switch and even the delay associated to the Av instruction.
312         Is there really a specific delay associated with the vector mode? To know this, we
313       would need to see a vector instruction on the same track as its operands, which is
314       unlikely to be seen (the only reasonnable application I can see is running a '+_v'
315       to compute the checksum of the current track).
316 
    4.  Example in Booth p. 76 ("20/4 A (27/27) (21/2)") seems to imply that
      when doing a store with a destination on a track other than the track where next
      instruction is located, the 1-cycle post-store delay is merged with the 6-cycle track
      switch delay.  (I assume this because there is lots of room on track 21, and if
      the delays were not merged, it should be easy to move the instruction forward
      to speed up loop execution time.)
323         Similarly, example in Booth p. 49-50 ("4/24 l 32 (5/31)") seems to show that
324       a similar delay merge occurs when doing a shift with the next instruction located on
325       another track.
326 */
327 
328 #include "emu.h"
329 #include "apexc.h"
330 #include "apexcdsm.h"
331 #include "debugger.h"
332 
333 
334 DEFINE_DEVICE_TYPE(APEXC, apexc_cpu_device, "apexc_cpu", "APE(X)C")
335 
336 
/* Burn n word-clock cycles: decrement ICount by n and advance the drum word position
(kept modulo 32, the number of words in a track) by the same amount.
The argument is evaluated exactly once, so that passing an expression which refers to
m_icount itself (e.g. DELAY(m_icount)) still advances the word position correctly. */
#define DELAY(n)    do { const int delay_cycles_ = (n); m_icount -= delay_cycles_; m_current_word = (m_current_word + delay_cycles_) & 0x1f; } while (0)
339 
340 
apexc_cpu_device(const machine_config & mconfig,const char * tag,device_t * owner,uint32_t clock)341 apexc_cpu_device::apexc_cpu_device(const machine_config &mconfig, const char *tag, device_t *owner, uint32_t clock)
342 	: cpu_device(mconfig, APEXC, tag, owner, clock)
343 	, m_program_config("program", ENDIANNESS_BIG, 32, 15, 0)
344 	, m_tape_read_cb(*this)
345 	, m_tape_punch_cb(*this)
346 	, m_a(0)
347 	, m_r(0)
348 	, m_cr(0)
349 	, m_ml(0)
350 	, m_working_store(1)
351 	, m_running(0)
352 	, m_pc(0)
353 {
354 }
355 
memory_space_config() const356 device_memory_interface::space_config_vector apexc_cpu_device::memory_space_config() const
357 {
358 	return space_config_vector {
359 		std::make_pair(AS_PROGRAM, &m_program_config)
360 	};
361 }
362 
363 
364 /*
365     word accessor functions
366 
367     take a 10-bit word address
368         5 bits (MSBs): track address within working store
369         5 bits (LSBs): word position within track
370 
371     'special' flag: if true, read first word found in track (used by X instruction only)
372 
373     'mask': one bit is set for each bit to write (used by store instructions)
374 
375     memory latency delays are taken into account, but not track switching delays
376 */
377 
378 /* compute complete word address (i.e. translate a logical track address (expressed
379 in current working store) to an absolute track address) */
effective_address(uint32_t address)380 uint32_t apexc_cpu_device::effective_address(uint32_t address)
381 {
382 	if (address & 0x200)
383 	{
384 		address = (address & 0x1FF) | (m_working_store) << 9;
385 	}
386 
387 	return address;
388 }
389 
390 /* read word */
word_read(uint32_t address,uint32_t special)391 uint32_t apexc_cpu_device::word_read(uint32_t address, uint32_t special)
392 {
393 	uint32_t result;
394 
395 	/* compute absolute track address */
396 	address = effective_address(address);
397 
398 	if (special)
399 	{
400 		/* ignore word position in x - use current position instead */
401 		address = (address & ~ 0x1f) | m_current_word;
402 	}
403 	else
404 	{
405 		/* wait for requested word to appear under the heads */
406 		DELAY(((address /*& 0x1f*/) - m_current_word) & 0x1f);
407 	}
408 
409 	/* read 32 bits */
410 	result = apexc_readmem(address);
411 
412 	/* read takes one memory cycle */
413 	DELAY(1);
414 
415 	return result;
416 }
417 
418 /* write word (or part of a word, according to mask) */
word_write(uint32_t address,uint32_t data,uint32_t mask)419 void apexc_cpu_device::word_write(uint32_t address, uint32_t data, uint32_t mask)
420 {
421 	/* compute absolute track address */
422 	address = effective_address(address);
423 
424 	/* wait for requested word to appear under the heads */
425 	DELAY(((address /*& 0x1f*/) - m_current_word) & 0x1f);
426 
427 	/* write 32 bits according to mask */
428 	apexc_writemem_masked(address, data, mask);
429 
430 	/* write takes one memory cycle (2, actually, but the 2nd cycle is taken into
431 	account in execute) */
432 	DELAY(1);
433 }
434 
435 /*
436     I/O accessors
437 
438     no address is used, these functions just punch or read 5 bits
439 */
440 
papertape_read()441 uint8_t apexc_cpu_device::papertape_read()
442 {
443 	return m_tape_read_cb() & 0x1f;
444 }
445 
papertape_punch(uint8_t data)446 void apexc_cpu_device::papertape_punch(uint8_t data)
447 {
448 	m_tape_punch_cb(data);
449 }
450 
451 /*
452     now for emulation code
453 */
454 
455 /*
456     set the memory location (i.e. address) register, and compute the associated delay
457 */
load_ml(uint32_t address,uint32_t vector)458 uint32_t apexc_cpu_device::load_ml(uint32_t address, uint32_t vector)
459 {
460 	int delay;
461 
462 	/* additional delay appears if we switch tracks */
463 	if (((m_ml & 0x3E0) != (address & 0x3E0)) /*|| vector*/)
464 		delay = 6;  /* if tracks are different, delay to allow for track switching */
465 	else
466 		delay = 0;  /* else, no problem */
467 
468 	m_ml = address; /* save ml */
469 
470 	return delay;
471 }
472 
473 /*
474     execute one instruction
475 
476     TODO:
477     * test!!!
478 
479     NOTE:
480     * I do not know whether we should fetch instructions at the beginning or the end of the
481     instruction cycle.  Either solution is roughly equivalent to the other, but changes
482     the control panel operation (and I know virtually nothing on the control panel).
483     Currently, I fetch each instruction right after executing the previous instruction, so that
484     the user may enter an instruction into the control register with the control panel, then
485     execute it.
486     This solution makes timing simulation much simpler, too.
487 */
execute()488 void apexc_cpu_device::execute()
489 {
490 	int x, y, function, c6, vector; /* instruction fields */
491 	int i = 0;          /* misc counter */
492 	int has_operand;    /* true if instruction is an AU operation with an X operand */
493 	static const char has_operand_table[32] =   /* table for has_operand - one entry for each function code */
494 	{
495 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 		1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0
497 	};
498 	int delay1; /* pre-operand-access delay */
499 	int delay2; /* post-operation delay */
500 	int delay3; /* pre-operand-fetch delay */
501 
502 	/* first isolate the instruction fields */
503 	x = (m_cr >> 22) & 0x3FF;
504 	y = (m_cr >> 12) & 0x3FF;
505 	function = (m_cr >> 7) & 0x1F;
506 	c6 = (m_cr >> 1) & 0x3F;
507 	vector = m_cr & 1;
508 
509 	function &= 0x1E;   /* this is a mere guess - the LSBit is reserved for future additions */
510 
511 	/* determinates if we need to read an operand*/
512 	has_operand = has_operand_table[function];
513 
514 	if (has_operand)
515 	{
516 		/* load ml with X */
517 		delay1 = load_ml(x, vector);
518 		/* burn pre-operand-access delay if needed */
519 		if (delay1)
520 		{
521 			DELAY(delay1);
522 		}
523 	}
524 
525 	delay2 = 0; /* default */
526 
527 	do
528 	{
529 		switch (function)
530 		{
531 		case 0:
532 			/* stop */
533 
534 			m_running = false;
535 
536 			/* BTW, I don't know whether stop loads y into ml or not, and whether
537 			subsequent fetch is done */
538 			break;
539 
540 		case 2:
541 			/* I */
542 			/* I do not know whether the CPU does an OR or whatever, but since docs say that
543 			the 5 bits must be cleared initially, an OR kind of makes sense */
544 			m_r |= papertape_read() << 27;
545 			delay2 = 32;    /* no idea whether this should be counted as an absolute delay
546 			                or as a value in delay2 */
547 			break;
548 
549 		case 4:
550 			/* P */
551 			papertape_punch((m_r >> 27) & 0x1f);
552 			delay2 = 32;    /* no idea whether this should be counted as an absolute delay
553 			                or as a value in delay2 */
554 			break;
555 
556 		case 6:
557 			/* B<(x)>=(y) */
558 			/* I have no idea what we should do if the vector bit is set */
559 			if (m_a & 0x80000000UL)
560 			{
561 				/* load ml with X */
562 				delay1 = load_ml(x, vector);
563 				/* burn pre-fetch delay if needed */
564 				if (delay1)
565 				{
566 					DELAY(delay1);
567 				}
568 				/* and do fetch at X */
569 				goto special_fetch;
570 			}
571 			/* else, the instruction ends with a normal fetch */
572 			break;
573 
574 		case 8:
575 			/* l_n */
576 			delay2 = (c6 & 0x20) ? 1 : 2;   /* if more than 32 shifts, it takes more time */
577 
578 			/* Yes, this code is inefficient, but this must be the way the APEXC does it ;-) */
579 			while (c6 != 0)
580 			{
581 				int shifted_bit = 0;
582 
583 				/* shift and increment c6 */
584 				shifted_bit = m_r & 1;
585 				m_r >>= 1;
586 				if (m_a & 1)
587 					m_r |= 0x80000000UL;
588 				m_a >>= 1;
589 				if (shifted_bit)
590 					m_a |= 0x80000000UL;
591 
592 				c6 = (c6+1) & 0x3f;
593 			}
594 
595 			break;
596 
597 		case 10:
598 			/* r_n */
599 			delay2 = (c6 & 0x20) ? 1 : 2;   /* if more than 32 shifts, it takes more time */
600 
601 			/* Yes, this code is inefficient, but this must be the way the APEXC does it ;-) */
602 			while (c6 != 0)
603 			{
604 				/* shift and increment c6 */
605 				m_r >>= 1;
606 				if (m_a & 1)
607 					m_r |= 0x80000000UL;
608 				m_a = ((int32_t) m_a) >> 1;
609 
610 				c6 = (c6+1) & 0x3f;
611 			}
612 
613 			break;
614 
615 		case 12:
616 			/* unused function code.  I assume this results into a NOP, for lack of any
617 			specific info... */
618 
619 			break;
620 
621 		case 14:
622 			/* X_n(x) */
623 
624 			/* Yes, this code is inefficient, but this must be the way the APEXC does it ;-) */
625 			/* algorithm found in Booth&Booth, p. 45-48 */
626 			{
627 				int shifted_bit;
628 
629 				m_a = 0;
630 				shifted_bit = 0;
631 				while (1)
632 				{
633 					/* note we read word at current word position */
634 					if (shifted_bit && ! (m_r & 1))
635 						m_a += word_read(x, 1);
636 					else if ((! shifted_bit) && (m_r & 1))
637 						m_a -= word_read(x, 1);
638 					else
639 						/* Even if we do not read anything, the loop still takes 1 cycle of
640 						the memory word clock. */
641 						/* Anyway, maybe we still read the data even if we do not use it. */
642 						DELAY(1);
643 
644 					/* exit if c6 reached 32 ("c6 & 0x20" is simpler to implement and
645 					essentially equivalent, so this is most likely the actual implementation) */
646 					if (c6 & 0x20)
647 						break;
648 
649 					/* else increment c6 and  shift */
650 					c6 = (c6+1) & 0x3f;
651 
652 					/* shift */
653 					shifted_bit = m_r & 1;
654 					m_r >>= 1;
655 					if (m_a & 1)
656 						m_r |= 0x80000000UL;
657 					m_a = ((int32_t) m_a) >> 1;
658 				}
659 			}
660 
661 			//DELAY(32);    /* mmmh... we have already counted 32 wait states */
662 			/* actually, if (n < 32) (which is an untypical case), we do not have 32 wait
663 			states.  Question is: do we really have 32 wait states if (n < 32), or is
664 			the timing table incomplete? */
665 			break;
666 
667 		case 16:
668 			/* +c(x) */
669 			m_a = + word_read(m_ml, 0);
670 			break;
671 
672 		case 18:
673 			/* -c(x) */
674 			m_a = - word_read(m_ml, 0);
675 			break;
676 
677 		case 20:
678 			/* +(x) */
679 			m_a += word_read(m_ml, 0);
680 			break;
681 
682 		case 22:
683 			/* -(x) */
684 			m_a -= word_read(m_ml, 0);
685 			break;
686 
687 		case 24:
688 			/* T(x) */
689 			m_r = word_read(m_ml, 0);
690 			break;
691 
692 		case 26:
693 			/* R_(1-n)(x) & R_(n-32)(x) */
694 
695 			{
696 				uint32_t mask;
697 
698 				if (c6 & 0x20)
699 					mask = 0xFFFFFFFFUL << (64 - c6);
700 				else
701 					mask = 0xFFFFFFFFUL >> c6;
702 
703 				word_write(m_ml, m_r, mask);
704 			}
705 
706 			m_r = (m_r & 0x80000000UL) ? 0xFFFFFFFFUL : 0;
707 
708 			delay2 = 1;
709 			break;
710 
711 		case 28:
712 			/* A_(1-n)(x) & A_(n-32)(x) */
713 
714 			{
715 				uint32_t mask;
716 
717 				if (c6 & 0x20)
718 					mask = 0xFFFFFFFFUL << (64 - c6);
719 				else
720 					mask = 0xFFFFFFFFUL >> c6;
721 
722 				word_write(m_ml, m_a, mask);
723 			}
724 
725 			delay2 = 1;
726 			break;
727 
728 		case 30:
729 			/* S(x) */
730 			m_working_store = (x >> 5) & 0xf;   /* or is it (x >> 6)? */
731 			DELAY(32);  /* no idea what the value is...  All I know is that it takes much
732 			            more time than track switching (which takes 6 cycles) */
733 			break;
734 		}
735 		if (vector)
736 			/* increment word position in vector operations */
737 			m_ml = (m_ml & 0x3E0) | ((m_ml + 1) & 0x1F);
738 	} while (vector && has_operand && (++i < 32));  /* iterate 32 times if vector bit is set */
739 													/* the has_operand is a mere guess */
740 
741 	/* load ml with Y */
742 	delay3 = load_ml(y, 0);
743 
744 	/* compute max(delay2, delay3) */
745 	if (delay2 > delay3)
746 		delay3 = delay2;
747 
748 	/* burn pre-fetch delay if needed */
749 	if (delay3)
750 	{
751 		DELAY(delay3);
752 	}
753 
754 	/* entry point after a successful Branch (which alters the normal instruction sequence,
755 	in order not to load ml with Y) */
756 special_fetch:
757 
758 	m_pc = effective_address(m_ml) << 2;
759 
760 	/* fetch current instruction into control register */
761 	m_cr = word_read(m_ml, 0);
762 }
763 
764 
device_start()765 void apexc_cpu_device::device_start()
766 {
767 	m_tape_read_cb.resolve_safe(0);
768 	m_tape_punch_cb.resolve_safe();
769 
770 	m_program = &space(AS_PROGRAM);
771 
772 	save_item(NAME(m_a));
773 	save_item(NAME(m_r));
774 	save_item(NAME(m_cr));
775 	save_item(NAME(m_ml));
776 	save_item(NAME(m_working_store));
777 	save_item(NAME(m_current_word));
778 	save_item(NAME(m_running));
779 	save_item(NAME(m_pc));
780 
781 	state_add( APEXC_CR, "CR", m_cr ).formatstr("%08X");
782 	state_add( APEXC_A, "A", m_a ).formatstr("%08X");
783 	state_add( APEXC_R, "R", m_r ).formatstr("%08X");
784 	state_add( APEXC_ML, "ML", m_ml ).mask(0x3ff).callimport().formatstr("%03X");
785 	state_add( APEXC_WS, "WS", m_working_store ).mask(0xf).callimport();
786 	state_add( APEXC_STATE, "CPU state", m_running ).mask(0x01);
787 	state_add( STATE_GENPC, "PC", m_pc ).mask(0x7ffc).callimport().formatstr("%04X");
788 	state_add( STATE_GENPCBASE, "CURPC", m_pc ).mask(0x7ffc).callimport().noshow();
789 
790 	set_icountptr(m_icount);
791 }
792 
793 
794 //-------------------------------------------------
795 //  state_import - import state into the device,
796 //  after it has been set
797 //-------------------------------------------------
798 
state_import(const device_state_entry & entry)799 void apexc_cpu_device::state_import(const device_state_entry &entry)
800 {
801 	switch (entry.index())
802 	{
803 		case STATE_GENPC:
804 		case STATE_GENPCBASE:
805 			/* keep address 9 LSBits - 10th bit depends on whether we are accessing the permanent
806 			track group or a switchable one */
807 			m_ml = (m_pc >> 2) & 0x1ff;
808 			if ((m_pc >> 2) & 0x1e00)
809 			{
810 				/* we are accessing a switchable track group */
811 				m_ml |= 0x200;  /* set 10th bit */
812 				m_working_store = (m_pc >> 11) & 0xf;
813 			}
814 
815 			/* fetch current instruction into control register */
816 			m_cr = m_program->read_dword(m_pc);
817 			break;
818 
819 		case APEXC_ML:
820 		case APEXC_WS:
821 			m_pc = effective_address(m_ml) << 2;
822 			break;
823 	}
824 }
825 
826 
device_reset()827 void apexc_cpu_device::device_reset()
828 {
829 	/* mmmh...  I don't know what happens on reset with an actual APEXC. */
830 
831 	m_working_store = 1;    /* mere guess */
832 	m_current_word = 0;     /* well, we do have to start somewhere... */
833 
834 	/* next two lines are just the product of my bold fantasy */
835 	m_cr = 0;               /* first instruction executed will be a stop */
836 	m_running = true;       /* this causes the CPU to load the instruction at 0/0,
837 	                           which enables easy booting (just press run on the panel) */
838 	m_a = 0;
839 	m_r = 0;
840 	m_pc = 0;
841 	m_ml = 0;
842 }
843 
844 
execute_run()845 void apexc_cpu_device::execute_run()
846 {
847 	do
848 	{
849 		debugger_instruction_hook(m_pc);
850 
851 		if (m_running)
852 			execute();
853 		else
854 		{
855 			DELAY(m_icount);    /* burn cycles once for all */
856 		}
857 	} while (m_icount > 0);
858 }
859 
create_disassembler()860 std::unique_ptr<util::disasm_interface> apexc_cpu_device::create_disassembler()
861 {
862 	return std::make_unique<apexc_disassembler>();
863 }
864