1 /*
2  *  QEMU model of the Milkymist programmable FPU.
3  *
4  *  Copyright (c) 2010 Michael Walle <michael@walle.cc>
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  *
19  *
20  * Specification available at:
21  *   http://milkymist.walle.cc/socdoc/pfpu.pdf
22  *
23  */
24 
25 #include "qemu/osdep.h"
26 #include "hw/irq.h"
27 #include "hw/sysbus.h"
28 #include "migration/vmstate.h"
29 #include "trace.h"
30 #include "qemu/log.h"
31 #include "qemu/module.h"
32 #include "qemu/error-report.h"
33 #include <math.h>
34 
35 /* #define TRACE_EXEC */
36 
/*
 * D_EXEC(x) expands to its argument when TRACE_EXEC is defined and to
 * nothing otherwise, so execution tracing costs nothing in normal builds.
 */
#ifndef TRACE_EXEC
#    define D_EXEC(x)
#else
#    define D_EXEC(x) x
#endif
42 
/* Indices into MilkymistPFPUState.regs[] (word offsets of the registers). */
enum {
    R_CTL         = 0,
    R_MESHBASE    = 1,
    R_HMESHLAST   = 2,
    R_VMESHLAST   = 3,
    R_CODEPAGE    = 4,
    R_VERTICES    = 5,
    R_COLLISIONS  = 6,
    R_STRAYWRITES = 7,
    R_LASTDMA     = 8,
    R_PC          = 9,
    R_DREGBASE    = 10,
    R_CODEBASE    = 11,
    R_MAX         = 12  /* number of registers, not a register itself */
};
58 
/* Bits of the R_CTL register. */
enum {
    CTL_START_BUSY = 0x1,   /* bit 0: writing 1 starts the microcode run */
};
62 
/* Opcodes as encoded in the 4-bit operation field of a microcode word. */
enum {
    OP_NOP     = 0,
    OP_FADD    = 1,
    OP_FSUB    = 2,
    OP_FMUL    = 3,
    OP_FABS    = 4,
    OP_F2I     = 5,
    OP_I2F     = 6,
    OP_VECTOUT = 7,
    OP_SIN     = 8,
    OP_COS     = 9,
    OP_ABOVE   = 10,
    OP_EQUAL   = 11,
    OP_COPY    = 12,
    OP_IF      = 13,
    OP_TSIGN   = 14,
    OP_QUAKE   = 15,
};
81 
/* General purpose registers with a fixed role in the execution loop. */
enum {
    GPR_X,          /* 0: current horizontal mesh position */
    GPR_Y,          /* 1: current vertical mesh position */
    GPR_FLAGS,      /* 2: condition read by the IF operation */
};
87 
/*
 * Number of instruction slots between issuing an operation and its result
 * reaching the head of the output queue.
 */
enum {
    /* arithmetic */
    LATENCY_FADD    = 5,
    LATENCY_FSUB    = 5,
    LATENCY_FMUL    = 7,
    LATENCY_FABS    = 2,
    /* conversions */
    LATENCY_F2I     = 2,
    LATENCY_I2F     = 3,
    /* vertex output produces no register result */
    LATENCY_VECTOUT = 0,
    /* trigonometry */
    LATENCY_SIN     = 4,
    LATENCY_COS     = 4,
    /* comparisons, moves and selection */
    LATENCY_ABOVE   = 2,
    LATENCY_EQUAL   = 2,
    LATENCY_COPY    = 2,
    LATENCY_IF      = 2,
    LATENCY_TSIGN   = 2,
    LATENCY_QUAKE   = 2,
    /* largest latency above; also the length of the output queue */
    MAX_LATENCY     = 7
};
106 
/* Word-index ranges (byte offset / 4) of the register windows. */
#define GPR_BEGIN       0x100   /* first general purpose register */
#define GPR_END         0x17f   /* last general purpose register (inclusive) */
#define MICROCODE_BEGIN 0x200   /* first word of the microcode window */
#define MICROCODE_END   0x3ff   /* last word of the microcode window (inclusive) */

/* Total number of microcode words held by the device model. */
#define MICROCODE_WORDS 2048

/*
 * Read the storage of lvalue 'val' as if it had type 'type'.
 * NOTE(review): this is a pointer-cast type pun and relies on the build not
 * enforcing strict aliasing.
 */
#define REINTERPRET_CAST(type, val) (*(type *)&(val))
114 
#ifdef TRACE_EXEC
/* Mnemonics for the execution trace, indexed by the 4-bit opcode value. */
static const char *opcode_to_str[] = {
    "NOP",
    "FADD",
    "FSUB",
    "FMUL",
    "FABS",
    "F2I",
    "I2F",
    "VECTOUT",
    "SIN",
    "COS",
    "ABOVE",
    "EQUAL",
    "COPY",
    "IF",
    "TSIGN",
    "QUAKE",
};
#endif
121 
122 #define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
123 #define MILKYMIST_PFPU(obj) \
124     OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
125 
126 struct MilkymistPFPUState {
127     SysBusDevice parent_obj;
128 
129     MemoryRegion regs_region;
130     Chardev *chr;
131     qemu_irq irq;
132 
133     uint32_t regs[R_MAX];
134     uint32_t gp_regs[128];
135     uint32_t microcode[MICROCODE_WORDS];
136 
137     int output_queue_pos;
138     uint32_t output_queue[MAX_LATENCY];
139 };
140 typedef struct MilkymistPFPUState MilkymistPFPUState;
141 
/*
 * Compute the address of the vertex entry at mesh position (x, y).
 *
 * Each entry is 8 bytes wide and a mesh row holds 128 entries; the result
 * wraps modulo 2^32 like any uint32_t arithmetic.
 */
static inline uint32_t
get_dma_address(uint32_t base, uint32_t x, uint32_t y)
{
    uint32_t entry = (y << 7) + x;      /* 128 entries per row */

    return base + (entry << 3);         /* 8 bytes per entry */
}
147 
148 static inline void
output_queue_insert(MilkymistPFPUState * s,uint32_t val,int pos)149 output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
150 {
151     s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
152 }
153 
154 static inline uint32_t
output_queue_remove(MilkymistPFPUState * s)155 output_queue_remove(MilkymistPFPUState *s)
156 {
157     return s->output_queue[s->output_queue_pos];
158 }
159 
160 static inline void
output_queue_advance(MilkymistPFPUState * s)161 output_queue_advance(MilkymistPFPUState *s)
162 {
163     s->output_queue[s->output_queue_pos] = 0;
164     s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
165 }
166 
/*
 * Execute the microcode instruction at the current program counter.
 *
 * A microcode word is decoded as: bits 24..18 source register A,
 * bits 17..11 source register B, bits 10..7 opcode, bits 6..0 destination
 * register. Results are not written back immediately: the value computed by
 * an operation is placed into the output queue (latency - 1) slots ahead,
 * and on every instruction the value at the head of the queue is written to
 * that instruction's destination register (unless it is register 0).
 *
 * VECTOUT writes registers A and B (big endian) to the vertex entry selected
 * by GPR_X/GPR_Y and ends the program; it neither advances the program
 * counter nor touches the output queue.
 *
 * Returns 1 if the caller should execute the next instruction, 0 if the
 * program has finished (VECTOUT) or the program counter is outside the
 * microcode memory.
 */
static int pfpu_decode_insn(MilkymistPFPUState *s)
{
    uint32_t pc = s->regs[R_PC];
    uint32_t insn;
    uint32_t reg_a;
    uint32_t reg_b;
    uint32_t op;
    uint32_t reg_d;
    uint32_t r = 0;
    int latency = 0;

    /*
     * R_PC can be set to any value through the register interface and is
     * incremented on every instruction, so it must be validated before it
     * is used as an index into the microcode array.
     */
    if (pc >= MICROCODE_WORDS) {
        error_report("milkymist_pfpu: program counter %u is outside of the "
                "microcode memory", pc);
        return 0;
    }

    insn = s->microcode[pc];
    reg_a = (insn >> 18) & 0x7f;
    reg_b = (insn >> 11) & 0x7f;
    op = (insn >> 7) & 0xf;
    reg_d = insn & 0x7f;

    switch (op) {
    case OP_NOP:
        break;
    case OP_FADD:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a + b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FADD;
        D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FSUB:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a - b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FSUB;
        D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FMUL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = a * b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FMUL;
        D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_FABS:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float t = fabsf(a);
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_FABS;
        D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_F2I:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        /*
         * NOTE(review): the conversion is undefined in C when 'a' is NaN or
         * does not fit into int32_t; the register content is guest
         * controlled. The intended result for such inputs is not visible
         * here, so the conversion is left as is.
         */
        int32_t t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_F2I;
        D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
    } break;
    case OP_I2F:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_I2F;
        D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_VECTOUT:
    {
        uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
        uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
        hwaddr dma_ptr =
            get_dma_address(s->regs[R_MESHBASE],
                    s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
        cpu_physical_memory_write(dma_ptr, &a, 4);
        cpu_physical_memory_write(dma_ptr + 4, &b, 4);
        s->regs[R_LASTDMA] = dma_ptr + 4;
        /* hwaddr is wider than what %08x expects, so narrow it explicitly */
        D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b,
                    (uint32_t)dma_ptr));
        trace_milkymist_pfpu_vectout(a, b, dma_ptr);
    } break;
    case OP_SIN:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_SIN;
        D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_COS:
    {
        int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
        float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_COS;
        D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
    } break;
    case OP_ABOVE:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a > b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_ABOVE;
        D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_EQUAL:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (a == b) ? 1.0f : 0.0f;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_EQUAL;
        D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_COPY:
    {
        r = s->gp_regs[reg_a];
        latency = LATENCY_COPY;
        D_EXEC(qemu_log("COPY"));
    } break;
    case OP_IF:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        uint32_t f = s->gp_regs[GPR_FLAGS];
        float t = (f != 0) ? a : b;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_IF;
        D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
    } break;
    case OP_TSIGN:
    {
        float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
        float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
        float t = (b < 0) ? -a : a;
        r = REINTERPRET_CAST(uint32_t, t);
        latency = LATENCY_TSIGN;
        D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
    } break;
    case OP_QUAKE:
    {
        uint32_t a = s->gp_regs[reg_a];
        r = 0x5f3759df - (a >> 1);
        latency = LATENCY_QUAKE;
        D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
    } break;

    default:
        /* not reachable: 'op' is four bits wide and all 16 values are used */
        error_report("milkymist_pfpu: unknown opcode %d", op);
        break;
    }

    if (!reg_d) {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency));
    } else {
        D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
                    s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
                    s->regs[R_PC] + latency, reg_d));
    }

    /* VECTOUT ends the program for the current vertex */
    if (op == OP_VECTOUT) {
        return 0;
    }

    /* store output for this cycle */
    if (reg_d) {
        uint32_t val = output_queue_remove(s);
        D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
        s->gp_regs[reg_d] = val;
    }

    output_queue_advance(s);

    /*
     * store op output; the queue head has already moved on by one slot,
     * hence latency - 1
     */
    if (op != OP_NOP) {
        output_queue_insert(s, r, latency - 1);
    }

    /* advance PC */
    s->regs[R_PC]++;

    return 1;
}
350 
/*
 * Run the microcode once for every mesh position from (0, 0) up to and
 * including (R_HMESHLAST, R_VMESHLAST), then store the number of processed
 * vertices in R_VERTICES and pulse the interrupt line.
 *
 * For each position GPR_X/GPR_Y are loaded with the coordinates and
 * instructions are executed until pfpu_decode_insn() reports the end of the
 * program or MICROCODE_WORDS instructions have been executed. R_PC is reset
 * to 0 after every position.
 */
static void pfpu_start(MilkymistPFPUState *s)
{
    /*
     * The limits are guest-controlled 32 bit values. Unsigned counters with
     * the exit test placed after the body give the same iteration count as
     * "for (x = 0; x <= last; x++)", but cannot overflow and also terminate
     * when the limit is 0xffffffff.
     * NOTE(review): very large limits still take very long to process;
     * consider clamping them if the hardware limits the mesh size.
     */
    uint32_t x, y;
    int i;

    for (y = 0; ; y++) {
        for (x = 0; ; x++) {
            D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", (int)x, (int)y));

            /* set current position */
            s->gp_regs[GPR_X] = x;
            s->gp_regs[GPR_Y] = y;

            /* run microcode on this position */
            i = 0;
            while (pfpu_decode_insn(s)) {
                /* decode at most MICROCODE_WORDS instructions */
                if (++i >= MICROCODE_WORDS) {
                    error_report("milkymist_pfpu: too many instructions "
                            "executed in microcode. No VECTOUT?");
                    break;
                }
            }

            /* reset pc for next run */
            s->regs[R_PC] = 0;

            /* the limit is re-read every time, as a loop condition would */
            if (x >= s->regs[R_HMESHLAST]) {
                break;
            }
        }

        if (y >= s->regs[R_VMESHLAST]) {
            break;
        }
    }

    /* x and y hold the last processed indices, so the counts are one more */
    s->regs[R_VERTICES] = (x + 1) * (y + 1);

    trace_milkymist_pfpu_pulse_irq();
    qemu_irq_pulse(s->irq);
}
385 
/*
 * Translate a word index inside the microcode window
 * (MICROCODE_BEGIN..MICROCODE_END) into an index into s->microcode[],
 * taking the page selected by R_CODEPAGE (512 words per page) into account.
 *
 * R_CODEPAGE is guest-writable and not range checked when written, so the
 * result is wrapped to the size of the microcode array; the returned index
 * is always in 0..MICROCODE_WORDS-1.
 */
static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
{
    uint32_t index = (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;

    return index % MICROCODE_WORDS;
}
390 
/*
 * MMIO read handler.
 *
 * 'addr' is the byte offset into the register window; it is converted to a
 * word index and dispatched to the control registers, the general purpose
 * registers or the currently selected microcode page. Unknown offsets are
 * reported and read as 0.
 */
static uint64_t pfpu_read(void *opaque, hwaddr addr,
                          unsigned size)
{
    MilkymistPFPUState *s = opaque;
    const hwaddr reg = addr >> 2;
    uint32_t value = 0;

    if (reg < R_MAX) {
        /* R_CTL .. R_CODEBASE */
        value = s->regs[reg];
    } else if (reg >= GPR_BEGIN && reg <= GPR_END) {
        value = s->gp_regs[reg - GPR_BEGIN];
    } else if (reg >= MICROCODE_BEGIN && reg <= MICROCODE_END) {
        value = s->microcode[get_microcode_address(s, reg)];
    } else {
        error_report("milkymist_pfpu: read access to unknown register 0x"
                TARGET_FMT_plx, reg << 2);
    }

    trace_milkymist_pfpu_memory_read(reg << 2, value);

    return value;
}
430 
/*
 * MMIO write handler.
 *
 * 'addr' is the byte offset into the register window. A write to R_CTL with
 * CTL_START_BUSY set runs the microcode (the written value itself is not
 * stored); all other control registers, the general purpose registers and
 * the currently selected microcode page simply store 'value'. Unknown
 * offsets are reported and ignored.
 */
static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
                       unsigned size)
{
    MilkymistPFPUState *s = opaque;
    hwaddr reg;

    trace_milkymist_pfpu_memory_write(addr, value);

    reg = addr >> 2;

    if (reg == R_CTL) {
        if (value & CTL_START_BUSY) {
            pfpu_start(s);
        }
    } else if (reg < R_MAX) {
        /* R_MESHBASE .. R_CODEBASE */
        s->regs[reg] = value;
    } else if (reg >= GPR_BEGIN && reg <= GPR_END) {
        s->gp_regs[reg - GPR_BEGIN] = value;
    } else if (reg >= MICROCODE_BEGIN && reg <= MICROCODE_END) {
        s->microcode[get_microcode_address(s, reg)] = value;
    } else {
        error_report("milkymist_pfpu: write access to unknown register 0x"
                TARGET_FMT_plx, reg << 2);
    }
}
471 
472 static const MemoryRegionOps pfpu_mmio_ops = {
473     .read = pfpu_read,
474     .write = pfpu_write,
475     .valid = {
476         .min_access_size = 4,
477         .max_access_size = 4,
478     },
479     .endianness = DEVICE_NATIVE_ENDIAN,
480 };
481 
/*
 * Device reset: clear all control registers, general purpose registers,
 * the microcode memory and the output queue.
 */
static void milkymist_pfpu_reset(DeviceState *d)
{
    MilkymistPFPUState *s = MILKYMIST_PFPU(d);
    size_t n;

    for (n = 0; n < sizeof(s->regs) / sizeof(s->regs[0]); n++) {
        s->regs[n] = 0;
    }

    for (n = 0; n < sizeof(s->gp_regs) / sizeof(s->gp_regs[0]); n++) {
        s->gp_regs[n] = 0;
    }

    for (n = 0; n < sizeof(s->microcode) / sizeof(s->microcode[0]); n++) {
        s->microcode[n] = 0;
    }

    for (n = 0; n < sizeof(s->output_queue) / sizeof(s->output_queue[0]);
         n++) {
        s->output_queue[n] = 0;
    }
    s->output_queue_pos = 0;
}
501 
milkymist_pfpu_realize(DeviceState * dev,Error ** errp)502 static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
503 {
504     MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
505     SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
506 
507     sysbus_init_irq(sbd, &s->irq);
508 
509     memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
510             "milkymist-pfpu", MICROCODE_END * 4);
511     sysbus_init_mmio(sbd, &s->regs_region);
512 }
513 
514 static const VMStateDescription vmstate_milkymist_pfpu = {
515     .name = "milkymist-pfpu",
516     .version_id = 1,
517     .minimum_version_id = 1,
518     .fields = (VMStateField[]) {
519         VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
520         VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
521         VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
522         VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
523         VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
524         VMSTATE_END_OF_LIST()
525     }
526 };
527 
/* Class initializer: hook up realize, reset and the migration description. */
static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);

    device_class->vmsd = &vmstate_milkymist_pfpu;
    device_class->reset = milkymist_pfpu_reset;
    device_class->realize = milkymist_pfpu_realize;
}
536 
537 static const TypeInfo milkymist_pfpu_info = {
538     .name          = TYPE_MILKYMIST_PFPU,
539     .parent        = TYPE_SYS_BUS_DEVICE,
540     .instance_size = sizeof(MilkymistPFPUState),
541     .class_init    = milkymist_pfpu_class_init,
542 };
543 
milkymist_pfpu_register_types(void)544 static void milkymist_pfpu_register_types(void)
545 {
546     type_register_static(&milkymist_pfpu_info);
547 }
548 
549 type_init(milkymist_pfpu_register_types)
550