1 /*
2 * Copyright (c) 2001-2020 Stephen Williams (steve@icarus.com)
3 *
4 * This source code is free software; you can redistribute it
5 * and/or modify it in source code form under the terms of the GNU
6 * General Public License as published by the Free Software
7 * Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 */
19
20 # include "config.h"
21 # include "vthread.h"
22 # include "codes.h"
23 # include "schedule.h"
24 # include "ufunc.h"
25 # include "event.h"
26 # include "vpi_priv.h"
27 # include "vvp_net_sig.h"
28 # include "vvp_cobject.h"
29 # include "vvp_darray.h"
30 # include "class_type.h"
31 #ifdef CHECK_WITH_VALGRIND
32 # include "vvp_cleanup.h"
33 #endif
34 # include <set>
35 # include <typeinfo>
36 # include <vector>
37 # include <cstdlib>
38 # include <climits>
39 # include <cstring>
40 # include <cmath>
41 # include <cassert>
42
43 # include <iostream>
44 # include <sstream>
45 # include <cstdio>
46
47 using namespace std;
48
49 /* This is the size of an unsigned long in bits. This is just a
50 convenience macro. */
51 # define CPU_WORD_BITS (8*sizeof(unsigned long))
52 # define TOP_BIT (1UL << (CPU_WORD_BITS-1))
53
54 /*
55 * This vthread_s structure describes all there is to know about a
56 * thread, including its program counter, all the private bits it
57 * holds, and its place in other lists.
58 *
59 *
60 * ** Notes On The Interactions of %fork/%join/%end:
61 *
62 * The %fork instruction creates a new thread and pushes that into a
63 * set of children for the thread. This new thread, then, becomes a
64 * child of the current thread, and the current thread a parent of the
65 * new thread. Any child can be reaped by a %join.
66 *
 * Children that are detached with %join/detach need to have a different
 * parent/child relationship since the parent can still affect them if
 * it uses the %disable/fork or %wait/fork opcodes. The i_am_detached
 * flag and detached_children set are used for this relationship.
71 *
72 * It is a programming error for a thread that created threads to not
73 * %join (or %join/detach) as many as it created before it %ends. The
74 * children set will get messed up otherwise.
75 *
76 * the i_am_joining flag is a clue to children that the parent is
77 * blocked in a %join and may need to be scheduled. The %end
78 * instruction will check this flag in the parent to see if it should
79 * notify the parent that something is interesting.
80 *
81 * The i_have_ended flag, on the other hand, is used by threads to
82 * tell their parents that they are already dead. A thread that
83 * executes %end will set its own i_have_ended flag and let its parent
84 * reap it when the parent does the %join. If a thread has its
85 * schedule_parent_on_end flag set already when it %ends, then it
86 * reaps itself and simply schedules its parent. If a child has its
87 * i_have_ended flag set when a thread executes %join, then it is free
88 * to reap the child immediately.
89 */
90
struct vthread_s {
      vthread_s();

	// Write a human-readable dump of the thread state to fd,
	// framed by the given label. Debugging aid only.
      void debug_dump(ostream&fd, const char*label_text);

	/* This is the program counter. */
      vvp_code_t pc;
	/* These hold the private thread bits. */
      enum { FLAGS_COUNT = 512, WORDS_COUNT = 16 };
      vvp_bit4_t flags[FLAGS_COUNT];

	/* These are the word registers. */
      union {
	    int64_t w_int;
	    uint64_t w_uint;
      } words[WORDS_COUNT];

	// These vectors are depths within the parent thread's
	// corresponding stack. This is how the %ret/* instructions
	// get at parent thread arguments.
      vector<unsigned> args_real;
      vector<unsigned> args_str;
      vector<unsigned> args_vec4;

	/* Stack of 4-state vector values. The top of the stack is
	   back() of the vector. */
    private:
      vector<vvp_vector4_t>stack_vec4_;
    public:
	// Pop and return the value at the top of the vec4 stack.
	// Popping from an empty stack is a programming error.
      inline vvp_vector4_t pop_vec4(void)
      {
	    assert(! stack_vec4_.empty());
	    vvp_vector4_t val = stack_vec4_.back();
	    stack_vec4_.pop_back();
	    return val;
      }
	// Push a value onto the top of the vec4 stack.
      inline void push_vec4(const vvp_vector4_t&val)
      {
	    stack_vec4_.push_back(val);
      }
	// Return (read only) the value "depth" positions down from
	// the top of the stack. depth==0 is the top itself.
      inline const vvp_vector4_t& peek_vec4(unsigned depth)
      {
	    unsigned size = stack_vec4_.size();
	    assert(depth < size);
	    unsigned use_index = size-1-depth;
	    return stack_vec4_[use_index];
      }
	// Return a modifiable reference to the top of the stack, so
	// that opcodes can edit the top in place instead of doing a
	// pop followed by a push.
      inline vvp_vector4_t& peek_vec4(void)
      {
	    unsigned use_index = stack_vec4_.size();
	    assert(use_index >= 1);
	    return stack_vec4_[use_index-1];
      }
	// Overwrite the value "depth" positions down from the top.
      inline void poke_vec4(unsigned depth, const vvp_vector4_t&val)
      {
	    assert(depth < stack_vec4_.size());
	    unsigned use_index = stack_vec4_.size()-1-depth;
	    stack_vec4_[use_index] = val;
      }
	// Discard the top "cnt" values from the stack.
      inline void pop_vec4(unsigned cnt)
      {
	    while (cnt > 0) {
		  stack_vec4_.pop_back();
		  cnt -= 1;
	    }
      }


	/* Stack of real (double) values, with the same discipline as
	   the vec4 stack above. */
    private:
      vector<double> stack_real_;
    public:
	// Pop and return the top of the real-value stack.
      inline double pop_real(void)
      {
	    assert(! stack_real_.empty());
	    double val = stack_real_.back();
	    stack_real_.pop_back();
	    return val;
      }
	// Push a value onto the top of the real-value stack.
      inline void push_real(double val)
      {
	    stack_real_.push_back(val);
      }
	// Return the value "depth" positions down from the top.
      inline double peek_real(unsigned depth)
      {
	    assert(depth < stack_real_.size());
	    unsigned use_index = stack_real_.size()-1-depth;
	    return stack_real_[use_index];
      }
	// Overwrite the value "depth" positions down from the top.
      inline void poke_real(unsigned depth, double val)
      {
	    assert(depth < stack_real_.size());
	    unsigned use_index = stack_real_.size()-1-depth;
	    stack_real_[use_index] = val;
      }
	// Discard the top "cnt" values from the stack.
      inline void pop_real(unsigned cnt)
      {
	    while (cnt > 0) {
		  stack_real_.pop_back();
		  cnt -= 1;
	    }
      }

	/* Strings are operated on using a forth-like operator
	   set. Items at the top of the stack (back()) are the objects
	   operated on except for special cases. New objects are
	   pushed onto the top (back()) and pulled from the top
	   (back()) only. */
    private:
      vector<string> stack_str_;
    public:
	// Pop and return the top of the string stack.
      inline string pop_str(void)
      {
	    assert(! stack_str_.empty());
	    string val = stack_str_.back();
	    stack_str_.pop_back();
	    return val;
      }
	// Push a value onto the top of the string stack.
      inline void push_str(const string&val)
      {
	    stack_str_.push_back(val);
      }
	// Return a modifiable reference to the string "depth"
	// positions down from the top. depth==0 is the top itself.
      inline string&peek_str(unsigned depth)
      {
	    assert(depth<stack_str_.size());
	    unsigned use_index = stack_str_.size()-1-depth;
	    return stack_str_[use_index];
      }
	// Overwrite the string "depth" positions down from the top.
      inline void poke_str(unsigned depth, const string&val)
      {
	    assert(depth < stack_str_.size());
	    unsigned use_index = stack_str_.size()-1-depth;
	    stack_str_[use_index] = val;
      }
	// Discard the top "cnt" strings from the stack.
      inline void pop_str(unsigned cnt)
      {
	    while (cnt > 0) {
		  stack_str_.pop_back();
		  cnt -= 1;
	    }
      }

	/* Objects are also operated on in a stack. Unlike the other
	   stacks, this one is a fixed-size array, with
	   stack_obj_size_ marking the current depth. */
    private:
      enum { STACK_OBJ_MAX_SIZE = 32 };
      vvp_object_t stack_obj_[STACK_OBJ_MAX_SIZE];
      unsigned stack_obj_size_;
    public:
	// Return a modifiable reference to the top of the object stack.
      inline vvp_object_t& peek_object(void)
      {
	    assert(stack_obj_size_ > 0);
	    return stack_obj_[stack_obj_size_-1];
      }
	// Pop the top of the object stack into obj, releasing the
	// stack slot's reference so the stack does not keep the
	// object alive.
      inline void pop_object(vvp_object_t&obj)
      {
	    assert(stack_obj_size_ > 0);
	    stack_obj_size_ -= 1;
	    obj = stack_obj_[stack_obj_size_];
	    stack_obj_[stack_obj_size_].reset(0);
      }
	// Remove "cnt" objects from the stack, preserving the top
	// "skip" objects. The cnt slots below the preserved ones are
	// released, the preserved objects are shifted down into the
	// vacated space, and the now-unused top slots are cleared.
      inline void pop_object(unsigned cnt, unsigned skip =0)
      {
	    assert((cnt+skip) <= stack_obj_size_);
	    for (size_t idx = stack_obj_size_-skip-cnt ; idx < stack_obj_size_-skip ; idx += 1)
		  stack_obj_[idx].reset(0);
	    stack_obj_size_ -= cnt;
	    for (size_t idx = stack_obj_size_-skip ; idx < stack_obj_size_ ; idx += 1)
		  stack_obj_[idx] = stack_obj_[idx+skip];
	    for (size_t idx = stack_obj_size_ ; idx < stack_obj_size_+skip ; idx += 1)
		  stack_obj_[idx].reset(0);
      }
	// Push an object onto the top of the object stack. The stack
	// has a fixed maximum depth.
      inline void push_object(const vvp_object_t&obj)
      {
	    assert(stack_obj_size_ < STACK_OBJ_MAX_SIZE);
	    stack_obj_[stack_obj_size_] = obj;
	    stack_obj_size_ += 1;
      }

	/* My parent sets this when it wants me to wake it up. */
      unsigned i_am_joining :1;
      unsigned i_am_detached :1;
      unsigned i_am_waiting :1;
      unsigned i_am_in_function :1; // True if running function code
      unsigned i_have_ended :1;
      unsigned i_was_disabled :1;
      unsigned waiting_for_event :1;
      unsigned is_scheduled :1;
      unsigned delay_delete :1;
	/* This points to the children of the thread. */
      set<struct vthread_s*>children;
	/* This points to the detached children of the thread. */
      set<struct vthread_s*>detached_children;
	/* This points to my parent, if I have one. */
      struct vthread_s*parent;
	/* This points to the containing scope. */
      __vpiScope*parent_scope;
	/* This is used for keeping wait queues. */
      struct vthread_s*wait_next;
	/* These are used to access automatically allocated items. */
      vvp_context_t wt_context, rd_context;
	/* These are used to pass non-blocking event control information. */
      vvp_net_t*event;
      uint64_t ecount;
	/* Save the file/line information when available. */
    private:
      char *filenm_;
      unsigned lineno_;
    public:
	// Record the current source file/line. The file name is
	// copied only when it differs from the one already stored.
      void set_fileline(char *filenm, unsigned lineno);
	// Format the stored location as "<file>:<line>: ", or an
	// empty string if no file name is recorded.
      string get_fileline();

	// Release per-thread storage prior to deletion. A thread
	// that was disabled may die with values still on its stacks,
	// so clear them in that case; otherwise the stacks are
	// required to be empty by the time cleanup() runs.
      inline void cleanup()
      {
	    if (i_was_disabled) {
		  stack_vec4_.clear();
		  stack_real_.clear();
		  stack_str_.clear();
		  pop_object(stack_obj_size_);
	    }
	    free(filenm_);
	    filenm_ = 0;
	    assert(stack_vec4_.empty());
	    assert(stack_real_.empty());
	    assert(stack_str_.empty());
	    assert(stack_obj_size_ == 0);
      }
};
315
vthread_s()316 inline vthread_s::vthread_s()
317 {
318 stack_obj_size_ = 0;
319 filenm_ = 0;
320 lineno_ = 0;
321 }
322
set_fileline(char * filenm,unsigned lineno)323 void vthread_s::set_fileline(char *filenm, unsigned lineno)
324 {
325 assert(filenm);
326 if (!filenm_ || (strcmp(filenm_, filenm) != 0)) {
327 free(filenm_);
328 filenm_ = strdup(filenm);
329 }
330 lineno_ = lineno;
331 }
332
get_fileline()333 inline string vthread_s::get_fileline()
334 {
335 ostringstream buf;
336 if (filenm_) {
337 buf << filenm_ << ":" << lineno_ << ": ";
338 }
339 string res = buf.str();
340 return res;
341 }
342
/*
 * Write a human readable dump of the thread state to the given
 * stream: the flag bits, the vec4 stack (top of stack first), the
 * depths of the other stacks, the args_vec4 array and the saved
 * file/line, all framed by the given label.
 */
void vthread_s::debug_dump(ostream&fd, const char*label)
{
      fd << "**** " << label << endl;
      fd << "**** ThreadId: " << this << ", parent id: " << parent << endl;

      fd << "**** Flags: ";
      for (int idx = 0 ; idx < FLAGS_COUNT ; idx += 1)
	    fd << flags[idx];
      fd << endl;
	// Print the stack with the top (back()) as entry 0.
      fd << "**** vec4 stack..." << endl;
      for (size_t idx = stack_vec4_.size() ; idx > 0 ; idx -= 1)
	    fd << "    " << (stack_vec4_.size()-idx) << ": " << stack_vec4_[idx-1] << endl;
      fd << "**** str stack (" << stack_str_.size() << ")..." << endl;
      fd << "**** obj stack (" << stack_obj_size_ << ")..." << endl;
      fd << "**** args_vec4 array (" << args_vec4.size() << ")..." << endl;
      for (size_t idx = 0 ; idx < args_vec4.size() ; idx += 1)
	    fd << "    " << idx << ": " << args_vec4[idx] << endl;
      fd << "**** file/line (";
      if (filenm_) fd << filenm_;
      else fd << "<no file name>";
      fd << ":" << lineno_ << ")" << endl;
      fd << "**** Done ****" << endl;
}
366
367 static void do_join(vthread_t thr, vthread_t child);
368
// Report the scope that contains the given thread.
__vpiScope* vthread_scope(struct vthread_s*thr)
{
      return thr->parent_scope;
}
373
374 struct vthread_s*running_thread = 0;
375
get_fileline()376 string get_fileline()
377 {
378 return running_thread->get_fileline();
379 }
380
// Push a real value onto the given thread's real-value stack. These
// vthread_* free functions are the external interface to the
// per-thread stacks, used by code outside this file.
void vthread_push(struct vthread_s*thr, double val)
{
      thr->push_real(val);
}

// Push a string onto the given thread's string stack.
void vthread_push(struct vthread_s*thr, const string&val)
{
      thr->push_str(val);
}

// Push a vec4 value onto the given thread's vec4 stack.
void vthread_push(struct vthread_s*thr, const vvp_vector4_t&val)
{
      thr->push_vec4(val);
}

// Discard "depth" values from the top of the real-value stack.
void vthread_pop_real(struct vthread_s*thr, unsigned depth)
{
      thr->pop_real(depth);
}

// Discard "depth" values from the top of the string stack.
void vthread_pop_str(struct vthread_s*thr, unsigned depth)
{
      thr->pop_str(depth);
}

// Discard "depth" values from the top of the vec4 stack.
void vthread_pop_vec4(struct vthread_s*thr, unsigned depth)
{
      thr->pop_vec4(depth);
}

// Read (without popping) the real value "depth" down from the top.
double vthread_get_real_stack(struct vthread_s*thr, unsigned depth)
{
      return thr->peek_real(depth);
}

// Read (without popping) the string "depth" down from the top.
const string&vthread_get_str_stack(struct vthread_s*thr, unsigned depth)
{
      return thr->peek_str(depth);
}

// Read (without popping) the vec4 value "depth" down from the top.
const vvp_vector4_t& vthread_get_vec4_stack(struct vthread_s*thr, unsigned depth)
{
      return thr->peek_vec4(depth);
}
425
426 /*
427 * Some thread management functions
428 */
429 /*
430 * This is a function to get a vvp_queue handle from the variable
431 * referenced by "net". If the queue is nil, then allocated it and
432 * assign the value to the net. Note that this function is
433 * parameterized by the queue type so that we can create the right
434 * derived type of queue object.
435 */
get_queue_object(vthread_t thr,vvp_net_t * net)436 template <class VVP_QUEUE> static vvp_queue*get_queue_object(vthread_t thr, vvp_net_t*net)
437 {
438 vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
439 assert(obj);
440
441 vvp_queue*queue = obj->get_object().peek<vvp_queue>();
442 if (queue == 0) {
443 assert(obj->get_object().test_nil());
444 queue = new VVP_QUEUE;
445 vvp_object_t val (queue);
446 vvp_net_ptr_t ptr (net, 0);
447 vvp_send_object(ptr, val, thr->wt_context);
448 }
449
450 return queue;
451 }
452
453 /*
454 * The following are used to allow a common template to be written for
455 * queue real/string/vec4 operations
456 */
pop_value(vthread_t thr,double & value,unsigned)457 inline static void pop_value(vthread_t thr, double&value, unsigned)
458 {
459 value = thr->pop_real();
460 }
461
pop_value(vthread_t thr,string & value,unsigned)462 inline static void pop_value(vthread_t thr, string&value, unsigned)
463 {
464 value = thr->pop_str();
465 }
466
pop_value(vthread_t thr,vvp_vector4_t & value,unsigned wid)467 inline static void pop_value(vthread_t thr, vvp_vector4_t&value, unsigned wid)
468 {
469 value = thr->pop_vec4();
470 assert(value.size() == wid);
471 }
472
473 /*
474 * The following are used to allow the queue templates to print correctly.
475 */
get_queue_type(double &)476 inline static string get_queue_type(double&)
477 {
478 return "queue<real>";
479 }
480
// Queue type name for string queues, used in error messages.
inline static std::string get_queue_type(std::string&)
{
      return "queue<string>";
}
485
get_queue_type(vvp_vector4_t value)486 inline static string get_queue_type(vvp_vector4_t value)
487 {
488 ostringstream buf;
489 buf << "queue<vector[" << value.size() << "]>";
490 string res = buf.str();
491 return res;
492 }
493
// Print a real queue element to stderr, for error reporting.
inline static void print_queue_value(double value)
{
      std::cerr << value;
}
498
// Print a string queue element to stderr (quoted), for error
// reporting. Take the string by const reference to avoid copying it
// just to print it.
inline static void print_queue_value(const std::string&value)
{
      std::cerr << "\"" << value << "\"";
}
503
print_queue_value(vvp_vector4_t value)504 inline static void print_queue_value(vvp_vector4_t value)
505 {
506 cerr << value;
507 }
508
509 /*
510 * The following are used to get a darray/queue default value.
511 */
dq_default(double & value,unsigned)512 inline static void dq_default(double&value, unsigned)
513 {
514 value = 0.0;
515 }
516
// The default for a string darray/queue element is the empty string.
inline static void dq_default(std::string&value, unsigned)
{
      value.clear();
}
521
// The default for a vector darray/queue element is a freshly
// constructed wid-bit vector (whatever fill vvp_vector4_t(wid)
// produces -- presumably all X bits; confirm against vvp_vector4_t).
inline static void dq_default(vvp_vector4_t&value, unsigned wid)
{
      value = vvp_vector4_t(wid);
}
526
527
// Narrow the given value to the requested width by copying the low
// "width" bits into a new value. Widening is not supported: the
// input must already be at least "width" bits.
template <class T> T coerce_to_width(const T&that, unsigned width)
{
      if (that.size() == width)
	    return that;

	// Only truncation is handled here.
      assert(that.size() > width);
      T res (width);
      for (unsigned idx = 0 ; idx < width ; idx += 1)
	    res.set_bit(idx, that.value(idx));

      return res;
}

/* Explicitly define the vvp_vector4_t version of coerce_to_width(). */
template vvp_vector4_t coerce_to_width(const vvp_vector4_t&that,
                                       unsigned width);
544
545
/*
 * Multiply the multi-word value val (an array of "words" unsigned
 * longs, least significant word first) by the single-word immediate
 * imm, accumulating the product into res (also "words" words).
 * Overflow out of the most significant word is discarded.
 */
static void multiply_array_imm(unsigned long*res, unsigned long*val,
			       unsigned words, unsigned long imm)
{
	// Start with a zero accumulator.
      for (unsigned idx = 0 ; idx < words ; idx += 1)
	    res[idx] = 0;

      for (unsigned mul_idx = 0 ; mul_idx < words ; mul_idx += 1) {
	      // NOTE(review): multiply_with_carry() appears to yield
	      // the low word of the product and leave the high word
	      // in sum -- confirm against the helper's definition.
	    unsigned long sum;
	    unsigned long tmp = multiply_with_carry(val[mul_idx], imm, sum);

	      // Add the low word in at position mul_idx, then
	      // propagate the high word (sum) and the carry up
	      // through the remaining words. sum is consumed at the
	      // first position above mul_idx, so it is zeroed after
	      // the first iteration.
	    unsigned long carry = 0;
	    res[mul_idx] = add_with_carry(res[mul_idx], tmp, carry);
	    for (unsigned add_idx = mul_idx+1 ; add_idx < words ; add_idx += 1) {
		  res[add_idx] = add_with_carry(res[add_idx], sum, carry);
		  sum = 0;
	    }
      }
}
564
565 /*
566 * Allocate a context for use by a child thread. By preference, use
567 * the last freed context. If none available, create a new one. Add
568 * it to the list of live contexts in that scope.
569 */
vthread_alloc_context(__vpiScope * scope)570 static vvp_context_t vthread_alloc_context(__vpiScope*scope)
571 {
572 assert(scope->is_automatic());
573
574 vvp_context_t context = scope->free_contexts;
575 if (context) {
576 scope->free_contexts = vvp_get_next_context(context);
577 for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
578 scope->item[idx]->reset_instance(context);
579 }
580 } else {
581 context = vvp_allocate_context(scope->nitem);
582 for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
583 scope->item[idx]->alloc_instance(context);
584 }
585 }
586
587 vvp_set_next_context(context, scope->live_contexts);
588 scope->live_contexts = context;
589
590 return context;
591 }
592
593 /*
594 * Free a context previously allocated to a child thread by pushing it
595 * onto the freed context stack. Remove it from the list of live contexts
596 * in that scope.
597 */
vthread_free_context(vvp_context_t context,__vpiScope * scope)598 static void vthread_free_context(vvp_context_t context, __vpiScope*scope)
599 {
600 assert(scope->is_automatic());
601 assert(context);
602
603 if (context == scope->live_contexts) {
604 scope->live_contexts = vvp_get_next_context(context);
605 } else {
606 vvp_context_t tmp = scope->live_contexts;
607 while (context != vvp_get_next_context(tmp)) {
608 assert(tmp);
609 tmp = vvp_get_next_context(tmp);
610 }
611 vvp_set_next_context(tmp, vvp_get_next_context(context));
612 }
613
614 vvp_set_next_context(context, scope->free_contexts);
615 scope->free_contexts = context;
616 }
617
618 #ifdef CHECK_WITH_VALGRIND
contexts_delete(struct __vpiScope * scope)619 void contexts_delete(struct __vpiScope*scope)
620 {
621 vvp_context_t context = scope->free_contexts;
622
623 while (context) {
624 scope->free_contexts = vvp_get_next_context(context);
625 for (unsigned idx = 0; idx < scope->nitem; idx += 1) {
626 scope->item[idx]->free_instance(context);
627 }
628 free(context);
629 context = scope->free_contexts;
630 }
631 free(scope->item);
632 }
633 #endif
634
635 /*
636 * Create a new thread with the given start address.
637 */
vthread_new(vvp_code_t pc,__vpiScope * scope)638 vthread_t vthread_new(vvp_code_t pc, __vpiScope*scope)
639 {
640 vthread_t thr = new struct vthread_s;
641 thr->pc = pc;
642 //thr->bits4 = vvp_vector4_t(32);
643 thr->parent = 0;
644 thr->parent_scope = scope;
645 thr->wait_next = 0;
646 thr->wt_context = 0;
647 thr->rd_context = 0;
648
649 thr->i_am_joining = 0;
650 thr->i_am_detached = 0;
651 thr->i_am_waiting = 0;
652 thr->i_am_in_function = 0;
653 thr->is_scheduled = 0;
654 thr->i_have_ended = 0;
655 thr->i_was_disabled = 0;
656 thr->delay_delete = 0;
657 thr->waiting_for_event = 0;
658 thr->event = 0;
659 thr->ecount = 0;
660
661 thr->flags[0] = BIT4_0;
662 thr->flags[1] = BIT4_1;
663 thr->flags[2] = BIT4_X;
664 thr->flags[3] = BIT4_Z;
665 for (int idx = 4 ; idx < 8 ; idx += 1)
666 thr->flags[idx] = BIT4_X;
667
668 scope->threads .insert(thr);
669 return thr;
670 }
671
672 #ifdef CHECK_WITH_VALGRIND
673 #if 0
674 /*
675 * These are not currently correct. If you use them you will get
676 * double delete messages. There is still a leak related to a
677 * waiting event that needs to be investigated.
678 */
679
// Delete the threads on a wait list. (Disabled: see the note above
// about double deletes.)
// NOTE(review): "base" is dereferenced (base->waiting_for_event)
// after advancing without a null check -- if the list ends, this
// would deref null. To be fixed if this code is ever re-enabled.
static void wait_next_delete(vthread_t base)
{
      while (base) {
	    vthread_t tmp = base->wait_next;
	    delete base;
	    base = tmp;
	    if (base->waiting_for_event == 0) break;
      }
}

// Delete a chain of child threads. (Disabled: the "child" member no
// longer appears in vthread_s -- presumably predates the children
// set; verify before re-enabling.)
static void child_delete(vthread_t base)
{
      while (base) {
	    vthread_t tmp = base->child;
	    delete base;
	    base = tmp;
      }
}
698 #endif
699
vthreads_delete(struct __vpiScope * scope)700 void vthreads_delete(struct __vpiScope*scope)
701 {
702 for (std::set<vthread_t>::iterator cur = scope->threads.begin()
703 ; cur != scope->threads.end() ; ++ cur ) {
704 delete *cur;
705 }
706 scope->threads.clear();
707 }
708 #endif
709
710 /*
711 * Reaping pulls the thread out of the stack of threads. If I have a
712 * child, then hand it over to my parent or fully detach it.
713 */
vthread_reap(vthread_t thr)714 static void vthread_reap(vthread_t thr)
715 {
716 if (! thr->children.empty()) {
717 for (set<vthread_t>::iterator cur = thr->children.begin()
718 ; cur != thr->children.end() ; ++cur) {
719 vthread_t child = *cur;
720 assert(child);
721 assert(child->parent == thr);
722 child->parent = thr->parent;
723 }
724 }
725 if (! thr->detached_children.empty()) {
726 for (set<vthread_t>::iterator cur = thr->detached_children.begin()
727 ; cur != thr->detached_children.end() ; ++cur) {
728 vthread_t child = *cur;
729 assert(child);
730 assert(child->parent == thr);
731 assert(child->i_am_detached);
732 child->parent = 0;
733 child->i_am_detached = 0;
734 }
735 }
736 if (thr->parent) {
737 /* assert that the given element was removed. */
738 if (thr->i_am_detached) {
739 size_t res = thr->parent->detached_children.erase(thr);
740 assert(res == 1);
741 } else {
742 size_t res = thr->parent->children.erase(thr);
743 assert(res == 1);
744 }
745 }
746
747 thr->parent = 0;
748
749 // Remove myself from the containing scope if needed.
750 thr->parent_scope->threads.erase(thr);
751
752 thr->pc = codespace_null();
753
754 /* If this thread is not scheduled, then is it safe to delete
755 it now. Otherwise, let the schedule event (which will
756 execute the thread at of_ZOMBIE) delete the object. */
757 if ((thr->is_scheduled == 0) && (thr->waiting_for_event == 0)) {
758 assert(thr->children.empty());
759 assert(thr->wait_next == 0);
760 if (thr->delay_delete)
761 schedule_del_thr(thr);
762 else
763 vthread_delete(thr);
764 }
765 }
766
// Destroy a thread object. cleanup() releases the stacks and the
// saved file name (and asserts the stacks are empty unless the
// thread was disabled) before the object itself is deleted.
void vthread_delete(vthread_t thr)
{
      thr->cleanup();
      delete thr;
}
772
vthread_mark_scheduled(vthread_t thr)773 void vthread_mark_scheduled(vthread_t thr)
774 {
775 while (thr != 0) {
776 assert(thr->is_scheduled == 0);
777 thr->is_scheduled = 1;
778 thr = thr->wait_next;
779 }
780 }
781
// Request that the currently running thread (if any) be deleted via
// a scheduled event rather than immediately when it is reaped.
void vthread_delay_delete()
{
      if (running_thread)
	    running_thread->delay_delete = 1;
}
787
788 /*
789 * This function runs each thread by fetching an instruction,
790 * incrementing the PC, and executing the instruction. The thread may
791 * be the head of a list, so each thread is run so far as possible.
792 */
vthread_run(vthread_t thr)793 void vthread_run(vthread_t thr)
794 {
795 while (thr != 0) {
796 vthread_t tmp = thr->wait_next;
797 thr->wait_next = 0;
798
799 assert(thr->is_scheduled);
800 thr->is_scheduled = 0;
801
802 running_thread = thr;
803
804 for (;;) {
805 vvp_code_t cp = thr->pc;
806 thr->pc += 1;
807
808 /* Run the opcode implementation. If the execution of
809 the opcode returns false, then the thread is meant to
810 be paused, so break out of the loop. */
811 bool rc = (cp->opcode)(thr, cp);
812 if (rc == false)
813 break;
814 }
815
816 thr = tmp;
817 }
818 running_thread = 0;
819 }
820
821 /*
822 * The CHUNK_LINK instruction is a special next pointer for linking
823 * chunks of code space. It's like a simplified %jmp.
824 */
of_CHUNK_LINK(vthread_t thr,vvp_code_t code)825 bool of_CHUNK_LINK(vthread_t thr, vvp_code_t code)
826 {
827 assert(code->cptr);
828 thr->pc = code->cptr;
829 return true;
830 }
831
832 /*
833 * This is called by an event functor to wake up all the threads on
834 * its list. I in fact created that list in the %wait instruction, and
835 * I also am certain that the waiting_for_event flag is set.
836 */
vthread_schedule_list(vthread_t thr)837 void vthread_schedule_list(vthread_t thr)
838 {
839 for (vthread_t cur = thr ; cur ; cur = cur->wait_next) {
840 assert(cur->waiting_for_event);
841 cur->waiting_for_event = 0;
842 }
843
844 schedule_vthread(thr, 0);
845 }
846
vthread_get_wt_context()847 vvp_context_t vthread_get_wt_context()
848 {
849 if (running_thread)
850 return running_thread->wt_context;
851 else
852 return 0;
853 }
854
vthread_get_rd_context()855 vvp_context_t vthread_get_rd_context()
856 {
857 if (running_thread)
858 return running_thread->rd_context;
859 else
860 return 0;
861 }
862
vthread_get_wt_context_item(unsigned context_idx)863 vvp_context_item_t vthread_get_wt_context_item(unsigned context_idx)
864 {
865 assert(running_thread && running_thread->wt_context);
866 return vvp_get_context_item(running_thread->wt_context, context_idx);
867 }
868
vthread_get_rd_context_item(unsigned context_idx)869 vvp_context_item_t vthread_get_rd_context_item(unsigned context_idx)
870 {
871 assert(running_thread && running_thread->rd_context);
872 return vvp_get_context_item(running_thread->rd_context, context_idx);
873 }
874
875 /*
876 * %abs/wr
877 */
of_ABS_WR(vthread_t thr,vvp_code_t)878 bool of_ABS_WR(vthread_t thr, vvp_code_t)
879 {
880 thr->push_real( fabs(thr->pop_real()) );
881 return true;
882 }
883
of_ALLOC(vthread_t thr,vvp_code_t cp)884 bool of_ALLOC(vthread_t thr, vvp_code_t cp)
885 {
886 /* Allocate a context. */
887 vvp_context_t child_context = vthread_alloc_context(cp->scope);
888
889 /* Push the allocated context onto the write context stack. */
890 vvp_set_stacked_context(child_context, thr->wt_context);
891 thr->wt_context = child_context;
892
893 return true;
894 }
895
of_AND(vthread_t thr,vvp_code_t)896 bool of_AND(vthread_t thr, vvp_code_t)
897 {
898 vvp_vector4_t valb = thr->pop_vec4();
899 vvp_vector4_t&vala = thr->peek_vec4();
900 assert(vala.size() == valb.size());
901 vala &= valb;
902 return true;
903 }
904
905 /*
906 * This function must ALWAYS be called with the val set to the right
907 * size, and initialized with BIT4_0 bits. Certain optimizations rely
908 * on that.
909 */
get_immediate_rval(vvp_code_t cp,vvp_vector4_t & val)910 static void get_immediate_rval(vvp_code_t cp, vvp_vector4_t&val)
911 {
912 uint32_t vala = cp->bit_idx[0];
913 uint32_t valb = cp->bit_idx[1];
914 unsigned wid = cp->number;
915
916 if (valb == 0) {
917 // Special case: if the value is zero, we are done
918 // before we start.
919 if (vala == 0) return;
920
921 // Special case: The value has no X/Z bits, so we can
922 // use the setarray method to write the value all at once.
923 unsigned use_wid = 8*sizeof(unsigned long);
924 if (wid < use_wid)
925 use_wid = wid;
926 unsigned long tmp[1];
927 tmp[0] = vala;
928 val.setarray(0, use_wid, tmp);
929 return;
930 }
931
932 // The immediate value can be values bigger then 32 bits, but
933 // only if the high bits are zero. So at most we need to run
934 // through the loop below 32 times. Maybe less, if the target
935 // width is less. We don't have to do anything special on that
936 // because vala/valb bits will shift away so (vala|valb) will
937 // turn to zero at or before 32 shifts.
938
939 for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
940 uint32_t ba = 0;
941 // Convert the vala/valb bits to a ba number that
942 // matches the encoding of the vvp_bit4_t enumeration.
943 ba = (valb & 1) << 1;
944 ba |= vala & 1;
945
946 // Note that the val is already pre-filled with BIT4_0
947 // bits, os we only need to set non-zero bit values.
948 if (ba) val.set_bit(idx, (vvp_bit4_t)ba);
949
950 vala >>= 1;
951 valb >>= 1;
952 }
953 }
954
955 /*
956 * %add
957 *
958 * Pop r,
959 * Pop l,
960 * Push l+r
961 *
962 * Pop 2 and push 1 is the same as pop 1 and replace the remaining top
963 * of the stack with a new value. That is what we will do.
964 */
of_ADD(vthread_t thr,vvp_code_t)965 bool of_ADD(vthread_t thr, vvp_code_t)
966 {
967 vvp_vector4_t r = thr->pop_vec4();
968 // Rather then pop l, use it directly from the stack. When we
969 // assign to 'l', that will edit the top of the stack, which
970 // replaces a pop and a pull.
971 vvp_vector4_t&l = thr->peek_vec4();
972
973 l.add(r);
974
975 return true;
976 }
977
978 /*
979 * %addi <vala>, <valb>, <wid>
980 *
981 * Pop1 operand, get the other operand from the arguments, and push
982 * the result.
983 */
of_ADDI(vthread_t thr,vvp_code_t cp)984 bool of_ADDI(vthread_t thr, vvp_code_t cp)
985 {
986 unsigned wid = cp->number;
987
988 vvp_vector4_t&l = thr->peek_vec4();
989
990 // I expect that most of the bits of an immediate value are
991 // going to be zero, so start the result vector with all zero
992 // bits. Then we only need to replace the bits that are different.
993 vvp_vector4_t r (wid, BIT4_0);
994 get_immediate_rval (cp, r);
995
996 l.add(r);
997
998 return true;
999 }
1000
1001 /*
1002 * %add/wr
1003 */
of_ADD_WR(vthread_t thr,vvp_code_t)1004 bool of_ADD_WR(vthread_t thr, vvp_code_t)
1005 {
1006 double r = thr->pop_real();
1007 double l = thr->pop_real();
1008 thr->push_real(l + r);
1009 return true;
1010 }
1011
1012 /* %assign/ar <array>, <delay>
1013 * Generate an assignment event to a real array. Index register 3
1014 * contains the canonical address of the word in the memory. <delay>
1015 * is the delay in simulation time. <bit> is the index register
1016 * containing the real value.
1017 */
of_ASSIGN_AR(vthread_t thr,vvp_code_t cp)1018 bool of_ASSIGN_AR(vthread_t thr, vvp_code_t cp)
1019 {
1020 long adr = thr->words[3].w_int;
1021 unsigned delay = cp->bit_idx[0];
1022 double value = thr->pop_real();
1023
1024 if (adr >= 0) {
1025 schedule_assign_array_word(cp->array, adr, value, delay);
1026 }
1027
1028 return true;
1029 }
1030
1031 /* %assign/ar/d <array>, <delay_idx>
1032 * Generate an assignment event to a real array. Index register 3
1033 * contains the canonical address of the word in the memory.
1034 * <delay_idx> is the integer register that contains the delay value.
1035 */
of_ASSIGN_ARD(vthread_t thr,vvp_code_t cp)1036 bool of_ASSIGN_ARD(vthread_t thr, vvp_code_t cp)
1037 {
1038 long adr = thr->words[3].w_int;
1039 vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
1040 double value = thr->pop_real();
1041
1042 if (adr >= 0) {
1043 schedule_assign_array_word(cp->array, adr, value, delay);
1044 }
1045
1046 return true;
1047 }
1048
1049 /* %assign/ar/e <array>
1050 * Generate an assignment event to a real array. Index register 3
1051 * contains the canonical address of the word in the memory. <bit>
1052 * is the index register containing the real value. The event
1053 * information is contained in the thread event control registers
1054 * and is set with %evctl.
1055 */
of_ASSIGN_ARE(vthread_t thr,vvp_code_t cp)1056 bool of_ASSIGN_ARE(vthread_t thr, vvp_code_t cp)
1057 {
1058 long adr = thr->words[3].w_int;
1059 double value = thr->pop_real();
1060
1061 if (adr >= 0) {
1062 if (thr->ecount == 0) {
1063 schedule_assign_array_word(cp->array, adr, value, 0);
1064 } else {
1065 schedule_evctl(cp->array, adr, value, thr->event,
1066 thr->ecount);
1067 }
1068 }
1069
1070 return true;
1071 }
1072
1073 /*
1074 * %assign/vec4 <var>, <delay>
1075 */
of_ASSIGN_VEC4(vthread_t thr,vvp_code_t cp)1076 bool of_ASSIGN_VEC4(vthread_t thr, vvp_code_t cp)
1077 {
1078 vvp_net_ptr_t ptr (cp->net, 0);
1079 unsigned delay = cp->bit_idx[0];
1080 vvp_vector4_t&val = thr->peek_vec4();
1081
1082 schedule_assign_vector(ptr, 0, 0, val, delay);
1083 thr->pop_vec4(1);
1084 return true;
1085 }
1086
1087 /*
1088 * %assign/vec4/a/d <arr>, <offx>, <delx>
1089 */
of_ASSIGN_VEC4_A_D(vthread_t thr,vvp_code_t cp)1090 bool of_ASSIGN_VEC4_A_D(vthread_t thr, vvp_code_t cp)
1091 {
1092 int off_idx = cp->bit_idx[0];
1093 int del_idx = cp->bit_idx[1];
1094 int adr_idx = 3;
1095
1096 long off = off_idx? thr->words[off_idx].w_int : 0;
1097 vvp_time64_t del = del_idx? thr->words[del_idx].w_uint : 0;
1098 long adr = thr->words[adr_idx].w_int;
1099
1100 vvp_vector4_t val = thr->pop_vec4();
1101 unsigned wid = val.size();
1102 const unsigned array_wid = cp->array->get_word_size();
1103
1104 // Abort if flags[4] is set. This can happen if the calculation
1105 // into an index register failed.
1106 if (thr->flags[4] == BIT4_1)
1107 return true;
1108
1109 if (off >= (long)array_wid)
1110 return true;
1111 if (off < 0) {
1112 if ((unsigned)-off >= array_wid)
1113 return true;
1114
1115 int use_off = -off;
1116 assert(wid > (unsigned)use_off);
1117 unsigned use_wid = wid - use_off;
1118 val = val.subvalue(use_off, use_wid);
1119 off = 0;
1120 wid = use_wid;
1121 }
1122 if (off+wid > array_wid) {
1123 val = val.subvalue(0, array_wid-off);
1124 wid = val.size();
1125 }
1126
1127 schedule_assign_array_word(cp->array, adr, off, val, del);
1128
1129 return true;
1130 }
1131
1132 /*
1133 * %assign/vec4/a/e <arr>, <offx>
1134 */
of_ASSIGN_VEC4_A_E(vthread_t thr,vvp_code_t cp)1135 bool of_ASSIGN_VEC4_A_E(vthread_t thr, vvp_code_t cp)
1136 {
1137 int off_idx = cp->bit_idx[0];
1138 int adr_idx = 3;
1139
1140 long off = off_idx? thr->words[off_idx].w_int : 0;
1141 long adr = thr->words[adr_idx].w_int;
1142
1143 vvp_vector4_t val = thr->pop_vec4();
1144 unsigned wid = val.size();
1145 const unsigned array_wid = cp->array->get_word_size();
1146
1147 // Abort if flags[4] is set. This can happen if the calculation
1148 // into an index register failed.
1149 if (thr->flags[4] == BIT4_1)
1150 return true;
1151
1152 if (off >= (long)array_wid)
1153 return true;
1154 if (off < 0) {
1155 if ((unsigned)-off >= array_wid)
1156 return true;
1157
1158 int use_off = -off;
1159 assert(wid > (unsigned)use_off);
1160 unsigned use_wid = wid - use_off;
1161 val = val.subvalue(use_off, use_wid);
1162 off = 0;
1163 wid = use_wid;
1164 }
1165 if (off+wid > array_wid) {
1166 val = val.subvalue(0, array_wid-off);
1167 wid = val.size();
1168 }
1169
1170 if (thr->ecount == 0) {
1171 schedule_assign_array_word(cp->array, adr, off, val, 0);
1172 } else {
1173 schedule_evctl(cp->array, adr, val, off, thr->event, thr->ecount);
1174 }
1175
1176 return true;
1177 }
1178
1179 /*
1180 * %assign/vec4/off/d <var>, <off>, <del>
1181 */
of_ASSIGN_VEC4_OFF_D(vthread_t thr,vvp_code_t cp)1182 bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
1183 {
1184 vvp_net_ptr_t ptr (cp->net, 0);
1185 unsigned off_index = cp->bit_idx[0];
1186 unsigned del_index = cp->bit_idx[1];
1187 vvp_vector4_t val = thr->pop_vec4();
1188 unsigned wid = val.size();
1189
1190 int off = thr->words[off_index].w_int;
1191 vvp_time64_t del = thr->words[del_index].w_uint;
1192
1193 // Abort if flags[4] is set. This can happen if the calculation
1194 // into an index register failed.
1195 if (thr->flags[4] == BIT4_1)
1196 return true;
1197
1198 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1199 assert(sig);
1200
1201 if (off >= (long)sig->value_size())
1202 return true;
1203 if (off < 0) {
1204 if ((unsigned)-off >= wid)
1205 return true;
1206
1207 int use_off = -off;
1208 assert(wid > (unsigned)use_off);
1209 unsigned use_wid = wid - use_off;
1210 val = val.subvalue(use_off, use_wid);
1211 off = 0;
1212 wid = use_wid;
1213 }
1214 if (off+wid > sig->value_size()) {
1215 val = val.subvalue(0, sig->value_size()-off);
1216 wid = val.size();
1217 }
1218
1219 schedule_assign_vector(ptr, off, sig->value_size(), val, del);
1220 return true;
1221 }
1222
1223 /*
1224 * %assign/vec4/off/e <var>, <off>
1225 */
of_ASSIGN_VEC4_OFF_E(vthread_t thr,vvp_code_t cp)1226 bool of_ASSIGN_VEC4_OFF_E(vthread_t thr, vvp_code_t cp)
1227 {
1228 vvp_net_ptr_t ptr (cp->net, 0);
1229 unsigned off_index = cp->bit_idx[0];
1230 vvp_vector4_t val = thr->pop_vec4();
1231 unsigned wid = val.size();
1232
1233 int off = thr->words[off_index].w_int;
1234
1235 // Abort if flags[4] is set. This can happen if the calculation
1236 // into an index register failed.
1237 if (thr->flags[4] == BIT4_1)
1238 return true;
1239
1240 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1241 assert(sig);
1242
1243 if (off >= (long)sig->value_size())
1244 return true;
1245 if (off < 0) {
1246 if ((unsigned)-off >= wid)
1247 return true;
1248
1249 int use_off = -off;
1250 assert((int)wid > use_off);
1251 unsigned use_wid = wid - use_off;
1252 val = val.subvalue(use_off, use_wid);
1253 off = 0;
1254 wid = use_wid;
1255 }
1256 if (off+wid > sig->value_size()) {
1257 val = val.subvalue(0, sig->value_size()-off);
1258 wid = val.size();
1259 }
1260
1261 if (thr->ecount == 0) {
1262 schedule_assign_vector(ptr, off, sig->value_size(), val, 0);
1263 } else {
1264 schedule_evctl(ptr, val, off, sig->value_size(), thr->event, thr->ecount);
1265 }
1266
1267 return true;
1268 }
1269
1270 /*
1271 * %assign/vec4/d <var-label> <delay>
1272 */
of_ASSIGN_VEC4D(vthread_t thr,vvp_code_t cp)1273 bool of_ASSIGN_VEC4D(vthread_t thr, vvp_code_t cp)
1274 {
1275 vvp_net_ptr_t ptr (cp->net, 0);
1276 unsigned del_index = cp->bit_idx[0];
1277 vvp_time64_t del = thr->words[del_index].w_int;
1278
1279 vvp_vector4_t value = thr->pop_vec4();
1280
1281 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1282 assert(sig);
1283
1284 schedule_assign_vector(ptr, 0, sig->value_size(), value, del);
1285
1286 return true;
1287 }
1288
1289 /*
1290 * %assign/vec4/e <var-label>
1291 */
of_ASSIGN_VEC4E(vthread_t thr,vvp_code_t cp)1292 bool of_ASSIGN_VEC4E(vthread_t thr, vvp_code_t cp)
1293 {
1294 vvp_net_ptr_t ptr (cp->net, 0);
1295 vvp_vector4_t value = thr->pop_vec4();
1296
1297 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1298 assert(sig);
1299
1300 if (thr->ecount == 0) {
1301 schedule_assign_vector(ptr, 0, sig->value_size(), value, 0);
1302 } else {
1303 schedule_evctl(ptr, value, 0, sig->value_size(), thr->event, thr->ecount);
1304 }
1305
1306 thr->event = 0;
1307 thr->ecount = 0;
1308 return true;
1309 }
1310
1311 /*
1312 * This is %assign/wr <vpi-label>, <delay>
1313 *
1314 * This assigns (after a delay) a value to a real variable. Use the
 * vpi_put_value function to do the assign, with the delay passed
 * using the vpiTransportDelay mode, as the code below does.
1317 */
of_ASSIGN_WR(vthread_t thr,vvp_code_t cp)1318 bool of_ASSIGN_WR(vthread_t thr, vvp_code_t cp)
1319 {
1320 unsigned delay = cp->bit_idx[0];
1321 double value = thr->pop_real();
1322 s_vpi_time del;
1323
1324 del.type = vpiSimTime;
1325 vpip_time_to_timestruct(&del, delay);
1326
1327 __vpiHandle*tmp = cp->handle;
1328
1329 t_vpi_value val;
1330 val.format = vpiRealVal;
1331 val.value.real = value;
1332 vpi_put_value(tmp, &val, &del, vpiTransportDelay);
1333
1334 return true;
1335 }
1336
of_ASSIGN_WRD(vthread_t thr,vvp_code_t cp)1337 bool of_ASSIGN_WRD(vthread_t thr, vvp_code_t cp)
1338 {
1339 vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
1340 double value = thr->pop_real();
1341 s_vpi_time del;
1342
1343 del.type = vpiSimTime;
1344 vpip_time_to_timestruct(&del, delay);
1345
1346 __vpiHandle*tmp = cp->handle;
1347
1348 t_vpi_value val;
1349 val.format = vpiRealVal;
1350 val.value.real = value;
1351 vpi_put_value(tmp, &val, &del, vpiTransportDelay);
1352
1353 return true;
1354 }
1355
of_ASSIGN_WRE(vthread_t thr,vvp_code_t cp)1356 bool of_ASSIGN_WRE(vthread_t thr, vvp_code_t cp)
1357 {
1358 assert(thr->event != 0);
1359 double value = thr->pop_real();
1360 __vpiHandle*tmp = cp->handle;
1361
1362 // If the count is zero then just put the value.
1363 if (thr->ecount == 0) {
1364 t_vpi_value val;
1365
1366 val.format = vpiRealVal;
1367 val.value.real = value;
1368 vpi_put_value(tmp, &val, 0, vpiNoDelay);
1369 } else {
1370 schedule_evctl(tmp, value, thr->event, thr->ecount);
1371 }
1372
1373 thr->event = 0;
1374 thr->ecount = 0;
1375
1376 return true;
1377 }
1378
of_BLEND(vthread_t thr,vvp_code_t)1379 bool of_BLEND(vthread_t thr, vvp_code_t)
1380 {
1381 vvp_vector4_t vala = thr->pop_vec4();
1382 vvp_vector4_t valb = thr->pop_vec4();
1383 assert(vala.size() == valb.size());
1384
1385 for (unsigned idx = 0 ; idx < vala.size() ; idx += 1) {
1386 if (vala.value(idx) == valb.value(idx))
1387 continue;
1388
1389 vala.set_bit(idx, BIT4_X);
1390 }
1391
1392 thr->push_vec4(vala);
1393 return true;
1394 }
1395
of_BLEND_WR(vthread_t thr,vvp_code_t)1396 bool of_BLEND_WR(vthread_t thr, vvp_code_t)
1397 {
1398 double f = thr->pop_real();
1399 double t = thr->pop_real();
1400 thr->push_real((t == f) ? t : 0.0);
1401 return true;
1402 }
1403
of_BREAKPOINT(vthread_t,vvp_code_t)1404 bool of_BREAKPOINT(vthread_t, vvp_code_t)
1405 {
1406 return true;
1407 }
1408
1409 /*
1410 * %callf/void <code-label>, <scope-label>
1411 * Combine the %fork and %join steps for invoking a function.
1412 */
/*
 * Common tail for the %callf/* opcodes: attach the freshly created
 * function thread as the only child of the caller, run it, and either
 * join immediately (if it ran to completion) or leave the caller in
 * the joining state. Returns true when the caller may continue, false
 * when the caller must block until the child ends.
 */
static bool do_callf_void(vthread_t thr, vthread_t child)
{

      if (child->parent_scope->is_automatic()) {
	    /* The context allocated for this child is the top entry
	       on the write context stack */
	    child->wt_context = thr->wt_context;
	    child->rd_context = thr->wt_context;
      }

	// Mark the function thread as a direct child of the current thread.
      child->parent = thr;
      thr->children.insert(child);
	// This should be the only child
      assert(thr->children.size()==1);

	// Execute the function. This SHOULD run the function to completion,
	// but there are some exceptional situations where it won't.
      assert(child->parent_scope->get_type_code() == vpiFunction);
      child->is_scheduled = 1;
      child->i_am_in_function = 1;
      vthread_run(child);
	// vthread_run left running_thread pointing at the child (or
	// whatever ran last); restore the caller as the running thread.
      running_thread = thr;

      if (child->i_have_ended) {
	      // The function ran to completion; reap it now.
	    do_join(thr, child);
	    return true;
      } else {
	      // The function blocked; the caller must wait for it.
	    thr->i_am_joining = 1;
	    return false;
      }
}
1445
of_CALLF_OBJ(vthread_t thr,vvp_code_t cp)1446 bool of_CALLF_OBJ(vthread_t thr, vvp_code_t cp)
1447 {
1448 vthread_t child = vthread_new(cp->cptr2, cp->scope);
1449 return do_callf_void(thr, child);
1450
1451 // XXXX NOT IMPLEMENTED
1452 }
1453
of_CALLF_REAL(vthread_t thr,vvp_code_t cp)1454 bool of_CALLF_REAL(vthread_t thr, vvp_code_t cp)
1455 {
1456 vthread_t child = vthread_new(cp->cptr2, cp->scope);
1457
1458 // This is the return value. Push a place-holder value. The function
1459 // will replace this with the actual value using a %ret/real instruction.
1460 thr->push_real(0.0);
1461 child->args_real.push_back(0);
1462
1463 return do_callf_void(thr, child);
1464 }
1465
of_CALLF_STR(vthread_t thr,vvp_code_t cp)1466 bool of_CALLF_STR(vthread_t thr, vvp_code_t cp)
1467 {
1468 vthread_t child = vthread_new(cp->cptr2, cp->scope);
1469
1470 thr->push_str("");
1471 child->args_str.push_back(0);
1472
1473 return do_callf_void(thr, child);
1474 }
1475
of_CALLF_VEC4(vthread_t thr,vvp_code_t cp)1476 bool of_CALLF_VEC4(vthread_t thr, vvp_code_t cp)
1477 {
1478 vthread_t child = vthread_new(cp->cptr2, cp->scope);
1479
1480 vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(cp->scope);
1481 assert(scope_func);
1482
1483 // This is the return value. Push a place-holder value. The function
1484 // will replace this with the actual value using a %ret/real instruction.
1485 thr->push_vec4(vvp_vector4_t(scope_func->get_func_width(), scope_func->get_func_init_val()));
1486 child->args_vec4.push_back(0);
1487
1488 return do_callf_void(thr, child);
1489 }
1490
of_CALLF_VOID(vthread_t thr,vvp_code_t cp)1491 bool of_CALLF_VOID(vthread_t thr, vvp_code_t cp)
1492 {
1493 vthread_t child = vthread_new(cp->cptr2, cp->scope);
1494 return do_callf_void(thr, child);
1495 }
1496
1497 /*
1498 * The %cassign/link instruction connects a source node to a
1499 * destination node. The destination node must be a signal, as it is
1500 * marked with the source of the cassign so that it may later be
1501 * unlinked without specifically knowing the source that this
1502 * instruction used.
1503 */
of_CASSIGN_LINK(vthread_t,vvp_code_t cp)1504 bool of_CASSIGN_LINK(vthread_t, vvp_code_t cp)
1505 {
1506 vvp_net_t*dst = cp->net;
1507 vvp_net_t*src = cp->net2;
1508
1509 vvp_fun_signal_base*sig
1510 = dynamic_cast<vvp_fun_signal_base*>(dst->fun);
1511 assert(sig);
1512
1513 /* Any previous continuous assign should have been removed already. */
1514 assert(sig->cassign_link == 0);
1515
1516 sig->cassign_link = src;
1517
1518 /* Link the output of the src to the port[1] (the cassign
1519 port) of the destination. */
1520 vvp_net_ptr_t dst_ptr (dst, 1);
1521 src->link(dst_ptr);
1522
1523 return true;
1524 }
1525
1526 /*
1527 * If there is an existing continuous assign linked to the destination
1528 * node, unlink it. This must be done before applying a new continuous
1529 * assign, otherwise the initial assigned value will be propagated to
1530 * any other nodes driven by the old continuous assign source.
1531 */
cassign_unlink(vvp_net_t * dst)1532 static void cassign_unlink(vvp_net_t*dst)
1533 {
1534 vvp_fun_signal_base*sig
1535 = dynamic_cast<vvp_fun_signal_base*>(dst->fun);
1536 assert(sig);
1537
1538 if (sig->cassign_link == 0)
1539 return;
1540
1541 vvp_net_ptr_t tmp (dst, 1);
1542 sig->cassign_link->unlink(tmp);
1543 sig->cassign_link = 0;
1544 }
1545
1546 /*
1547 * The %cassign/v instruction invokes a continuous assign of a
1548 * constant value to a signal. The instruction arguments are:
1549 *
1550 * %cassign/vec4 <net>;
1551 *
 * Where the <net> is the net label assembled into a vvp_net pointer.
 * The value to assign is popped from the thread's vec4 stack.
1554 *
1555 * This instruction writes vvp_vector4_t values to port-1 of the
1556 * target signal.
1557 */
of_CASSIGN_VEC4(vthread_t thr,vvp_code_t cp)1558 bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t cp)
1559 {
1560 vvp_net_t*net = cp->net;
1561 vvp_vector4_t value = thr->pop_vec4();
1562
1563 /* Remove any previous continuous assign to this net. */
1564 cassign_unlink(net);
1565
1566 /* Set the value into port 1 of the destination. */
1567 vvp_net_ptr_t ptr (net, 1);
1568 vvp_send_vec4(ptr, value, 0);
1569
1570 return true;
1571 }
1572
1573 /*
1574 * %cassign/vec4/off <var>, <off>
1575 */
of_CASSIGN_VEC4_OFF(vthread_t thr,vvp_code_t cp)1576 bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t cp)
1577 {
1578 vvp_net_t*net = cp->net;
1579 unsigned base_idx = cp->bit_idx[0];
1580 long base = thr->words[base_idx].w_int;
1581 vvp_vector4_t value = thr->pop_vec4();
1582 unsigned wid = value.size();
1583
1584 if (thr->flags[4] == BIT4_1)
1585 return true;
1586
1587 /* Remove any previous continuous assign to this net. */
1588 cassign_unlink(net);
1589
1590 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
1591 assert(sig);
1592
1593 if (base < 0 && (wid <= (unsigned)-base))
1594 return true;
1595
1596 if (base >= (long)sig->value_size())
1597 return true;
1598
1599 if (base < 0) {
1600 wid -= (unsigned) -base;
1601 base = 0;
1602 value.resize(wid);
1603 }
1604
1605 if (base+wid > sig->value_size()) {
1606 wid = sig->value_size() - base;
1607 value.resize(wid);
1608 }
1609
1610 vvp_net_ptr_t ptr (net, 1);
1611 vvp_send_vec4_pv(ptr, value, base, wid, sig->value_size(), 0);
1612 return true;
1613 }
1614
of_CASSIGN_WR(vthread_t thr,vvp_code_t cp)1615 bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
1616 {
1617 vvp_net_t*net = cp->net;
1618 double value = thr->pop_real();
1619
1620 /* Remove any previous continuous assign to this net. */
1621 cassign_unlink(net);
1622
1623 /* Set the value into port 1 of the destination. */
1624 vvp_net_ptr_t ptr (net, 1);
1625 vvp_send_real(ptr, value, 0);
1626
1627 return true;
1628 }
1629
1630 /*
1631 * %cast2
1632 */
of_CAST2(vthread_t thr,vvp_code_t)1633 bool of_CAST2(vthread_t thr, vvp_code_t)
1634 {
1635 vvp_vector4_t&val = thr->peek_vec4();
1636 unsigned wid = val.size();
1637
1638 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
1639 switch (val.value(idx)) {
1640 case BIT4_0:
1641 case BIT4_1:
1642 break;
1643 default:
1644 val.set_bit(idx, BIT4_0);
1645 break;
1646 }
1647 }
1648
1649 return true;
1650 }
1651
do_cast_vec_dar(vthread_t thr,vvp_code_t cp,bool as_vec4)1652 bool do_cast_vec_dar(vthread_t thr, vvp_code_t cp, bool as_vec4)
1653 {
1654 unsigned wid = cp->number;
1655
1656 vvp_object_t obj;
1657 thr->pop_object(obj);
1658
1659 vvp_darray*darray = obj.peek<vvp_darray>();
1660 assert(darray);
1661
1662 vvp_vector4_t vec = darray->get_bitstream(as_vec4);
1663 if (vec.size() != wid) {
1664 cerr << thr->get_fileline()
1665 << "VVP error: size mismatch when casting dynamic array to vector." << endl;
1666 thr->push_vec4(vvp_vector4_t(wid));
1667 schedule_stop(0);
1668 return false;
1669 }
1670 thr->push_vec4(vec);
1671 return true;
1672 }
1673
1674 /*
1675 * %cast/vec2/dar <wid>
1676 */
of_CAST_VEC2_DAR(vthread_t thr,vvp_code_t cp)1677 bool of_CAST_VEC2_DAR(vthread_t thr, vvp_code_t cp)
1678 {
1679 return do_cast_vec_dar(thr, cp, false);
1680 }
1681
1682 /*
1683 * %cast/vec4/dar <wid>
1684 */
of_CAST_VEC4_DAR(vthread_t thr,vvp_code_t cp)1685 bool of_CAST_VEC4_DAR(vthread_t thr, vvp_code_t cp)
1686 {
1687 return do_cast_vec_dar(thr, cp, true);
1688 }
1689
1690 /*
1691 * %cast/vec4/str <wid>
1692 */
of_CAST_VEC4_STR(vthread_t thr,vvp_code_t cp)1693 bool of_CAST_VEC4_STR(vthread_t thr, vvp_code_t cp)
1694 {
1695 unsigned wid = cp->number;
1696 string str = thr->pop_str();
1697
1698 vvp_vector4_t vec(wid, BIT4_0);
1699
1700 if (wid != 8*str.length()) {
1701 cerr << thr->get_fileline()
1702 << "VVP error: size mismatch when casting string to vector." << endl;
1703 thr->push_vec4(vec);
1704 schedule_stop(0);
1705 return false;
1706 }
1707
1708 unsigned sdx = 0;
1709 unsigned vdx = wid;
1710 while (vdx > 0) {
1711 char ch = str[sdx++];
1712 vdx -= 8;
1713 for (unsigned bdx = 0; bdx < 8; bdx += 1) {
1714 if (ch & 1)
1715 vec.set_bit(vdx+bdx, BIT4_1);
1716 ch >>= 1;
1717 }
1718 }
1719
1720 thr->push_vec4(vec);
1721 return true;
1722 }
1723
do_CMPE(vthread_t thr,const vvp_vector4_t & lval,const vvp_vector4_t & rval)1724 static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
1725 {
1726 assert(rval.size() == lval.size());
1727
1728 if (lval.has_xz() || rval.has_xz()) {
1729
1730 unsigned wid = lval.size();
1731 vvp_bit4_t eq = BIT4_1;
1732 vvp_bit4_t eeq = BIT4_1;
1733
1734 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
1735 vvp_bit4_t lv = lval.value(idx);
1736 vvp_bit4_t rv = rval.value(idx);
1737
1738 if (lv != rv)
1739 eeq = BIT4_0;
1740
1741 if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
1742 eq = BIT4_X;
1743 if ((lv == BIT4_0) && (rv==BIT4_1))
1744 eq = BIT4_0;
1745 if ((lv == BIT4_1) && (rv==BIT4_0))
1746 eq = BIT4_0;
1747
1748 if (eq == BIT4_0)
1749 break;
1750 }
1751
1752 thr->flags[4] = eq;
1753 thr->flags[6] = eeq;
1754
1755 } else {
1756 // If there are no XZ bits anywhere, then the results of
1757 // == match the === test.
1758 thr->flags[4] = thr->flags[6] = (lval.eeq(rval)? BIT4_1 : BIT4_0);
1759 }
1760 }
1761
1762 /*
1763 * %cmp/e
1764 *
1765 * Pop the operands from the stack, and do not replace them. The
1766 * results are written to flag bits:
1767 *
1768 * 4: eq (equal)
1769 *
1770 * 6: eeq (case equal)
1771 */
of_CMPE(vthread_t thr,vvp_code_t)1772 bool of_CMPE(vthread_t thr, vvp_code_t)
1773 {
1774 // We are going to pop these and push nothing in their
1775 // place, but for now it is more efficient to use a constant
1776 // reference. When we finish, pop the stack without copies.
1777 const vvp_vector4_t&rval = thr->peek_vec4(0);
1778 const vvp_vector4_t&lval = thr->peek_vec4(1);
1779
1780 do_CMPE(thr, lval, rval);
1781
1782 thr->pop_vec4(2);
1783 return true;
1784 }
1785
of_CMPNE(vthread_t thr,vvp_code_t)1786 bool of_CMPNE(vthread_t thr, vvp_code_t)
1787 {
1788 // We are going to pop these and push nothing in their
1789 // place, but for now it is more efficient to use a constant
1790 // reference. When we finish, pop the stack without copies.
1791 const vvp_vector4_t&rval = thr->peek_vec4(0);
1792 const vvp_vector4_t&lval = thr->peek_vec4(1);
1793
1794 do_CMPE(thr, lval, rval);
1795
1796 thr->flags[4] = ~thr->flags[4];
1797 thr->flags[6] = ~thr->flags[6];
1798
1799 thr->pop_vec4(2);
1800 return true;
1801 }
1802
1803 /*
1804 * %cmpi/e <vala>, <valb>, <wid>
1805 *
 * Pop 1 operand, get the other operand from the arguments.
1807 */
of_CMPIE(vthread_t thr,vvp_code_t cp)1808 bool of_CMPIE(vthread_t thr, vvp_code_t cp)
1809 {
1810 unsigned wid = cp->number;
1811
1812 vvp_vector4_t&lval = thr->peek_vec4();
1813
1814 // I expect that most of the bits of an immediate value are
1815 // going to be zero, so start the result vector with all zero
1816 // bits. Then we only need to replace the bits that are different.
1817 vvp_vector4_t rval (wid, BIT4_0);
1818 get_immediate_rval (cp, rval);
1819
1820 do_CMPE(thr, lval, rval);
1821
1822 thr->pop_vec4(1);
1823 return true;
1824 }
1825
of_CMPINE(vthread_t thr,vvp_code_t cp)1826 bool of_CMPINE(vthread_t thr, vvp_code_t cp)
1827 {
1828 unsigned wid = cp->number;
1829
1830 vvp_vector4_t&lval = thr->peek_vec4();
1831
1832 // I expect that most of the bits of an immediate value are
1833 // going to be zero, so start the result vector with all zero
1834 // bits. Then we only need to replace the bits that are different.
1835 vvp_vector4_t rval (wid, BIT4_0);
1836 get_immediate_rval (cp, rval);
1837
1838 do_CMPE(thr, lval, rval);
1839
1840 thr->flags[4] = ~thr->flags[4];
1841 thr->flags[6] = ~thr->flags[6];
1842
1843 thr->pop_vec4(1);
1844 return true;
1845 }
1846
1847
1848
do_CMPS(vthread_t thr,const vvp_vector4_t & lval,const vvp_vector4_t & rval)1849 static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
1850 {
1851 assert(rval.size() == lval.size());
1852
1853 // If either value has XZ bits, then the eq and lt values are
1854 // known already to be X. Just calculate the eeq result as a
1855 // special case and short circuit the rest of the compare.
1856 if (lval.has_xz() || rval.has_xz()) {
1857 thr->flags[4] = BIT4_X; // eq
1858 thr->flags[5] = BIT4_X; // lt
1859 thr->flags[6] = lval.eeq(rval)? BIT4_1 : BIT4_0;
1860 return;
1861 }
1862
1863 // Past this point, we know we are dealing only with fully
1864 // defined values.
1865 unsigned wid = lval.size();
1866
1867 const vvp_bit4_t sig1 = lval.value(wid-1);
1868 const vvp_bit4_t sig2 = rval.value(wid-1);
1869
1870 // If the lval is <0 and the rval is >=0, then we know the result.
1871 if ((sig1 == BIT4_1) && (sig2 == BIT4_0)) {
1872 thr->flags[4] = BIT4_0; // eq;
1873 thr->flags[5] = BIT4_1; // lt;
1874 thr->flags[6] = BIT4_0; // eeq
1875 return;
1876 }
1877
1878 // If the lval is >=0 and the rval is <0, then we know the result.
1879 if ((sig1 == BIT4_0) && (sig2 == BIT4_1)) {
1880 thr->flags[4] = BIT4_0; // eq;
1881 thr->flags[5] = BIT4_0; // lt;
1882 thr->flags[6] = BIT4_0; // eeq
1883 return;
1884 }
1885
1886 // The values have the same sign, so we have to look at the
1887 // actual value. Scan from the MSB down. As soon as we find a
1888 // bit that differs, we know the result.
1889
1890 for (unsigned idx = 1 ; idx < wid ; idx += 1) {
1891 vvp_bit4_t lv = lval.value(wid-1-idx);
1892 vvp_bit4_t rv = rval.value(wid-1-idx);
1893
1894 if (lv == rv)
1895 continue;
1896
1897 thr->flags[4] = BIT4_0; // eq
1898 thr->flags[6] = BIT4_0; // eeq
1899
1900 if (lv==BIT4_0) {
1901 thr->flags[5] = BIT4_1; // lt
1902 } else {
1903 thr->flags[5] = BIT4_0; // lt
1904 }
1905 return;
1906 }
1907
1908 // If we survive the loop above, then the values must be equal.
1909 thr->flags[4] = BIT4_1;
1910 thr->flags[5] = BIT4_0;
1911 thr->flags[6] = BIT4_1;
1912 }
1913
1914 /*
1915 * %cmp/s
1916 *
1917 * Pop the operands from the stack, and do not replace them. The
1918 * results are written to flag bits:
1919 *
1920 * 4: eq (equal)
1921 * 5: lt (less than)
1922 * 6: eeq (case equal)
1923 */
of_CMPS(vthread_t thr,vvp_code_t)1924 bool of_CMPS(vthread_t thr, vvp_code_t)
1925 {
1926 // We are going to pop these and push nothing in their
1927 // place, but for now it is more efficient to use a constant
1928 // reference. When we finish, pop the stack without copies.
1929 const vvp_vector4_t&rval = thr->peek_vec4(0);
1930 const vvp_vector4_t&lval = thr->peek_vec4(1);
1931
1932 do_CMPS(thr, lval, rval);
1933
1934 thr->pop_vec4(2);
1935 return true;
1936 }
1937
1938 /*
1939 * %cmpi/s <vala>, <valb>, <wid>
1940 *
 * Pop 1 operand, get the other operand from the arguments.
1942 */
of_CMPIS(vthread_t thr,vvp_code_t cp)1943 bool of_CMPIS(vthread_t thr, vvp_code_t cp)
1944 {
1945 unsigned wid = cp->number;
1946
1947 vvp_vector4_t&lval = thr->peek_vec4();
1948
1949 // I expect that most of the bits of an immediate value are
1950 // going to be zero, so start the result vector with all zero
1951 // bits. Then we only need to replace the bits that are different.
1952 vvp_vector4_t rval (wid, BIT4_0);
1953 get_immediate_rval (cp, rval);
1954
1955 do_CMPS(thr, lval, rval);
1956
1957 thr->pop_vec4(1);
1958 return true;
1959 }
1960
of_CMPSTR(vthread_t thr,vvp_code_t)1961 bool of_CMPSTR(vthread_t thr, vvp_code_t)
1962 {
1963 string re = thr->pop_str();
1964 string le = thr->pop_str();
1965
1966 int rc = strcmp(le.c_str(), re.c_str());
1967
1968 vvp_bit4_t eq;
1969 vvp_bit4_t lt;
1970
1971 if (rc == 0) {
1972 eq = BIT4_1;
1973 lt = BIT4_0;
1974 } else if (rc < 0) {
1975 eq = BIT4_0;
1976 lt = BIT4_1;
1977 } else {
1978 eq = BIT4_0;
1979 lt = BIT4_0;
1980 }
1981
1982 thr->flags[4] = eq;
1983 thr->flags[5] = lt;
1984
1985 return true;
1986 }
1987
of_CMPU_the_hard_way(vthread_t thr,unsigned wid,const vvp_vector4_t & lval,const vvp_vector4_t & rval)1988 static void of_CMPU_the_hard_way(vthread_t thr, unsigned wid,
1989 const vvp_vector4_t&lval,
1990 const vvp_vector4_t&rval)
1991 {
1992 vvp_bit4_t eq = BIT4_1;
1993 vvp_bit4_t eeq = BIT4_1;
1994
1995 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
1996 vvp_bit4_t lv = lval.value(idx);
1997 vvp_bit4_t rv = rval.value(idx);
1998
1999 if (lv != rv)
2000 eeq = BIT4_0;
2001
2002 if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
2003 eq = BIT4_X;
2004 if ((lv == BIT4_0) && (rv==BIT4_1))
2005 eq = BIT4_0;
2006 if ((lv == BIT4_1) && (rv==BIT4_0))
2007 eq = BIT4_0;
2008
2009 if (eq == BIT4_0)
2010 break;
2011
2012 }
2013
2014 thr->flags[4] = eq;
2015 thr->flags[5] = BIT4_X;
2016 thr->flags[6] = eeq;
2017 }
2018
do_CMPU(vthread_t thr,const vvp_vector4_t & lval,const vvp_vector4_t & rval)2019 static void do_CMPU(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
2020 {
2021 vvp_bit4_t eq = BIT4_1;
2022 vvp_bit4_t lt = BIT4_0;
2023
2024 if (rval.size() != lval.size()) {
2025 cerr << thr->get_fileline()
2026 << "VVP ERROR: %cmp/u operand width mismatch: lval=" << lval
2027 << ", rval=" << rval << endl;
2028 }
2029 assert(rval.size() == lval.size());
2030 unsigned wid = lval.size();
2031
2032 unsigned long*larray = lval.subarray(0,wid);
2033 if (larray == 0) return of_CMPU_the_hard_way(thr, wid, lval, rval);
2034
2035 unsigned long*rarray = rval.subarray(0,wid);
2036 if (rarray == 0) {
2037 delete[]larray;
2038 return of_CMPU_the_hard_way(thr, wid, lval, rval);
2039 }
2040
2041 unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
2042
2043 for (unsigned wdx = 0 ; wdx < words ; wdx += 1) {
2044 if (larray[wdx] == rarray[wdx])
2045 continue;
2046
2047 eq = BIT4_0;
2048 if (larray[wdx] < rarray[wdx])
2049 lt = BIT4_1;
2050 else
2051 lt = BIT4_0;
2052 }
2053
2054 delete[]larray;
2055 delete[]rarray;
2056
2057 thr->flags[4] = eq;
2058 thr->flags[5] = lt;
2059 thr->flags[6] = eq;
2060 }
2061
of_CMPU(vthread_t thr,vvp_code_t)2062 bool of_CMPU(vthread_t thr, vvp_code_t)
2063 {
2064
2065 const vvp_vector4_t&rval = thr->peek_vec4(0);
2066 const vvp_vector4_t&lval = thr->peek_vec4(1);
2067
2068 do_CMPU(thr, lval, rval);
2069
2070 thr->pop_vec4(2);
2071 return true;
2072 }
2073
2074 /*
2075 * %cmpi/u <vala>, <valb>, <wid>
2076 *
 * Pop 1 operand, get the other operand from the arguments.
2078 */
of_CMPIU(vthread_t thr,vvp_code_t cp)2079 bool of_CMPIU(vthread_t thr, vvp_code_t cp)
2080 {
2081 unsigned wid = cp->number;
2082
2083 vvp_vector4_t&lval = thr->peek_vec4();
2084
2085 // I expect that most of the bits of an immediate value are
2086 // going to be zero, so start the result vector with all zero
2087 // bits. Then we only need to replace the bits that are different.
2088 vvp_vector4_t rval (wid, BIT4_0);
2089 get_immediate_rval (cp, rval);
2090
2091 do_CMPU(thr, lval, rval);
2092
2093 thr->pop_vec4(1);
2094 return true;
2095 }
2096
2097
2098 /*
2099 * %cmp/x
2100 */
of_CMPX(vthread_t thr,vvp_code_t)2101 bool of_CMPX(vthread_t thr, vvp_code_t)
2102 {
2103 vvp_bit4_t eq = BIT4_1;
2104 vvp_vector4_t rval = thr->pop_vec4();
2105 vvp_vector4_t lval = thr->pop_vec4();
2106
2107 assert(rval.size() == lval.size());
2108 unsigned wid = lval.size();
2109
2110 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2111 vvp_bit4_t lv = lval.value(idx);
2112 vvp_bit4_t rv = rval.value(idx);
2113 if ((lv != rv) && !bit4_is_xz(lv) && !bit4_is_xz(rv)) {
2114 eq = BIT4_0;
2115 break;
2116 }
2117 }
2118
2119 thr->flags[4] = eq;
2120 return true;
2121 }
2122
do_CMPWE(vthread_t thr,const vvp_vector4_t & lval,const vvp_vector4_t & rval)2123 static void do_CMPWE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
2124 {
2125 assert(rval.size() == lval.size());
2126
2127 if (lval.has_xz() || rval.has_xz()) {
2128
2129 unsigned wid = lval.size();
2130 vvp_bit4_t eq = BIT4_1;
2131
2132 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2133 vvp_bit4_t lv = lval.value(idx);
2134 vvp_bit4_t rv = rval.value(idx);
2135
2136 if (bit4_is_xz(rv))
2137 continue;
2138 if ((eq == BIT4_1) && bit4_is_xz(lv))
2139 eq = BIT4_X;
2140 if ((lv == BIT4_0) && (rv==BIT4_1))
2141 eq = BIT4_0;
2142 if ((lv == BIT4_1) && (rv==BIT4_0))
2143 eq = BIT4_0;
2144
2145 if (eq == BIT4_0)
2146 break;
2147 }
2148
2149 thr->flags[4] = eq;
2150
2151 } else {
2152 // If there are no XZ bits anywhere, then the results of
2153 // ==? match the === test.
2154 thr->flags[4] = (lval.eeq(rval)? BIT4_1 : BIT4_0);
2155 }
2156 }
2157
of_CMPWE(vthread_t thr,vvp_code_t)2158 bool of_CMPWE(vthread_t thr, vvp_code_t)
2159 {
2160 // We are going to pop these and push nothing in their
2161 // place, but for now it is more efficient to use a constant
2162 // reference. When we finish, pop the stack without copies.
2163 const vvp_vector4_t&rval = thr->peek_vec4(0);
2164 const vvp_vector4_t&lval = thr->peek_vec4(1);
2165
2166 do_CMPWE(thr, lval, rval);
2167
2168 thr->pop_vec4(2);
2169 return true;
2170 }
2171
of_CMPWNE(vthread_t thr,vvp_code_t)2172 bool of_CMPWNE(vthread_t thr, vvp_code_t)
2173 {
2174 // We are going to pop these and push nothing in their
2175 // place, but for now it is more efficient to use a constant
2176 // reference. When we finish, pop the stack without copies.
2177 const vvp_vector4_t&rval = thr->peek_vec4(0);
2178 const vvp_vector4_t&lval = thr->peek_vec4(1);
2179
2180 do_CMPWE(thr, lval, rval);
2181
2182 thr->flags[4] = ~thr->flags[4];
2183
2184 thr->pop_vec4(2);
2185 return true;
2186 }
2187
of_CMPWR(vthread_t thr,vvp_code_t)2188 bool of_CMPWR(vthread_t thr, vvp_code_t)
2189 {
2190 double r = thr->pop_real();
2191 double l = thr->pop_real();
2192
2193 vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2194 vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;
2195
2196 thr->flags[4] = eq;
2197 thr->flags[5] = lt;
2198
2199 return true;
2200 }
2201
of_CMPWS(vthread_t thr,vvp_code_t cp)2202 bool of_CMPWS(vthread_t thr, vvp_code_t cp)
2203 {
2204 int64_t l = thr->words[cp->bit_idx[0]].w_int;
2205 int64_t r = thr->words[cp->bit_idx[1]].w_int;
2206
2207 vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2208 vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;
2209
2210 thr->flags[4] = eq;
2211 thr->flags[5] = lt;
2212
2213 return true;
2214 }
2215
of_CMPWU(vthread_t thr,vvp_code_t cp)2216 bool of_CMPWU(vthread_t thr, vvp_code_t cp)
2217 {
2218 uint64_t l = thr->words[cp->bit_idx[0]].w_uint;
2219 uint64_t r = thr->words[cp->bit_idx[1]].w_uint;
2220
2221 vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2222 vvp_bit4_t lt = (l < r)? BIT4_1 : BIT4_0;
2223
2224 thr->flags[4] = eq;
2225 thr->flags[5] = lt;
2226
2227 return true;
2228 }
2229
2230 /*
2231 * %cmp/z
2232 */
of_CMPZ(vthread_t thr,vvp_code_t)2233 bool of_CMPZ(vthread_t thr, vvp_code_t)
2234 {
2235 vvp_bit4_t eq = BIT4_1;
2236 vvp_vector4_t rval = thr->pop_vec4();
2237 vvp_vector4_t lval = thr->pop_vec4();
2238
2239 assert(rval.size() == lval.size());
2240 unsigned wid = lval.size();
2241
2242 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2243 vvp_bit4_t lv = lval.value(idx);
2244 vvp_bit4_t rv = rval.value(idx);
2245 if ((lv != rv) && (rv != BIT4_Z) && (lv != BIT4_Z)) {
2246 eq = BIT4_0;
2247 break;
2248 }
2249 }
2250
2251 thr->flags[4] = eq;
2252 return true;
2253 }
2254
2255 /*
2256 * %concat/str;
2257 */
of_CONCAT_STR(vthread_t thr,vvp_code_t)2258 bool of_CONCAT_STR(vthread_t thr, vvp_code_t)
2259 {
2260 string text = thr->pop_str();
2261 thr->peek_str(0).append(text);
2262 return true;
2263 }
2264
2265 /*
2266 * %concati/str <string>;
2267 */
of_CONCATI_STR(vthread_t thr,vvp_code_t cp)2268 bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
2269 {
2270 const char*text = cp->text;
2271 thr->peek_str(0).append(text);
2272 return true;
2273 }
2274
2275 /*
2276 * %concat/vec4
2277 */
of_CONCAT_VEC4(vthread_t thr,vvp_code_t)2278 bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t)
2279 {
2280 const vvp_vector4_t&lsb = thr->peek_vec4(0);
2281 const vvp_vector4_t&msb = thr->peek_vec4(1);
2282
2283 // The result is the size of the top two vectors in the stack.
2284 vvp_vector4_t res (msb.size() + lsb.size(), BIT4_X);
2285
2286 // Build up the result.
2287 res.set_vec(0, lsb);
2288 res.set_vec(lsb.size(), msb);
2289
2290 // Rearrange the stack to pop the inputs and push the
2291 // result. Do that by actually popping only 1 stack position
2292 // and replacing the new top with the new value.
2293 thr->pop_vec4(1);
2294 thr->peek_vec4() = res;
2295
2296 return true;
2297 }
2298
2299 /*
2300 * %concati/vec4 <vala>, <valb>, <wid>
2301 *
2302 * Concat the immediate value to the LOW bits of the concatenation.
2303 * Get the HIGH bits from the top of the vec4 stack.
2304 */
of_CONCATI_VEC4(vthread_t thr,vvp_code_t cp)2305 bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t cp)
2306 {
2307 uint32_t vala = cp->bit_idx[0];
2308 uint32_t valb = cp->bit_idx[1];
2309 unsigned wid = cp->number;
2310
2311 vvp_vector4_t&msb = thr->peek_vec4();
2312
2313 // I expect that most of the bits of an immediate value are
2314 // going to be zero, so start the result vector with all zero
2315 // bits. Then we only need to replace the bits that are different.
2316 vvp_vector4_t lsb (wid, BIT4_0);
2317
2318 // The %concati/vec4 can create values bigger then 32 bits, but
2319 // only if the high bits are zero. So at most we need to run
2320 // through the loop below 32 times. Maybe less, if the target
2321 // width is less. We don't have to do anything special on that
2322 // because vala/valb bits will shift away so (vala|valb) will
2323 // turn to zero at or before 32 shifts.
2324
2325 for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
2326 uint32_t ba = 0;
2327 // Convert the vala/valb bits to a ba number that can be
2328 // used to select what goes into the value.
2329 ba = (valb & 1) << 1;
2330 ba |= vala & 1;
2331
2332 switch (ba) {
2333 case 1:
2334 lsb.set_bit(idx, BIT4_1);
2335 break;
2336 case 2:
2337 lsb.set_bit(idx, BIT4_Z);
2338 break;
2339 case 3:
2340 lsb.set_bit(idx, BIT4_X);
2341 break;
2342 default:
2343 break;
2344 }
2345
2346 vala >>= 1;
2347 valb >>= 1;
2348 }
2349
2350 vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X);
2351 res.set_vec(0, lsb);
2352 res.set_vec(lsb.size(), msb);
2353
2354 msb = res;
2355 return true;
2356 }
2357
2358 /*
2359 * %cvt/rv
2360 */
of_CVT_RV(vthread_t thr,vvp_code_t)2361 bool of_CVT_RV(vthread_t thr, vvp_code_t)
2362 {
2363 double val;
2364 vvp_vector4_t val4 = thr->pop_vec4();
2365 vector4_to_value(val4, val, false);
2366 thr->push_real(val);
2367 return true;
2368 }
2369
2370 /*
2371 * %cvt/rv/s
2372 */
of_CVT_RV_S(vthread_t thr,vvp_code_t)2373 bool of_CVT_RV_S(vthread_t thr, vvp_code_t)
2374 {
2375 double val;
2376 vvp_vector4_t val4 = thr->pop_vec4();
2377 vector4_to_value(val4, val, true);
2378 thr->push_real(val);
2379 return true;
2380 }
2381
2382 /*
2383 * %cvt/sr <idx>
2384 * Pop the top value from the real stack, convert it to a 64bit signed
2385 * and save it to the indexed register.
2386 */
of_CVT_SR(vthread_t thr,vvp_code_t cp)2387 bool of_CVT_SR(vthread_t thr, vvp_code_t cp)
2388 {
2389 double r = thr->pop_real();
2390 thr->words[cp->bit_idx[0]].w_int = i64round(r);
2391
2392 return true;
2393 }
2394
2395 /*
2396 * %cvt/ur <idx>
2397 */
of_CVT_UR(vthread_t thr,vvp_code_t cp)2398 bool of_CVT_UR(vthread_t thr, vvp_code_t cp)
2399 {
2400 double r = thr->pop_real();
2401 if (r >= 0.0)
2402 thr->words[cp->bit_idx[0]].w_uint = (uint64_t)floor(r+0.5);
2403 else
2404 thr->words[cp->bit_idx[0]].w_uint = (uint64_t)ceil(r-0.5);
2405
2406 return true;
2407 }
2408
2409 /*
2410 * %cvt/vr <wid>
2411 */
of_CVT_VR(vthread_t thr,vvp_code_t cp)2412 bool of_CVT_VR(vthread_t thr, vvp_code_t cp)
2413 {
2414 double r = thr->pop_real();
2415 unsigned wid = cp->number;
2416
2417 vvp_vector4_t tmp(wid, r);
2418 thr->push_vec4(tmp);
2419 return true;
2420 }
2421
2422 /*
2423 * This implements the %deassign instruction. All we do is write a
2424 * long(1) to port-3 of the addressed net. This turns off an active
2425 * continuous assign activated by %cassign/v
2426 */
/*
 * %deassign <net>, <base>, <width>
 * Turn off an active continuous assign (started by %cassign/v) on
 * the addressed variable, or on a part of it.
 */
bool of_DEASSIGN(vthread_t, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned base  = cp->bit_idx[0];   // LSB offset of the part to deassign
      unsigned width = cp->bit_idx[1];   // width of the part to deassign

      vvp_signal_value*fil = dynamic_cast<vvp_signal_value*> (net->fil);
      assert(fil);
      vvp_fun_signal_vec*sig = dynamic_cast<vvp_fun_signal_vec*>(net->fun);
      assert(sig);

	// Clamp the part select to the actual signal size; a part
	// entirely out of range is a no-op.
      if (base >= fil->value_size()) return true;
      if (base+width > fil->value_size()) width = fil->value_size() - base;

      bool full_sig = base == 0 && width == fil->value_size();

      // This is the net that is forcing me...
      if (vvp_net_t*src = sig->cassign_link) {
	    if (! full_sig) {
		    // A net-driven continuous assign covers the whole
		    // signal, so a partial deassign cannot unlink it.
		  fprintf(stderr, "Sorry: when a signal is assigning a "
			  "register, I cannot deassign part of it.\n");
		  exit(1);
	    }
	      // And this is the pointer to be removed. Port 1 of a
	      // signal functor is the continuous-assign input.
	    vvp_net_ptr_t dst_ptr (net, 1);
	    src->unlink(dst_ptr);
	    sig->cassign_link = 0;
      }

	/* Do we release all or part of the net? */
      if (full_sig) {
	    sig->deassign();
      } else {
	    sig->deassign_pv(base, width);
      }

      return true;
}
2465
of_DEASSIGN_WR(vthread_t,vvp_code_t cp)2466 bool of_DEASSIGN_WR(vthread_t, vvp_code_t cp)
2467 {
2468 vvp_net_t*net = cp->net;
2469
2470 vvp_fun_signal_real*sig = dynamic_cast<vvp_fun_signal_real*>(net->fun);
2471 assert(sig);
2472
2473 // This is the net that is forcing me...
2474 if (vvp_net_t*src = sig->cassign_link) {
2475 // And this is the pointer to be removed.
2476 vvp_net_ptr_t dst_ptr (net, 1);
2477 src->unlink(dst_ptr);
2478 sig->cassign_link = 0;
2479 }
2480
2481 sig->deassign();
2482
2483 return true;
2484 }
2485
2486 /*
2487 * %debug/thr
2488 */
of_DEBUG_THR(vthread_t thr,vvp_code_t cp)2489 bool of_DEBUG_THR(vthread_t thr, vvp_code_t cp)
2490 {
2491 const char*text = cp->text;
2492 thr->debug_dump(cerr, text);
2493 return true;
2494 }
2495
2496 /*
2497 * The delay takes two 32bit numbers to make up a 64bit time.
2498 *
2499 * %delay <low>, <hig>
2500 */
of_DELAY(vthread_t thr,vvp_code_t cp)2501 bool of_DELAY(vthread_t thr, vvp_code_t cp)
2502 {
2503 vvp_time64_t low = cp->bit_idx[0];
2504 vvp_time64_t hig = cp->bit_idx[1];
2505
2506 vvp_time64_t delay = (hig << 32) | low;
2507
2508 if (delay == 0) schedule_inactive(thr);
2509 else schedule_vthread(thr, delay);
2510 return false;
2511 }
2512
of_DELAYX(vthread_t thr,vvp_code_t cp)2513 bool of_DELAYX(vthread_t thr, vvp_code_t cp)
2514 {
2515 vvp_time64_t delay;
2516
2517 assert(cp->number < vthread_s::WORDS_COUNT);
2518 delay = thr->words[cp->number].w_uint;
2519 if (delay == 0) schedule_inactive(thr);
2520 else schedule_vthread(thr, delay);
2521 return false;
2522 }
2523
/*
 * %delete/elem <net>
 * Delete one element from the queue variable addressed by <net>. The
 * element index comes from word register 3. Out-of-range, negative,
 * or undefined indices produce a runtime warning and are skipped.
 */
bool of_DELETE_ELEM(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;

      int64_t idx_val = thr->words[3].w_int;
	// Flag 4 set presumably means the index expression contained
	// x/z bits when it was loaded — TODO confirm against the
	// compiler's code generation for delete().
      if (thr->flags[4] == BIT4_1) {
	    cerr << thr->get_fileline()
		 << "Warning: skipping queue delete() with undefined index."
		 << endl;
	    return true;
      }
      if (idx_val < 0) {
	    cerr << thr->get_fileline()
		 << "Warning: skipping queue delete() with negative index."
		 << endl;
	    return true;
      }
      size_t idx = idx_val;

      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

	// A nil object means the queue is empty (never populated).
      vvp_queue*queue = obj->get_object().peek<vvp_queue>();
      if (queue == 0) {
	    cerr << thr->get_fileline()
		 << "Warning: skipping delete(" << idx
		 << ") on empty queue." << endl;
      } else {
	    size_t size = queue->get_size();
	    if (idx >= size) {
		  cerr << thr->get_fileline()
		       << "Warning: skipping out of range delete(" << idx
		       << ") on queue of size " << size << "." << endl;
	    } else {
		  queue->erase(idx);
	    }
      }

      return true;
}
2564
2565 /* %delete/obj <label>
2566 *
2567 * This operator works by assigning a nil to the target object. This
2568 * causes any value that might be there to be garbage collected, thus
2569 * deleting the object.
2570 */
of_DELETE_OBJ(vthread_t thr,vvp_code_t cp)2571 bool of_DELETE_OBJ(vthread_t thr, vvp_code_t cp)
2572 {
2573 /* set the value into port 0 of the destination. */
2574 vvp_net_ptr_t ptr (cp->net, 0);
2575 vvp_send_object(ptr, vvp_object_t(), thr->wt_context);
2576
2577 return true;
2578 }
2579
2580 /* %delete/tail <label>, idx
2581 *
2582 * Remove all elements after the one specified.
2583 */
/* %delete/tail <label>, idx
 *
 * Remove all queue elements after the one specified. The keep-count
 * comes from the indexed word register.
 */
bool of_DELETE_TAIL(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;

      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

	// Unlike %delete/elem, an absent queue is a hard error here.
      vvp_queue*queue = obj->get_object().peek<vvp_queue>();
      assert(queue);

	// NOTE(review): w_int is signed but idx is unsigned; a negative
	// register value would wrap to a huge count — confirm the
	// compiler never emits a negative index for %delete/tail.
      unsigned idx = thr->words[cp->bit_idx[0]].w_int;
      queue->erase_tail(idx);

      return true;
}
2599
/*
 * Kill the thread thr (and, recursively, all of its children),
 * waking or notifying its parent as appropriate. Returns true if
 * the thread passed as "match" was among the threads disabled, so
 * the caller can tell whether it disabled itself.
 */
static bool do_disable(vthread_t thr, vthread_t match)
{
      bool flag = false;

	/* Pull the target thread out of its scope if needed. */
      thr->parent_scope->threads.erase(thr);

	/* Turn the thread off by setting its program counter to
	   zero and setting an OFF bit. */
      thr->pc = codespace_null();
      thr->i_was_disabled = 1;
      thr->i_have_ended = 1;

	/* Turn off all the children of the thread. Simulate a %join
	   for as many times as needed to clear the results of all the
	   %forks that this thread has done. */
      while (! thr->children.empty()) {

	    vthread_t tmp = *(thr->children.begin());
	    assert(tmp);
	    assert(tmp->parent == thr);
	    thr->i_am_joining = 0;
	      // Propagate the match flag up from the recursion.
	    if (do_disable(tmp, match))
		  flag = true;

	      // Reaping the child also removes it from thr->children,
	      // which is what advances this loop.
	    vthread_reap(tmp);
      }

      vthread_t parent = thr->parent;
      if (parent && parent->i_am_joining) {
	      // If a parent is waiting in a %join, wake it up. Note
	      // that it is possible to be waiting in a %join yet
	      // already scheduled if multiple child threads are
	      // ending. So check if the thread is already scheduled
	      // before scheduling it again.
	    parent->i_am_joining = 0;
	    if (! parent->i_have_ended)
		  schedule_vthread(parent, 0, true);

	    do_join(parent, thr);

      } else if (parent) {
	      /* If the parent is yet to %join me, let its %join
		 do the reaping. */
	      //assert(tmp->is_scheduled == 0);

      } else {
	      /* No parent at all. Goodbye. */
	    vthread_reap(thr);
      }

      return flag || (thr == match);
}
2653
2654 /*
2655 * Implement the %disable instruction by scanning the target scope for
2656 * all the target threads. Kill the target threads and wake up a
2657 * parent that is attempting a %join.
2658 */
/*
 * Implement the %disable instruction by scanning the target scope for
 * all the target threads. Kill the target threads and wake up a
 * parent that is attempting a %join. Returns false (stop running)
 * when the current thread disabled itself.
 */
bool of_DISABLE(vthread_t thr, vvp_code_t cp)
{
      __vpiScope*scope = (__vpiScope*)cp->handle;

      bool disabled_myself_flag = false;

	// do_disable erases the thread from scope->threads, so this
	// loop terminates by emptying the set one thread at a time.
      while (! scope->threads.empty()) {
	    set<vthread_t>::iterator cur = scope->threads.begin();

	    if (do_disable(*cur, thr))
		  disabled_myself_flag = true;
      }

	// If I disabled myself, return false so the dispatcher stops
	// executing this (now dead) thread.
      return ! disabled_myself_flag;
}
2674
2675 /*
2676 * Implement the %disable/fork (SystemVerilog) instruction by disabling
2677 * all the detached children of the given thread.
2678 */
of_DISABLE_FORK(vthread_t thr,vvp_code_t)2679 bool of_DISABLE_FORK(vthread_t thr, vvp_code_t)
2680 {
2681 /* If a %disable/fork is being executed then the parent thread
2682 * cannot be waiting in a join. */
2683 assert(! thr->i_am_joining);
2684
2685 /* There should be no active children to disable. */
2686 assert(thr->children.empty());
2687
2688 /* Disable any detached children. */
2689 while (! thr->detached_children.empty()) {
2690 vthread_t child = *(thr->detached_children.begin());
2691 assert(child);
2692 assert(child->parent == thr);
2693 /* Disabling the children can never match the parent thread. */
2694 bool res = do_disable(child, thr);
2695 assert(! res);
2696 vthread_reap(child);
2697 }
2698
2699 return true;
2700 }
2701
2702 /*
2703 * This function divides a 2-word number {high, a} by a 1-word
2704 * number. Assume that high < b.
2705 */
/*
 * This function divides a 2-word number {high, a} by a 1-word
 * number b. Assume that high < b (so the quotient fits in one word).
 * Returns the quotient; the remainder is discarded.
 */
static unsigned long divide2words(unsigned long a, unsigned long b,
				  unsigned long high)
{
      unsigned long result = 0;
	// Each pass folds the high word into (result, a) until the
	// value fits in a single word.
      while (high > 0) {
	      // Split 2^WORD (i.e. ULONG_MAX+1) as b*tmp_result + remain.
	    unsigned long tmp_result = ULONG_MAX / b;
	    unsigned long remain = ULONG_MAX % b;

	    remain += 1;
	    if (remain >= b) {
		  remain -= b;
		  tmp_result += 1;
	    }

	      // Now 0x1_0...0 = b*tmp_result + remain
	      // high*0x1_0...0 = high*(b*tmp_result + remain)
	      // high*0x1_0...0 = high*b*tmp_result + high*remain

	      // We know that high*0x1_0...0 >= high*b*tmp_result, and
	      // we know that high*0x1_0...0 > high*remain. Use
	      // high*remain as the remainder for another iteration,
	      // and add tmp_result*high into the current estimate of
	      // the result.
	    result += tmp_result * high;

	      // The new iteration starts with high*remain + a.
	      // multiply_with_carry leaves the low word in remain and
	      // the carry-out word in high.
	    remain = multiply_with_carry(high, remain, high);
	    a += remain;
	    if(a < remain)   // unsigned overflow: propagate the carry
		  high += 1;

	      // Now result*b + {high,a} == the input {high,a}. It is
	      // possible that the new high >= 1. If so, it will
	      // certainly be less than high from the previous
	      // iteration. Do another iteration and it will shrink,
	      // eventually to 0.
      }

	// high is now 0, so a is the remaining remainder, so we can
	// finish off the integer divide with a simple a/b.

      return result + a/b;
}
2749
/*
 * Long division of the wid-bit value in ap by the wid-bit value in
 * bp, one CPU word at a time. On return, ap has been overwritten
 * with the remainder and the returned array (owned by the caller,
 * release with delete[]) holds the quotient. Returns 0 (and leaves
 * ap untouched) on divide-by-zero.
 */
static unsigned long* divide_bits(unsigned long*ap, unsigned long*bp, unsigned wid)
{
	// Do all our work a cpu-word at a time. The "words" variable
	// is the number of words of the wid.
      unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;

	// Find the most significant non-zero word of the divisor.
      unsigned btop = words-1;
      while (btop > 0 && bp[btop] == 0)
	    btop -= 1;

	// Detect divide by 0, and exit.
      if (btop==0 && bp[0]==0)
	    return 0;

	// The result array will eventually accumulate the result. The
	// diff array is a difference that we use in the intermediate.
      unsigned long*diff = new unsigned long[words];
      unsigned long*result= new unsigned long[words];
      for (unsigned idx = 0 ; idx < words ; idx += 1)
	    result[idx] = 0;

	// Schoolbook long division: estimate one quotient word per
	// pass, from the most significant position downward.
      for (unsigned cur = words-btop ; cur > 0 ; cur -= 1) {
	    unsigned cur_ptr = cur-1;
	    unsigned long cur_res;
	    if (ap[cur_ptr+btop] >= bp[btop]) {
		  unsigned long high = 0;
		  if (cur_ptr+btop+1 < words)
			high = ap[cur_ptr+btop+1];
		  cur_res = divide2words(ap[cur_ptr+btop], bp[btop], high);

	    } else if (cur_ptr+btop+1 >= words) {
		    // No high word available and the top word of the
		    // dividend is smaller: quotient word is 0.
		  continue;

	    } else if (ap[cur_ptr+btop+1] == 0) {
		    // High word is zero and the dividend word is
		    // smaller: quotient word is 0.
		  continue;

	    } else {
		  cur_res = divide2words(ap[cur_ptr+btop], bp[btop],
					 ap[cur_ptr+btop+1]);
	    }

	      // cur_res is a guesstimate of the result this far. It
	      // may be 1 too big. (But it will also be >0) Try it,
	      // and if the difference comes out negative, then adjust.

	      // diff = (bp * cur_res) << cur_ptr;
	    multiply_array_imm(diff+cur_ptr, bp, words-cur_ptr, cur_res);
	      // ap -= diff   (two's-complement subtract: add ~diff + 1)
	    unsigned long carry = 1;
	    for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
		  ap[idx] = add_with_carry(ap[idx], ~diff[idx], carry);

	      // ap has the diff subtracted out of it. If cur_res was
	      // too large, then ap will turn negative. (We easily
	      // tell that ap turned negative by looking at
	      // carry&1. If it is 0, then it is *negative*.) In that
	      // case, we know that cur_res was too large by 1. Correct by
	      // adding 1b back in and reducing cur_res.
	    if ((carry&1) == 0) {
		    // Keep adding b back in until the remainder
		    // becomes positive again.
		  do {
			cur_res -= 1;
			carry = 0;
			for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
			      ap[idx] = add_with_carry(ap[idx], bp[idx-cur_ptr], carry);
		  } while (carry == 0);
	    }

	    result[cur_ptr] = cur_res;
      }

	// Now ap contains the remainder and result contains the
	// desired result. We should find that:
	//   input-a = bp * result + ap;

      delete[]diff;
      return result;
}
2829
2830 /*
2831 * %div
2832 */
/*
 * %div
 * Pop two equal-width vectors (divisor on top), push the unsigned
 * quotient. Any x/z bit in either operand, or a zero divisor,
 * produces an all-x result.
 */
bool of_DIV(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t vala = thr->pop_vec4();

      assert(vala.size()== valb.size());
      unsigned wid = vala.size();

	// subarray() returns 0 when the vector has x/z bits; the
	// returned arrays are heap-allocated and freed below.
      unsigned long*ap = vala.subarray(0, wid);
      if (ap == 0) {
	    vvp_vector4_t tmp(wid, BIT4_X);
	    thr->push_vec4(tmp);
	    return true;
      }

      unsigned long*bp = valb.subarray(0, wid);
      if (bp == 0) {
	    delete[]ap;
	    vvp_vector4_t tmp(wid, BIT4_X);
	    thr->push_vec4(tmp);
	    return true;
      }

	// If the value fits in a single CPU word, then do it the easy way.
      if (wid <= CPU_WORD_BITS) {
	    if (bp[0] == 0) {
		    // Divide by zero yields all-x.
		  vvp_vector4_t tmp(wid, BIT4_X);
		  thr->push_vec4(tmp);
	    } else {
		  ap[0] /= bp[0];
		  vala.setarray(0, wid, ap);
		  thr->push_vec4(vala);
	    }
	    delete[]ap;
	    delete[]bp;
	    return true;
      }

	// Wide case: multi-word long division. A 0 return means the
	// divisor was zero.
      unsigned long*result = divide_bits(ap, bp, wid);
      if (result == 0) {
	    delete[]ap;
	    delete[]bp;
	    vvp_vector4_t tmp(wid, BIT4_X);
	    thr->push_vec4(tmp);
	    return true;
      }

	// Now ap contains the remainder and result contains the
	// desired result. We should find that:
	//   input-a = bp * result + ap;

      vala.setarray(0, wid, result);
      thr->push_vec4(vala);
      delete[]ap;
      delete[]bp;
      delete[]result;

      return true;
}
2892
2893
negate_words(unsigned long * val,unsigned words)2894 static void negate_words(unsigned long*val, unsigned words)
2895 {
2896 unsigned long carry = 1;
2897 for (unsigned idx = 0 ; idx < words ; idx += 1)
2898 val[idx] = add_with_carry(0, ~val[idx], carry);
2899 }
2900
2901 /*
2902 * %div/s
2903 */
/*
 * %div/s
 * Signed division. The divisor is popped from the vec4 stack and the
 * dividend (below it) is replaced in place with the quotient. Any
 * x/z bit in either operand, or a zero divisor, yields an all-x
 * result.
 */
bool of_DIV_S(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t&vala = thr->peek_vec4();

      assert(vala.size()== valb.size());
      unsigned wid = vala.size();
      unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;

	// Get the values, left in right, in binary form. If there is
	// a problem with either (caused by an X or Z bit) then we
	// know right away that the entire result is X.
      unsigned long*ap = vala.subarray(0, wid);
      if (ap == 0) {
	    vvp_vector4_t tmp(wid, BIT4_X);
	    vala = tmp;
	    return true;
      }

      unsigned long*bp = valb.subarray(0, wid);
      if (bp == 0) {
	    delete[]ap;
	    vvp_vector4_t tmp(wid, BIT4_X);
	    vala = tmp;
	    return true;
      }

	// Sign extend the bits in the array to fill out the array.
	// sign_mask covers the padding bits above wid in the top word.
      unsigned long sign_mask = 0;
      if (unsigned long sign_bits = (words*CPU_WORD_BITS) - wid) {
	    sign_mask = -1UL << (CPU_WORD_BITS-sign_bits);
	      // (sign_mask>>1) selects the value's sign bit position.
	    if (ap[words-1] & (sign_mask>>1))
		  ap[words-1] |= sign_mask;
	    if (bp[words-1] & (sign_mask>>1))
		  bp[words-1] |= sign_mask;
      }

	// If the value fits in a single word, then use the native divide.
      if (wid <= CPU_WORD_BITS) {
	    if (bp[0] == 0) {
		    // Divide by zero yields all-x.
		  vvp_vector4_t tmp(wid, BIT4_X);
		  vala = tmp;
	    } else if (((long)ap[0] == LONG_MIN) && ((long)bp[0] == -1)) {
		    // LONG_MIN / -1 overflows the native divide; the
		    // wrapped result is the minimum value again
		    // (1 in the sign bit, 0 elsewhere).
		  vvp_vector4_t tmp(wid, BIT4_0);
		  tmp.set_bit(wid-1, BIT4_1);
		  vala = tmp;
	    } else {
		  long tmpa = (long) ap[0];
		  long tmpb = (long) bp[0];
		  long res = tmpa / tmpb;
		    // Strip the sign-extension padding before writing back.
		  ap[0] = ((unsigned long)res) & ~sign_mask;
		  vala.setarray(0, wid, ap);
	    }
	    delete[]ap;
	    delete[]bp;
	    return true;
      }

	// We need to do the actual division with positive integers.
	// Make them positive here, and remember the negations.
      bool negate_flag = false;
      if ( ((long) ap[words-1]) < 0 ) {
	    negate_flag = true;
	    negate_words(ap, words);
      }
      if ( ((long) bp[words-1]) < 0 ) {
	      // Two negatives cancel; XOR tracks the result sign.
	    negate_flag ^= true;
	    negate_words(bp, words);
      }

	// A 0 return from divide_bits means the divisor was zero.
      unsigned long*result = divide_bits(ap, bp, wid);
      if (result == 0) {
	    delete[]ap;
	    delete[]bp;
	    vvp_vector4_t tmp(wid, BIT4_X);
	    vala = tmp;
	    return true;
      }

      if (negate_flag) {
	    negate_words(result, words);
      }

	// Strip the sign-extension padding bits from the top word.
      result[words-1] &= ~sign_mask;

      vala.setarray(0, wid, result);
      delete[]ap;
      delete[]bp;
      delete[]result;
      return true;
}
2995
of_DIV_WR(vthread_t thr,vvp_code_t)2996 bool of_DIV_WR(vthread_t thr, vvp_code_t)
2997 {
2998 double r = thr->pop_real();
2999 double l = thr->pop_real();
3000 thr->push_real(l / r);
3001
3002 return true;
3003 }
3004
of_DUP_REAL(vthread_t thr,vvp_code_t)3005 bool of_DUP_REAL(vthread_t thr, vvp_code_t)
3006 {
3007 thr->push_real(thr->peek_real(0));
3008 return true;
3009 }
3010
of_DUP_VEC4(vthread_t thr,vvp_code_t)3011 bool of_DUP_VEC4(vthread_t thr, vvp_code_t)
3012 {
3013 thr->push_vec4(thr->peek_vec4(0));
3014 return true;
3015 }
3016
3017 /*
3018 * This terminates the current thread. If there is a parent who is
3019 * waiting for me to die, then I schedule it. At any rate, I mark
3020 * myself as a zombie by setting my pc to 0.
3021 */
/*
 * This terminates the current thread. If there is a parent who is
 * waiting for me to die, then I schedule it. At any rate, I mark
 * myself as a zombie by setting my pc to 0.
 */
bool of_END(vthread_t thr, vvp_code_t)
{
      assert(! thr->waiting_for_event);
      thr->i_have_ended = 1;
      thr->pc = codespace_null();

	/* Fully detach any detached children: a dying parent orphans
	 * them so they can be reaped on their own %end. */
      while (! thr->detached_children.empty()) {
	    vthread_t child = *(thr->detached_children.begin());
	    assert(child);
	    assert(child->parent == thr);
	    assert(child->i_am_detached);
	    child->parent = 0;
	    child->i_am_detached = 0;
	    thr->detached_children.erase(thr->detached_children.begin());
      }

	/* It is an error to still have active children running at this
	 * point in time. They should have all been detached or joined. */
      assert(thr->children.empty());

	/* If I have a parent who is waiting for me, then mark that I
	   have ended, and schedule that parent. Also, finish the
	   %join for the parent. Return false: this thread stops. */
      if (!thr->i_am_detached && thr->parent && thr->parent->i_am_joining) {
	    vthread_t tmp = thr->parent;
	    assert(! thr->i_am_detached);

	    tmp->i_am_joining = 0;
	    schedule_vthread(tmp, 0, true);
	    do_join(tmp, thr);
	    return false;
      }

	/* If this thread is detached, remove it from the parent's
	 * detached_children set and fully detach it so it gets
	 * reaped below. */
      if (thr->i_am_detached) {
	    vthread_t tmp = thr->parent;
	    assert(tmp);
	    size_t res = tmp->detached_children.erase(thr);
	    assert(res == 1);
	      /* If the parent is waiting for the detached children to
	       * finish then the last detached child needs to tell the
	       * parent to wake up when it is finished. */
	    if (tmp->i_am_waiting && tmp->detached_children.empty()) {
		  tmp->i_am_waiting = 0;
		  schedule_vthread(tmp, 0, true);
	    }
	      /* Fully detach this thread so it will be reaped below. */
	    thr->i_am_detached = 0;
	    thr->parent = 0;
      }

	/* If I have no parent, then no one can %join me and there is
	 * no reason to stick around. This can happen, for example if
	 * I am an ``initial'' thread. */
      if (thr->parent == 0) {
	    vthread_reap(thr);
	    return false;
      }

	/* If I make it this far, then I have a parent who may wish
	   to %join me. Remain a zombie so that it can. */

      return false;
}
3088
3089 /*
3090 * %event <var-label>
3091 */
of_EVENT(vthread_t thr,vvp_code_t cp)3092 bool of_EVENT(vthread_t thr, vvp_code_t cp)
3093 {
3094 vvp_net_ptr_t ptr (cp->net, 0);
3095 vvp_vector4_t tmp (1, BIT4_X);
3096 vvp_send_vec4(ptr, tmp, thr->wt_context);
3097 return true;
3098 }
3099
of_EVCTL(vthread_t thr,vvp_code_t cp)3100 bool of_EVCTL(vthread_t thr, vvp_code_t cp)
3101 {
3102 assert(thr->event == 0 && thr->ecount == 0);
3103 thr->event = cp->net;
3104 thr->ecount = thr->words[cp->bit_idx[0]].w_uint;
3105 return true;
3106 }
of_EVCTLC(vthread_t thr,vvp_code_t)3107 bool of_EVCTLC(vthread_t thr, vvp_code_t)
3108 {
3109 thr->event = 0;
3110 thr->ecount = 0;
3111 return true;
3112 }
3113
of_EVCTLI(vthread_t thr,vvp_code_t cp)3114 bool of_EVCTLI(vthread_t thr, vvp_code_t cp)
3115 {
3116 assert(thr->event == 0 && thr->ecount == 0);
3117 thr->event = cp->net;
3118 thr->ecount = cp->bit_idx[0];
3119 return true;
3120 }
3121
of_EVCTLS(vthread_t thr,vvp_code_t cp)3122 bool of_EVCTLS(vthread_t thr, vvp_code_t cp)
3123 {
3124 assert(thr->event == 0 && thr->ecount == 0);
3125 thr->event = cp->net;
3126 int64_t val = thr->words[cp->bit_idx[0]].w_int;
3127 if (val < 0) val = 0;
3128 thr->ecount = val;
3129 return true;
3130 }
3131
of_FLAG_GET_VEC4(vthread_t thr,vvp_code_t cp)3132 bool of_FLAG_GET_VEC4(vthread_t thr, vvp_code_t cp)
3133 {
3134 int flag = cp->number;
3135 assert(flag < vthread_s::FLAGS_COUNT);
3136
3137 vvp_vector4_t val (1, thr->flags[flag]);
3138 thr->push_vec4(val);
3139
3140 return true;
3141 }
3142
3143 /*
3144 * %flag_inv <flag1>
3145 */
of_FLAG_INV(vthread_t thr,vvp_code_t cp)3146 bool of_FLAG_INV(vthread_t thr, vvp_code_t cp)
3147 {
3148 int flag1 = cp->bit_idx[0];
3149
3150 thr->flags[flag1] = ~ thr->flags[flag1];
3151 return true;
3152 }
3153
3154 /*
3155 * %flag_mov <flag1>, <flag2>
3156 */
of_FLAG_MOV(vthread_t thr,vvp_code_t cp)3157 bool of_FLAG_MOV(vthread_t thr, vvp_code_t cp)
3158 {
3159 int flag1 = cp->bit_idx[0];
3160 int flag2 = cp->bit_idx[1];
3161
3162 thr->flags[flag1] = thr->flags[flag2];
3163 return true;
3164 }
3165
3166 /*
3167 * %flag_or <flag1>, <flag2>
3168 */
of_FLAG_OR(vthread_t thr,vvp_code_t cp)3169 bool of_FLAG_OR(vthread_t thr, vvp_code_t cp)
3170 {
3171 int flag1 = cp->bit_idx[0];
3172 int flag2 = cp->bit_idx[1];
3173
3174 thr->flags[flag1] = thr->flags[flag1] | thr->flags[flag2];
3175 return true;
3176 }
3177
of_FLAG_SET_IMM(vthread_t thr,vvp_code_t cp)3178 bool of_FLAG_SET_IMM(vthread_t thr, vvp_code_t cp)
3179 {
3180 int flag = cp->number;
3181 int vali = cp->bit_idx[0];
3182
3183 assert(flag < vthread_s::FLAGS_COUNT);
3184 assert(vali >= 0 && vali < 4);
3185
3186 static vvp_bit4_t map_bit[4] = {BIT4_0, BIT4_1, BIT4_Z, BIT4_X};
3187 thr->flags[flag] = map_bit[vali];
3188 return true;
3189 }
3190
of_FLAG_SET_VEC4(vthread_t thr,vvp_code_t cp)3191 bool of_FLAG_SET_VEC4(vthread_t thr, vvp_code_t cp)
3192 {
3193 int flag = cp->number;
3194 assert(flag < vthread_s::FLAGS_COUNT);
3195
3196 const vvp_vector4_t&val = thr->peek_vec4();
3197 thr->flags[flag] = val.value(0);
3198 thr->pop_vec4(1);
3199
3200 return true;
3201 }
3202
3203 /*
3204 * the %force/link instruction connects a source node to a
3205 * destination node. The destination node must be a signal, as it is
3206 * marked with the source of the force so that it may later be
3207 * unlinked without specifically knowing the source that this
3208 * instruction used.
3209 */
of_FORCE_LINK(vthread_t,vvp_code_t cp)3210 bool of_FORCE_LINK(vthread_t, vvp_code_t cp)
3211 {
3212 vvp_net_t*dst = cp->net;
3213 vvp_net_t*src = cp->net2;
3214
3215 assert(dst->fil);
3216 dst->fil->force_link(dst, src);
3217
3218 return true;
3219 }
3220
3221 /*
3222 * The %force/vec4 instruction invokes a force assign of a constant value
3223 * to a signal. The instruction arguments are:
3224 *
3225 * %force/vec4 <net> ;
3226 *
3227 * where the <net> is the net label assembled into a vvp_net pointer,
3228 * and the value to be forced is popped from the vec4 stack.\.
3229 *
3230 * The instruction writes a vvp_vector4_t value to port-2 of the
3231 * target signal.
3232 */
of_FORCE_VEC4(vthread_t thr,vvp_code_t cp)3233 bool of_FORCE_VEC4(vthread_t thr, vvp_code_t cp)
3234 {
3235 vvp_net_t*net = cp->net;
3236
3237 vvp_vector4_t value = thr->pop_vec4();
3238
3239 /* Send the force value to the filter on the node. */
3240
3241 assert(net->fil);
3242 if (value.size() != net->fil->filter_size())
3243 value = coerce_to_width(value, net->fil->filter_size());
3244
3245 net->force_vec4(value, vvp_vector2_t(vvp_vector2_t::FILL1, net->fil->filter_size()));
3246
3247 return true;
3248 }
3249
3250 /*
3251 * %force/vec4/off <net>, <off>
3252 */
/*
 * %force/vec4/off <net>, <off>
 * Force a popped vec4 value onto part of the target signal, at the
 * bit offset found in the indexed word register. Flag 4 set means
 * the offset was undefined and the force is skipped.
 */
bool of_FORCE_VEC4_OFF(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;
      unsigned base_idx = cp->bit_idx[0];
      long base = thr->words[base_idx].w_int;
      vvp_vector4_t value = thr->pop_vec4();
      unsigned wid = value.size();

      assert(net->fil);

	// Undefined index: pop already happened above, just skip.
      if (thr->flags[4] == BIT4_1)
	    return true;

	// This is the width of the target vector.
      unsigned use_size = net->fil->filter_size();

	// Entirely out of range: nothing to force.
      if (base >= (long)use_size)
	    return true;
      if (base < -(long)use_size)
	    return true;

	// NOTE(review): a negative base in (-use_size, 0) passes the
	// guards above but is then used directly in set_bit/set_vec
	// below — confirm whether the compiler can emit such an
	// offset, or whether the low bits should be clipped first.
      if ((base + wid) > use_size)
	    wid = use_size - base;

	// Make a mask of which bits are to be forced, 0 for unforced
	// bits and 1 for forced bits.
      vvp_vector2_t mask (vvp_vector2_t::FILL0, use_size);
      for (unsigned idx = 0 ; idx < wid ; idx += 1)
	    mask.set_bit(base+idx, 1);

      vvp_vector4_t tmp (use_size, BIT4_Z);

	// vvp_net_t::force_vec4 propagates all the bits of the
	// forced vector value, regardless of the mask. This
	// ensures the unforced bits retain their current value.
      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
      assert(sig);
      sig->vec4_value(tmp);

      tmp.set_vec(base, value);

      net->force_vec4(tmp, mask);
      return true;
}
3297
3298 /*
3299 * %force/vec4/off/d <net>, <off>, <del>
3300 */
of_FORCE_VEC4_OFF_D(vthread_t thr,vvp_code_t cp)3301 bool of_FORCE_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
3302 {
3303 vvp_net_t*net = cp->net;
3304
3305 unsigned base_idx = cp->bit_idx[0];
3306 long base = thr->words[base_idx].w_int;
3307
3308 unsigned delay_idx = cp->bit_idx[1];
3309 vvp_time64_t delay = thr->words[delay_idx].w_uint;
3310
3311 vvp_vector4_t value = thr->pop_vec4();
3312
3313 assert(net->fil);
3314
3315 if (thr->flags[4] == BIT4_1)
3316 return true;
3317
3318 // This is the width of the target vector.
3319 unsigned use_size = net->fil->filter_size();
3320
3321 if (base >= (long)use_size)
3322 return true;
3323 if (base < -(long)use_size)
3324 return true;
3325
3326 schedule_force_vector(net, base, use_size, value, delay);
3327 return true;
3328 }
3329
of_FORCE_WR(vthread_t thr,vvp_code_t cp)3330 bool of_FORCE_WR(vthread_t thr, vvp_code_t cp)
3331 {
3332 vvp_net_t*net = cp->net;
3333 double value = thr->pop_real();
3334
3335 net->force_real(value, vvp_vector2_t(vvp_vector2_t::FILL1, 1));
3336
3337 return true;
3338 }
3339
3340 /*
3341 * The %fork instruction causes a new child to be created and pushed
3342 * in front of any existing child. This causes the new child to be
3343 * added to the list of children, and for me to be the parent of the
3344 * new child.
3345 */
bool of_FORK(vthread_t thr, vvp_code_t cp)
{
      // Create the child thread, set to start at cp->cptr2 in the
      // given scope.
      vthread_t child = vthread_new(cp->cptr2, cp->scope);

      if (cp->scope->is_automatic()) {
	      /* The context allocated for this child is the top entry
	         on the write context stack. */
	    child->wt_context = thr->wt_context;
	    child->rd_context = thr->wt_context;
      }

      // Link the new child into this thread's set of children.
      child->parent = thr;
      thr->children.insert(child);

      if (thr->i_am_in_function) {
	    // Within a function the child must run to completion in
	    // zero time, so run it synchronously right now, then
	    // restore the running-thread pointer.
	    child->is_scheduled = 1;
	    child->i_am_in_function = 1;
	    vthread_run(child);
	    running_thread = thr;
      } else {
	    // Otherwise schedule the child to run at the current time.
	    schedule_vthread(child, 0, true);
      }
      return true;
}
3370
of_FREE(vthread_t thr,vvp_code_t cp)3371 bool of_FREE(vthread_t thr, vvp_code_t cp)
3372 {
3373 /* Pop the child context from the read context stack. */
3374 vvp_context_t child_context = thr->rd_context;
3375 thr->rd_context = vvp_get_stacked_context(child_context);
3376
3377 /* Free the context. */
3378 vthread_free_context(child_context, cp->scope);
3379
3380 return true;
3381 }
3382
3383 /*
3384 * %inv
3385 *
 * Logically, this pops a value, inverts it (Verilog style, with Z and
 * X converted to X) and pushes the result. We can more efficiently
 * just do the invert in place.
3389 */
of_INV(vthread_t thr,vvp_code_t)3390 bool of_INV(vthread_t thr, vvp_code_t)
3391 {
3392 vvp_vector4_t&val = thr->peek_vec4();
3393 val.invert();
3394 return true;
3395 }
3396
3397
3398 /*
3399 * Index registers, arithmetic.
3400 */
3401
// Assemble a 64-bit signed value from two 32-bit halves.
static inline int64_t get_as_64_bit(uint32_t low_32, uint32_t high_32)
{
      // Build the bit pattern in unsigned space, then reinterpret the
      // result as a signed 64-bit value.
      uint64_t packed = (static_cast<uint64_t>(high_32) << 32)
                      | static_cast<uint64_t>(low_32);
      return static_cast<int64_t>(packed);
}
3411
of_IX_ADD(vthread_t thr,vvp_code_t cp)3412 bool of_IX_ADD(vthread_t thr, vvp_code_t cp)
3413 {
3414 thr->words[cp->number].w_int += get_as_64_bit(cp->bit_idx[0],
3415 cp->bit_idx[1]);
3416 return true;
3417 }
3418
of_IX_SUB(vthread_t thr,vvp_code_t cp)3419 bool of_IX_SUB(vthread_t thr, vvp_code_t cp)
3420 {
3421 thr->words[cp->number].w_int -= get_as_64_bit(cp->bit_idx[0],
3422 cp->bit_idx[1]);
3423 return true;
3424 }
3425
of_IX_MUL(vthread_t thr,vvp_code_t cp)3426 bool of_IX_MUL(vthread_t thr, vvp_code_t cp)
3427 {
3428 thr->words[cp->number].w_int *= get_as_64_bit(cp->bit_idx[0],
3429 cp->bit_idx[1]);
3430 return true;
3431 }
3432
of_IX_LOAD(vthread_t thr,vvp_code_t cp)3433 bool of_IX_LOAD(vthread_t thr, vvp_code_t cp)
3434 {
3435 thr->words[cp->number].w_int = get_as_64_bit(cp->bit_idx[0],
3436 cp->bit_idx[1]);
3437 return true;
3438 }
3439
of_IX_MOV(vthread_t thr,vvp_code_t cp)3440 bool of_IX_MOV(vthread_t thr, vvp_code_t cp)
3441 {
3442 thr->words[cp->bit_idx[0]].w_int = thr->words[cp->bit_idx[1]].w_int;
3443 return true;
3444 }
3445
bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
{
      // Destination index register and the net to read from.
      unsigned index = cp->bit_idx[0];
      vvp_net_t*net = cp->net;

      // Only signal filters keep a vector value that can be read.
      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
      if (sig == 0) {
	    assert(net->fil);
	    cerr << thr->get_fileline()
		 << "%%ix/getv error: Net arg not a vector signal? "
		 << typeid(*net->fil).name() << endl;
      }
      assert(sig);

      // Read the signal value and convert it to an unsigned integer.
      vvp_vector4_t vec;
      sig->vec4_value(vec);
      bool overflow_flag;
      uint64_t val;
      bool known_flag = vector4_to_value(vec, overflow_flag, val);

      // An unknown (x/z) value reads as 0.
      if (known_flag)
	    thr->words[index].w_uint = val;
      else
	    thr->words[index].w_uint = 0;

	/* Set bit 4 as a flag if the input is unknown. */
      // (X marks an overflow of a known value.)
      thr->flags[4] = known_flag ? (overflow_flag ? BIT4_X : BIT4_0) : BIT4_1;

      return true;
}
3476
of_IX_GETV_S(vthread_t thr,vvp_code_t cp)3477 bool of_IX_GETV_S(vthread_t thr, vvp_code_t cp)
3478 {
3479 unsigned index = cp->bit_idx[0];
3480 vvp_net_t*net = cp->net;
3481
3482 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
3483 if (sig == 0) {
3484 assert(net->fil);
3485 cerr << thr->get_fileline()
3486 << "%%ix/getv/s error: Net arg not a vector signal? "
3487 << "fun=" << typeid(*net->fil).name()
3488 << ", fil=" << (net->fil? typeid(*net->fil).name() : "<>")
3489 << endl;
3490 }
3491 assert(sig);
3492
3493 vvp_vector4_t vec;
3494 sig->vec4_value(vec);
3495 int64_t val;
3496 bool known_flag = vector4_to_value(vec, val, true, true);
3497
3498 if (known_flag)
3499 thr->words[index].w_int = val;
3500 else
3501 thr->words[index].w_int = 0;
3502
3503 /* Set bit 4 as a flag if the input is unknown. */
3504 thr->flags[4] = known_flag? BIT4_0 : BIT4_1;
3505
3506 return true;
3507 }
3508
// Convert the vec4 value on the top of the stack to a 64-bit index
// value, popping the stack. flags[4] reports the outcome: BIT4_1 if
// the value contained x/z bits, BIT4_X on overflow, BIT4_0 otherwise.
static uint64_t vec4_to_index(vthread_t thr, bool signed_flag)
{
	// Get all the information we need about the vec4 vector, then
	// pop it away. We only need the bool bits and the length.
      const vvp_vector4_t&val = thr->peek_vec4();
      unsigned val_size = val.size();
      unsigned long*bits = val.subarray(0, val_size, false);
      thr->pop_vec4(1);

	// If there are X/Z bits, then the subarray will give us a nil
	// pointer. Set a flag to indicate the error, and give up.
      if (bits == 0) {
	    thr->flags[4] = BIT4_1;
	    return 0;
      }

      uint64_t v = 0;
      thr->flags[4] = BIT4_0;

      assert(sizeof(bits[0]) <= sizeof(v));

      // Fold the subarray words into the 64-bit result. Words beyond
      // the low 64 bits only matter for overflow detection.
      v = 0;
      for (unsigned idx = 0 ; idx < val_size ; idx += 8*sizeof(bits[0])) {
	    uint64_t tmp = bits[idx/8/sizeof(bits[0])];
	    if (idx < 8*sizeof(v)) {
		  v |= tmp << idx;
	    } else {
		  // High words overflow unless they are all zeros
		  // (or, for a negative signed value, all ones).
		  bool overflow = signed_flag && (v >> 63) ? ~tmp != 0 : tmp != 0;
		  if (overflow) {
			thr->flags[4] = BIT4_X;
			break;
		  }
	    }
      }

	// Set the high bits that are not necessarily filled in by the
	// subarray function.
      if (val_size < 8*sizeof(v)) {
	    if (signed_flag && (v & (static_cast<uint64_t>(1)<<(val_size-1)))) {
		    // Propagate the sign bit...
		  v |= (~static_cast<uint64_t>(0)) << val_size;

	    } else {
		    // Fill with zeros.
		  v &= ~((~static_cast<uint64_t>(0)) << val_size);
	    }

      }

      delete[]bits;
      return v;
}
3561
3562 /*
3563 * %ix/vec4 <idx>
3564 */
of_IX_VEC4(vthread_t thr,vvp_code_t cp)3565 bool of_IX_VEC4(vthread_t thr, vvp_code_t cp)
3566 {
3567 unsigned use_idx = cp->number;
3568 thr->words[use_idx].w_uint = vec4_to_index(thr, false);
3569 return true;
3570 }
3571
3572 /*
3573 * %ix/vec4/s <idx>
3574 */
of_IX_VEC4_S(vthread_t thr,vvp_code_t cp)3575 bool of_IX_VEC4_S(vthread_t thr, vvp_code_t cp)
3576 {
3577 unsigned use_idx = cp->number;
3578 thr->words[use_idx].w_uint = vec4_to_index(thr, true);
3579 return true;
3580 }
3581
3582 /*
3583 * The various JMP instruction work simply by pulling the new program
3584 * counter from the instruction and resuming. If the jump is
3585 * conditional, then test the bit for the expected value first.
3586 */
of_JMP(vthread_t thr,vvp_code_t cp)3587 bool of_JMP(vthread_t thr, vvp_code_t cp)
3588 {
3589 thr->pc = cp->cptr;
3590
3591 /* Normally, this returns true so that the processor just
3592 keeps going to the next instruction. However, if there was
3593 a $stop or vpiStop, returning false here can break the
3594 simulation out of a hung loop. */
3595 if (schedule_stopped()) {
3596 schedule_vthread(thr, 0, false);
3597 return false;
3598 }
3599
3600 return true;
3601 }
3602
3603 /*
3604 * %jmp/0 <pc>, <flag>
3605 */
of_JMP0(vthread_t thr,vvp_code_t cp)3606 bool of_JMP0(vthread_t thr, vvp_code_t cp)
3607 {
3608 if (thr->flags[cp->bit_idx[0]] == BIT4_0)
3609 thr->pc = cp->cptr;
3610
3611 /* Normally, this returns true so that the processor just
3612 keeps going to the next instruction. However, if there was
3613 a $stop or vpiStop, returning false here can break the
3614 simulation out of a hung loop. */
3615 if (schedule_stopped()) {
3616 schedule_vthread(thr, 0, false);
3617 return false;
3618 }
3619
3620 return true;
3621 }
3622
3623 /*
3624 * %jmp/0xz <pc>, <flag>
3625 */
of_JMP0XZ(vthread_t thr,vvp_code_t cp)3626 bool of_JMP0XZ(vthread_t thr, vvp_code_t cp)
3627 {
3628 if (thr->flags[cp->bit_idx[0]] != BIT4_1)
3629 thr->pc = cp->cptr;
3630
3631 /* Normally, this returns true so that the processor just
3632 keeps going to the next instruction. However, if there was
3633 a $stop or vpiStop, returning false here can break the
3634 simulation out of a hung loop. */
3635 if (schedule_stopped()) {
3636 schedule_vthread(thr, 0, false);
3637 return false;
3638 }
3639
3640 return true;
3641 }
3642
3643 /*
3644 * %jmp/1 <pc>, <flag>
3645 */
of_JMP1(vthread_t thr,vvp_code_t cp)3646 bool of_JMP1(vthread_t thr, vvp_code_t cp)
3647 {
3648 if (thr->flags[cp->bit_idx[0]] == BIT4_1)
3649 thr->pc = cp->cptr;
3650
3651 /* Normally, this returns true so that the processor just
3652 keeps going to the next instruction. However, if there was
3653 a $stop or vpiStop, returning false here can break the
3654 simulation out of a hung loop. */
3655 if (schedule_stopped()) {
3656 schedule_vthread(thr, 0, false);
3657 return false;
3658 }
3659
3660 return true;
3661 }
3662
3663 /*
3664 * %jmp/1xz <pc>, <flag>
3665 */
of_JMP1XZ(vthread_t thr,vvp_code_t cp)3666 bool of_JMP1XZ(vthread_t thr, vvp_code_t cp)
3667 {
3668 if (thr->flags[cp->bit_idx[0]] != BIT4_0)
3669 thr->pc = cp->cptr;
3670
3671 /* Normally, this returns true so that the processor just
3672 keeps going to the next instruction. However, if there was
3673 a $stop or vpiStop, returning false here can break the
3674 simulation out of a hung loop. */
3675 if (schedule_stopped()) {
3676 schedule_vthread(thr, 0, false);
3677 return false;
3678 }
3679
3680 return true;
3681 }
3682
3683 /*
3684 * The %join instruction causes the thread to wait for one child
3685 * to die. If a child is already dead (and a zombie) then I reap
3686 * it and go on. Otherwise, I mark myself as waiting in a join so that
3687 * children know to wake me when they finish.
3688 */
3689
// Reap one ended child of thr, transferring the child's automatic
// context (if any) from the write stack to the read stack first.
static void do_join(vthread_t thr, vthread_t child)
{
      assert(child->parent == thr);

	/* If the immediate child thread is in an automatic scope... */
      if (child->wt_context) {
	      /* and is the top level task/function thread... */
	    if (thr->wt_context != thr->rd_context) {
		    /* Pop the child context from the write context stack. */
		  vvp_context_t child_context = thr->wt_context;
		  thr->wt_context = vvp_get_stacked_context(child_context);

		    /* Push the child context onto the read context stack */
		  vvp_set_stacked_context(child_context, thr->rd_context);
		  thr->rd_context = child_context;
	    }
      }

      // Finally, dispose of the finished child thread.
      vthread_reap(child);
}
3710
do_join_opcode(vthread_t thr)3711 static bool do_join_opcode(vthread_t thr)
3712 {
3713 assert( !thr->i_am_joining );
3714 assert( !thr->children.empty());
3715
3716 // Are there any children that have already ended? If so, then
3717 // join with that one.
3718 for (set<vthread_t>::iterator cur = thr->children.begin()
3719 ; cur != thr->children.end() ; ++cur) {
3720 vthread_t curp = *cur;
3721 if (! curp->i_have_ended)
3722 continue;
3723
3724 // found something!
3725 do_join(thr, curp);
3726 return true;
3727 }
3728
3729 // Otherwise, tell my children to awaken me when they end,
3730 // then pause.
3731 thr->i_am_joining = 1;
3732 return false;
3733 }
3734
of_JOIN(vthread_t thr,vvp_code_t)3735 bool of_JOIN(vthread_t thr, vvp_code_t)
3736 {
3737 return do_join_opcode(thr);
3738 }
3739
3740 /*
3741 * This %join/detach <n> instruction causes the thread to detach
3742 * threads that were created by an earlier %fork.
3743 */
bool of_JOIN_DETACH(vthread_t thr, vvp_code_t cp)
{
      // <n> must account for all of this thread's current children.
      unsigned long count = cp->number;

      assert(count == thr->children.size());

      // Detach (or reap) children one at a time; both branches remove
      // the child from thr->children, so the loop terminates.
      while (! thr->children.empty()) {
	    vthread_t child = *thr->children.begin();
	    assert(child->parent == thr);

	      // We cannot detach automatic tasks/functions within an
	      // automatic scope. If we try to do that, we might make
	      // a mess of the allocation of the context. Note that it
	      // is OK if the child context is distinct (See %exec_ufunc.)
	    assert(child->wt_context==0 || thr->wt_context!=child->wt_context);
	    if (child->i_have_ended) {
		    // If the child has already ended, then reap it.
		  vthread_reap(child);

	    } else {
		    // Otherwise move it from children to
		    // detached_children and flag it as detached.
		  size_t res = child->parent->children.erase(child);
		  assert(res == 1);
		  child->i_am_detached = 1;
		  thr->detached_children.insert(child);
	    }
      }

      return true;
}
3773
3774 /*
3775 * %load/ar <array-label>, <index>;
3776 */
of_LOAD_AR(vthread_t thr,vvp_code_t cp)3777 bool of_LOAD_AR(vthread_t thr, vvp_code_t cp)
3778 {
3779 unsigned idx = cp->bit_idx[0];
3780 unsigned adr = thr->words[idx].w_int;
3781 double word;
3782
3783 /* The result is 0.0 if the address is undefined. */
3784 if (thr->flags[4] == BIT4_1) {
3785 word = 0.0;
3786 } else {
3787 word = cp->array->get_word_r(adr);
3788 }
3789
3790 thr->push_real(word);
3791 return true;
3792 }
3793
3794 template <typename ELEM>
load_dar(vthread_t thr,vvp_code_t cp)3795 static bool load_dar(vthread_t thr, vvp_code_t cp)
3796 {
3797 int64_t adr = thr->words[3].w_int;
3798 vvp_net_t*net = cp->net;
3799 assert(net);
3800
3801 vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
3802 assert(obj);
3803
3804 vvp_darray*darray = obj->get_object().peek<vvp_darray>();
3805
3806 ELEM word;
3807 if (darray &&
3808 (adr >= 0) && (thr->flags[4] == BIT4_0)) // A defined address >= 0
3809 darray->get_word(adr, word);
3810 else
3811 dq_default(word, obj->size());
3812
3813 vthread_push(thr, word);
3814 return true;
3815 }
3816
3817 /*
3818 * %load/dar/r <array-label>;
3819 */
bool of_LOAD_DAR_R(vthread_t thr, vvp_code_t cp)
{
      // Dynamic-array load of a real element; see load_dar<>().
      return load_dar<double>(thr, cp);
}
3824
3825 /*
3826 * %load/dar/str <array-label>;
3827 */
bool of_LOAD_DAR_STR(vthread_t thr, vvp_code_t cp)
{
      // Dynamic-array load of a string element; see load_dar<>().
      return load_dar<string>(thr, cp);
}
3832
3833 /*
3834 * %load/dar/vec4 <array-label>;
3835 */
bool of_LOAD_DAR_VEC4(vthread_t thr, vvp_code_t cp)
{
      // Dynamic-array load of a vec4 element; see load_dar<>().
      return load_dar<vvp_vector4_t>(thr, cp);
}
3840
3841 /*
3842 * %load/obj <var-label>
3843 */
of_LOAD_OBJ(vthread_t thr,vvp_code_t cp)3844 bool of_LOAD_OBJ(vthread_t thr, vvp_code_t cp)
3845 {
3846 vvp_net_t*net = cp->net;
3847 vvp_fun_signal_object*fun = dynamic_cast<vvp_fun_signal_object*> (net->fun);
3848 assert(fun);
3849
3850 vvp_object_t val = fun->get_object();
3851 thr->push_object(val);
3852
3853 return true;
3854 }
3855
3856 /*
3857 * %load/obja <index>
3858 * Loads the object from array, using index <index> as the index
3859 * value. If flags[4] == 1, the calculation of <index> may have
3860 * failed, so push nil.
3861 */
of_LOAD_OBJA(vthread_t thr,vvp_code_t cp)3862 bool of_LOAD_OBJA(vthread_t thr, vvp_code_t cp)
3863 {
3864 unsigned idx = cp->bit_idx[0];
3865 unsigned adr = thr->words[idx].w_int;
3866 vvp_object_t word;
3867
3868 /* The result is 0.0 if the address is undefined. */
3869 if (thr->flags[4] == BIT4_1) {
3870 ; // Return nil
3871 } else {
3872 cp->array->get_word_obj(adr, word);
3873 }
3874
3875 thr->push_object(word);
3876 return true;
3877 }
3878
3879 /*
3880 * %load/real <var-label>
3881 */
of_LOAD_REAL(vthread_t thr,vvp_code_t cp)3882 bool of_LOAD_REAL(vthread_t thr, vvp_code_t cp)
3883 {
3884 __vpiHandle*tmp = cp->handle;
3885 t_vpi_value val;
3886
3887 val.format = vpiRealVal;
3888 vpi_get_value(tmp, &val);
3889
3890 thr->push_real(val.value.real);
3891
3892 return true;
3893 }
3894
3895 /*
3896 * %load/str <var-label>
3897 */
of_LOAD_STR(vthread_t thr,vvp_code_t cp)3898 bool of_LOAD_STR(vthread_t thr, vvp_code_t cp)
3899 {
3900 vvp_net_t*net = cp->net;
3901
3902
3903 vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
3904 assert(fun);
3905
3906 const string&val = fun->get_string();
3907 thr->push_str(val);
3908
3909 return true;
3910 }
3911
of_LOAD_STRA(vthread_t thr,vvp_code_t cp)3912 bool of_LOAD_STRA(vthread_t thr, vvp_code_t cp)
3913 {
3914 unsigned idx = cp->bit_idx[0];
3915 unsigned adr = thr->words[idx].w_int;
3916 string word;
3917
3918 if (thr->flags[4] == BIT4_1) {
3919 word = "";
3920 } else {
3921 word = cp->array->get_word_str(adr);
3922 }
3923
3924 thr->push_str(word);
3925 return true;
3926 }
3927
3928
3929 /*
3930 * %load/vec4 <net>
3931 */
of_LOAD_VEC4(vthread_t thr,vvp_code_t cp)3932 bool of_LOAD_VEC4(vthread_t thr, vvp_code_t cp)
3933 {
3934 // Push a placeholder onto the stack in order to reserve the
3935 // stack space. Use a reference for the stack top as a target
3936 // for the load.
3937 thr->push_vec4(vvp_vector4_t());
3938 vvp_vector4_t&sig_value = thr->peek_vec4();
3939
3940 vvp_net_t*net = cp->net;
3941
3942 // For the %load to work, the functor must actually be a
3943 // signal functor. Only signals save their vector value.
3944 vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
3945 if (sig == 0) {
3946 cerr << thr->get_fileline()
3947 << "%load/v error: Net arg not a signal? "
3948 << (net->fil ? typeid(*net->fil).name() : typeid(*net->fun).name()) << endl;
3949 assert(sig);
3950 }
3951
3952 // Extract the value from the signal and directly into the
3953 // target stack position.
3954 sig->vec4_value(sig_value);
3955
3956 return true;
3957 }
3958
3959 /*
3960 * %load/vec4a <arr>, <adrx>
3961 */
bool of_LOAD_VEC4A(vthread_t thr, vvp_code_t cp)
{
      // The index register that holds the word address.
      int adr_index = cp->bit_idx[0];

      long adr = thr->words[adr_index].w_int;

	// If flags[4] is set, then the calculation of the address
	// failed, and this load should return X instead of the actual
	// value. (The original comment said flag[3]; the code tests
	// flags[4], consistent with the other array loads here.)
      if (thr->flags[4] == BIT4_1) {
	    vvp_vector4_t tmp (cp->array->get_word_size(), BIT4_X);
	    thr->push_vec4(tmp);
	    return true;
      }

      // Fetch the addressed word and push it on the vec4 stack.
      vvp_vector4_t tmp (cp->array->get_word(adr));
      thr->push_vec4(tmp);
      return true;
}
3981
// Compute vala % valb for vectors wider than a native integer, using
// a bit-array shift-and-subtract long division. The result replaces
// vala. Negative operands are handled by 2s-complementing on the way
// in, and the result takes the sign of the left operand. Any x/z bit
// in either operand makes the whole result X.
static void do_verylong_mod(vvp_vector4_t&vala, const vvp_vector4_t&valb,
			    bool left_is_neg, bool right_is_neg)
{
      // Verilog %: the remainder carries the sign of the dividend.
      bool out_is_neg = left_is_neg;
      const int len=vala.size();
      // z holds the (magnitude of the) dividend, a the 2s complement
      // of the divisor, t a scratch row; one bit per byte.
      unsigned char *a, *z, *t;
      a = new unsigned char[len+1];
      z = new unsigned char[len+1];
      t = new unsigned char[len+1];

      unsigned char carry;
      unsigned char temp;

      int mxa = -1, mxz = -1;
      int i;
      int current, copylen;

      // Convert both operands to their magnitudes, bit by bit, with a
      // running 2s-complement carry for the negative ones.
      unsigned lb_carry = left_is_neg? 1 : 0;
      unsigned rb_carry = right_is_neg? 1 : 0;
      for (int idx = 0 ; idx < len ; idx += 1) {
	    unsigned lb = vala.value(idx);
	    unsigned rb = valb.value(idx);

	    // Any x/z bit (encoded with bit 1 set) poisons the result.
	    if ((lb | rb) & 2) {
		  delete []t;
		  delete []z;
		  delete []a;
		  vvp_vector4_t tmp(len, BIT4_X);
		  vala = tmp;
		  return;
	    }

	    if (left_is_neg) {
		  lb = (1-lb) + lb_carry;
		  lb_carry = (lb & ~1)? 1 : 0;
		  lb &= 1;
	    }
	    if (right_is_neg) {
		  rb = (1-rb) + rb_carry;
		  rb_carry = (rb & ~1)? 1 : 0;
		  rb &= 1;
	    }

	    z[idx]=lb;
	    a[idx]=1-rb; // for 2s complement add..
      }

      z[len]=0;
      a[len]=1;

      // mxa: index of the highest 0 in a (i.e. the divisor's MSB);
      // mxz: index of the dividend's highest 1 bit.
      for(i=len-1;i>=0;i--) {
	    if(! a[i]) {
		  mxa=i;
		  break;
	    }
      }

      for(i=len-1;i>=0;i--) {
	    if(z[i]) {
		  mxz=i;
		  break;
	    }
      }

      // Divisor larger than dividend: remainder is the dividend as-is.
      // A divisor of zero (mxa==-1) yields all-X.
      if((mxa>mxz)||(mxa==-1)) {
	    if(mxa==-1) {
		  delete []t;
		  delete []z;
		  delete []a;
		  vvp_vector4_t tmpx (len, BIT4_X);
		  vala = tmpx;
		  return;
	    }

	    goto tally;
      }

      copylen = mxa + 2;
      current = mxz - mxa;

      // Trial-subtract the divisor at each shift position, keeping
      // the difference (in z) whenever it does not go negative
      // (carry out of the 2s-complement add means no borrow).
      while(current > -1) {
	    carry = 1;
	    for(i=0;i<copylen;i++) {
		  temp = z[i+current] + a[i] + carry;
		  t[i] = (temp&1);
		  carry = (temp>>1);
	    }

	    if(carry) {
		  for(i=0;i<copylen;i++) {
			z[i+current] = t[i];
		  }
	    }

	    current--;
      }

 tally:

      // Convert the remainder in z back to a vec4, re-applying the
      // output sign via 2s complement when needed.
      vvp_vector4_t tmp (len, BIT4_X);
      carry = out_is_neg? 1 : 0;
      for (int idx = 0 ; idx < len ; idx += 1) {
	    unsigned ob = z[idx];
	    if (out_is_neg) {
		  ob = (1-ob) + carry;
		  carry = (ob & ~1)? 1 : 0;
		  ob = ob & 1;
	    }
	    tmp.set_bit(idx, ob?BIT4_1:BIT4_0);
      }
      vala = tmp;
      delete []t;
      delete []z;
      delete []a;
}
4097
of_MAX_WR(vthread_t thr,vvp_code_t)4098 bool of_MAX_WR(vthread_t thr, vvp_code_t)
4099 {
4100 double r = thr->pop_real();
4101 double l = thr->pop_real();
4102 if (r != r)
4103 thr->push_real(l);
4104 else if (l != l)
4105 thr->push_real(r);
4106 else if (r < l)
4107 thr->push_real(l);
4108 else
4109 thr->push_real(r);
4110 return true;
4111 }
4112
of_MIN_WR(vthread_t thr,vvp_code_t)4113 bool of_MIN_WR(vthread_t thr, vvp_code_t)
4114 {
4115 double r = thr->pop_real();
4116 double l = thr->pop_real();
4117 if (r != r)
4118 thr->push_real(l);
4119 else if (l != l)
4120 thr->push_real(r);
4121 else if (r < l)
4122 thr->push_real(r);
4123 else
4124 thr->push_real(l);
4125 return true;
4126 }
4127
// %mod: unsigned modulus of the two vec4 values on the stack. The
// right operand is popped; the result replaces the left operand in
// place on the stack top.
bool of_MOD(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t&vala = thr->peek_vec4();

      assert(vala.size()==valb.size());
      unsigned wid = vala.size();

      // Narrow vectors fit in a native integer; use the native %.
      if(wid <= 8*sizeof(unsigned long long)) {
	    unsigned long long lv = 0, rv = 0;

	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
		  unsigned long long lb = vala.value(idx);
		  unsigned long long rb = valb.value(idx);

		  // Any x/z bit in either operand makes the result X.
		  if ((lb | rb) & 2)
			goto x_out;

		  lv |= (unsigned long long) lb << idx;
		  rv |= (unsigned long long) rb << idx;
	    }

	    // Modulus by zero is X.
	    if (rv == 0)
		  goto x_out;

	    lv %= rv;

	    // Write the native result back, bit by bit, in place.
	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
		  vala.set_bit(idx, (lv&1)?BIT4_1 : BIT4_0);
		  lv >>= 1;
	    }

	    return true;

      } else {
	    // Wide vectors go through the bit-array long division.
	    do_verylong_mod(vala, valb, false, false);
	    return true;
      }

 x_out:
      vala = vvp_vector4_t(wid, BIT4_X);
      return true;
}
4171
4172 /*
4173 * %mod/s
4174 */
// %mod/s: signed modulus. The right operand is popped; the result
// replaces the left operand in place on the stack top.
bool of_MOD_S(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t&vala = thr->peek_vec4();

      assert(vala.size()==valb.size());
      unsigned wid = vala.size();

	/* Handle the case that we can fit the bits into a long-long
	   variable. We can use native % to do the work. */
      if(wid <= 8*sizeof(long long)) {
	    long long lv = 0, rv = 0;

	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
		  long long lb = vala.value(idx);
		  long long rb = valb.value(idx);

		  // Any x/z bit in either operand makes the result X.
		  if ((lb | rb) & 2)
			goto x_out;

		  lv |= (long long) lb << idx;
		  rv |= (long long) rb << idx;
	    }

	    // Modulus by zero is X.
	    if (rv == 0)
		  goto x_out;

	    // LLONG_MIN % -1 overflows in C; the result is zero.
	    if ((lv == LLONG_MIN) && (rv == -1))
		  goto zero_out;

	      /* Sign extend the signed operands when needed. */
	    if (wid < 8*sizeof(long long)) {
		  if (lv & (1LL << (wid-1)))
			lv |= -1ULL << wid;
		  if (rv & (1LL << (wid-1)))
			rv |= -1ULL << wid;
	    }

	    lv %= rv;

	    // Write the native result back, bit by bit, in place.
	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
		  vala.set_bit(idx, (lv&1)? BIT4_1 : BIT4_0);
		  lv >>= 1;
	    }

	      // vala is the top of the stack, edited in place, so we
	      // do not need to push the result.

	    return true;

      } else {

	    // Wide vectors go through the bit-array long division,
	    // passing the operand signs along.
	    bool left_is_neg = vala.value(vala.size()-1) == BIT4_1;
	    bool right_is_neg = valb.value(valb.size()-1) == BIT4_1;
	    do_verylong_mod(vala, valb, left_is_neg, right_is_neg);
	    return true;
      }

 x_out:
      vala = vvp_vector4_t(wid, BIT4_X);
      return true;
 zero_out:
      vala = vvp_vector4_t(wid, BIT4_0);
      return true;
}
4240
4241 /*
4242 * %mod/wr
4243 */
of_MOD_WR(vthread_t thr,vvp_code_t)4244 bool of_MOD_WR(vthread_t thr, vvp_code_t)
4245 {
4246 double r = thr->pop_real();
4247 double l = thr->pop_real();
4248 thr->push_real(fmod(l,r));
4249
4250 return true;
4251 }
4252
4253 /*
4254 * %pad/s <wid>
4255 */
of_PAD_S(vthread_t thr,vvp_code_t cp)4256 bool of_PAD_S(vthread_t thr, vvp_code_t cp)
4257 {
4258 unsigned wid = cp->number;
4259
4260 vvp_vector4_t&val = thr->peek_vec4();
4261 unsigned old_size = val.size();
4262
4263 // Sign-extend.
4264 if (old_size < wid)
4265 val.resize(wid, val.value(old_size-1));
4266 else
4267 val.resize(wid);
4268
4269 return true;
4270 }
4271
4272 /*
4273 * %pad/u <wid>
4274 */
of_PAD_U(vthread_t thr,vvp_code_t cp)4275 bool of_PAD_U(vthread_t thr, vvp_code_t cp)
4276 {
4277 unsigned wid = cp->number;
4278
4279 vvp_vector4_t&val = thr->peek_vec4();
4280 val.resize(wid, BIT4_0);
4281
4282 return true;
4283 }
4284
4285 /*
4286 * %part/s <wid>
4287 * %part/u <wid>
4288 * Two values are popped from the stack. First, pop the canonical
4289 * index of the part select, and second is the value to be
4290 * selected. The result is pushed back to the stack.
4291 */
// Shared implementation of %part/s and %part/u: pop the canonical
// base index, take a <wid>-bit part select of the stack top, and
// leave the result on the stack. Out-of-range bits read as X.
static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
{
      unsigned wid = cp->number;

      vvp_vector4_t base4 = thr->pop_vec4();
      vvp_vector4_t&value = thr->peek_vec4();

      // Start with all-X; only the in-range part is overwritten.
      vvp_vector4_t res (wid, BIT4_X);

	// NOTE: This is treating the vector as signed. Is that correct?
      int32_t base;
      bool value_ok = vector4_to_value(base4, base, signed_flag);
      // An undefined (x/z) base selects all-X.
      if (! value_ok) {
	    value = res;
	    return true;
      }

      // Select starts past the top of the value: all-X.
      if (base >= (int32_t)value.size()) {
	    value = res;
	    return true;
      }

      // Select ends at or below bit 0: all-X.
      if ((base+(int)wid) <= 0) {
	    value = res;
	    return true;
      }

      // A negative base means the low vbase result bits stay X and
      // the copy starts at bit 0 of the value.
      long vbase = 0;
      if (base < 0) {
	    vbase = -base;
	    wid -= vbase;
	    base = 0;
      }

      // Clip the copy at the top of the value; bits above stay X.
      if ((base+wid) > value.size()) {
	    wid = value.size() - base;
      }

      res .set_vec(vbase, value.subvalue(base, wid));
      value = res;

      return true;
}
4335
bool of_PART_S(vthread_t thr, vvp_code_t cp)
{
      // %part/s: part select with a signed base index.
      return of_PART_base(thr, cp, true);
}
4340
bool of_PART_U(vthread_t thr, vvp_code_t cp)
{
      // %part/u: part select with an unsigned base index.
      return of_PART_base(thr, cp, false);
}
4345
4346 /*
4347 * %parti/s <wid>, <basei>, <base_wid>
4348 * %parti/u <wid>, <basei>, <base_wid>
4349 *
4350 * Pop the value to be selected. The result is pushed back to the stack.
4351 */
// Shared implementation of %parti/s and %parti/u: like %part, but the
// base index is an immediate of <base_wid> bits rather than a popped
// value. Out-of-range bits read as X.
static bool of_PARTI_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
{
      unsigned wid = cp->number;
      uint32_t base = cp->bit_idx[0];
      uint32_t bwid = cp->bit_idx[1];

      vvp_vector4_t&value = thr->peek_vec4();

      // Start with all-X; only the in-range part is overwritten.
      vvp_vector4_t res (wid, BIT4_X);

	// NOTE: This is treating the vector as signed. Is that correct?
      // Sign-extend the immediate base from its declared width.
      int32_t use_base = base;
      if (signed_flag && bwid < 32 && (base&(1<<(bwid-1)))) {
	    use_base |= -1UL << bwid;
      }

      // Select starts past the top of the value: all-X.
      if (use_base >= (int32_t)value.size()) {
	    value = res;
	    return true;
      }

      // Select ends at or below bit 0: all-X.
      if ((use_base+(int32_t)wid) <= 0) {
	    value = res;
	    return true;
      }

      // A negative base means the low vbase result bits stay X and
      // the copy starts at bit 0 of the value.
      long vbase = 0;
      if (use_base < 0) {
	    vbase = -use_base;
	    wid -= vbase;
	    use_base = 0;
      }

      // Clip the copy at the top of the value; bits above stay X.
      if ((use_base+wid) > value.size()) {
	    wid = value.size() - use_base;
      }

      res .set_vec(vbase, value.subvalue(use_base, wid));
      value = res;

      return true;
}
4394
bool of_PARTI_S(vthread_t thr, vvp_code_t cp)
{
      // %parti/s: immediate part select with a signed base.
      return of_PARTI_base(thr, cp, true);
}
4399
bool of_PARTI_U(vthread_t thr, vvp_code_t cp)
{
      // %parti/u: immediate part select with an unsigned base.
      return of_PARTI_base(thr, cp, false);
}
4404
4405 /*
4406 * %mov/wu <dst>, <src>
4407 */
of_MOV_WU(vthread_t thr,vvp_code_t cp)4408 bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
4409 {
4410 unsigned dst = cp->bit_idx[0];
4411 unsigned src = cp->bit_idx[1];
4412
4413 thr->words[dst].w_uint = thr->words[src].w_uint;
4414 return true;
4415 }
4416
4417 /*
4418 * %mul
4419 */
of_MUL(vthread_t thr,vvp_code_t)4420 bool of_MUL(vthread_t thr, vvp_code_t)
4421 {
4422 vvp_vector4_t r = thr->pop_vec4();
4423 // Rather then pop l, use it directly from the stack. When we
4424 // assign to 'l', that will edit the top of the stack, which
4425 // replaces a pop and a pull.
4426 vvp_vector4_t&l = thr->peek_vec4();
4427
4428 l.mul(r);
4429 return true;
4430 }
4431
4432 /*
4433 * %muli <vala>, <valb>, <wid>
4434 *
 * Pop one operand, get the other operand from the arguments, and push
4436 * the result.
4437 */
of_MULI(vthread_t thr,vvp_code_t cp)4438 bool of_MULI(vthread_t thr, vvp_code_t cp)
4439 {
4440 unsigned wid = cp->number;
4441
4442 vvp_vector4_t&l = thr->peek_vec4();
4443
4444 // I expect that most of the bits of an immediate value are
4445 // going to be zero, so start the result vector with all zero
4446 // bits. Then we only need to replace the bits that are different.
4447 vvp_vector4_t r (wid, BIT4_0);
4448 get_immediate_rval (cp, r);
4449
4450 l.mul(r);
4451 return true;
4452 }
4453
of_MUL_WR(vthread_t thr,vvp_code_t)4454 bool of_MUL_WR(vthread_t thr, vvp_code_t)
4455 {
4456 double r = thr->pop_real();
4457 double l = thr->pop_real();
4458 thr->push_real(l * r);
4459
4460 return true;
4461 }
4462
of_NAND(vthread_t thr,vvp_code_t)4463 bool of_NAND(vthread_t thr, vvp_code_t)
4464 {
4465 vvp_vector4_t valr = thr->pop_vec4();
4466 vvp_vector4_t&vall = thr->peek_vec4();
4467 assert(vall.size() == valr.size());
4468 unsigned wid = vall.size();
4469
4470 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
4471 vvp_bit4_t lb = vall.value(idx);
4472 vvp_bit4_t rb = valr.value(idx);
4473 vall.set_bit(idx, ~(lb&rb));
4474 }
4475
4476 return true;
4477 }
4478
4479 /*
4480 * %new/cobj <vpi_object>
4481 * This creates a new cobject (SystemVerilog class object) and pushes
4482 * it to the stack. The <vpi-object> is a __vpiHandle that is a
4483 * vpiClassDefn object that defines the item to be created.
4484 */
of_NEW_COBJ(vthread_t thr,vvp_code_t cp)4485 bool of_NEW_COBJ(vthread_t thr, vvp_code_t cp)
4486 {
4487 const class_type*defn = dynamic_cast<const class_type*> (cp->handle);
4488 assert(defn);
4489
4490 vvp_object_t tmp (new vvp_cobject(defn));
4491 thr->push_object(tmp);
4492 return true;
4493 }
4494
/*
 * %new/darray <size-reg>, "<type-text>"
 *
 * Create a new dynamic array object and push it to the object stack.
 * The element type is encoded in the type string: b<N>/sb<N> are
 * 2-state vectors (with dedicated atom classes for the 8/16/32/64 bit
 * widths), v<N>/sv<N> are 4-state vectors, "r" is real, and "S" is
 * string. The array size comes from the selected index register.
 */
bool of_NEW_DARRAY(vthread_t thr, vvp_code_t cp)
{
      const char*text = cp->text;
	// Array size comes from the index register named in the opcode.
      size_t size = thr->words[cp->bit_idx[0]].w_int;
      unsigned word_wid;
      size_t n;

      vvp_object_t obj;
	// Exact matches for the natural 2-state atom widths first...
      if (strcmp(text,"b8") == 0) {
	    obj = new vvp_darray_atom<uint8_t>(size);
      } else if (strcmp(text,"b16") == 0) {
	    obj = new vvp_darray_atom<uint16_t>(size);
      } else if (strcmp(text,"b32") == 0) {
	    obj = new vvp_darray_atom<uint32_t>(size);
      } else if (strcmp(text,"b64") == 0) {
	    obj = new vvp_darray_atom<uint64_t>(size);
      } else if (strcmp(text,"sb8") == 0) {
	    obj = new vvp_darray_atom<int8_t>(size);
      } else if (strcmp(text,"sb16") == 0) {
	    obj = new vvp_darray_atom<int16_t>(size);
      } else if (strcmp(text,"sb32") == 0) {
	    obj = new vvp_darray_atom<int32_t>(size);
      } else if (strcmp(text,"sb64") == 0) {
	    obj = new vvp_darray_atom<int64_t>(size);
	// ...then arbitrary-width vectors. The "%zn" conversion
	// records how many characters sscanf consumed, and the
	// n==strlen(text) test makes sure the whole type string
	// matched (so e.g. "b8x" is rejected here).
	// NOTE(review): the signed sb<N>/sv<N> forms create the same
	// darray classes as the unsigned forms; signedness is
	// presumably handled elsewhere -- confirm.
      } else if ((1 == sscanf(text, "b%u%zn", &word_wid, &n)) &&
                 (n == strlen(text))) {
	    obj = new vvp_darray_vec2(size, word_wid);
      } else if ((1 == sscanf(text, "sb%u%zn", &word_wid, &n)) &&
                 (n == strlen(text))) {
	    obj = new vvp_darray_vec2(size, word_wid);
      } else if ((1 == sscanf(text, "v%u%zn", &word_wid, &n)) &&
                 (n == strlen(text))) {
	    obj = new vvp_darray_vec4(size, word_wid);
      } else if ((1 == sscanf(text, "sv%u%zn", &word_wid, &n)) &&
                 (n == strlen(text))) {
	    obj = new vvp_darray_vec4(size, word_wid);
      } else if (strcmp(text,"r") == 0) {
	    obj = new vvp_darray_real(size);
      } else if (strcmp(text,"S") == 0) {
	    obj = new vvp_darray_string(size);
      } else {
	    cerr << get_fileline()
		 << "Internal error: Unsupported dynamic array type: "
		 << text << "." << endl;
	    assert(0);
      }

      thr->push_object(obj);

      return true;
}
4546
/*
 * %noop
 * Do nothing.
 */
bool of_NOOP(vthread_t, vvp_code_t)
{
      return true;
}
4551
4552 /*
4553 * %nor/r
4554 */
of_NORR(vthread_t thr,vvp_code_t)4555 bool of_NORR(vthread_t thr, vvp_code_t)
4556 {
4557 vvp_vector4_t val = thr->pop_vec4();
4558
4559 vvp_bit4_t lb = BIT4_1;
4560
4561 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4562
4563 vvp_bit4_t rb = val.value(idx);
4564 if (rb == BIT4_1) {
4565 lb = BIT4_0;
4566 break;
4567 }
4568
4569 if (rb != BIT4_0)
4570 lb = BIT4_X;
4571 }
4572
4573 vvp_vector4_t res (1, lb);
4574 thr->push_vec4(res);
4575
4576 return true;
4577 }
4578
4579 /*
4580 * Push a null to the object stack.
4581 */
of_NULL(vthread_t thr,vvp_code_t)4582 bool of_NULL(vthread_t thr, vvp_code_t)
4583 {
4584 vvp_object_t tmp;
4585 thr->push_object(tmp);
4586 return true;
4587 }
4588
4589 /*
4590 * %and/r
4591 */
of_ANDR(vthread_t thr,vvp_code_t)4592 bool of_ANDR(vthread_t thr, vvp_code_t)
4593 {
4594 vvp_vector4_t val = thr->pop_vec4();
4595
4596 vvp_bit4_t lb = BIT4_1;
4597
4598 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4599 vvp_bit4_t rb = val.value(idx);
4600 if (rb == BIT4_0) {
4601 lb = BIT4_0;
4602 break;
4603 }
4604
4605 if (rb != 1)
4606 lb = BIT4_X;
4607 }
4608
4609 vvp_vector4_t res (1, lb);
4610 thr->push_vec4(res);
4611
4612 return true;
4613 }
4614
4615 /*
4616 * %nand/r
4617 */
of_NANDR(vthread_t thr,vvp_code_t)4618 bool of_NANDR(vthread_t thr, vvp_code_t)
4619 {
4620 vvp_vector4_t val = thr->pop_vec4();
4621
4622 vvp_bit4_t lb = BIT4_0;
4623 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4624
4625 vvp_bit4_t rb = val.value(idx);
4626 if (rb == BIT4_0) {
4627 lb = BIT4_1;
4628 break;
4629 }
4630
4631 if (rb != BIT4_1)
4632 lb = BIT4_X;
4633 }
4634
4635 vvp_vector4_t res (1, lb);
4636 thr->push_vec4(res);
4637
4638 return true;
4639 }
4640
4641 /*
4642 * %or/r
4643 */
of_ORR(vthread_t thr,vvp_code_t)4644 bool of_ORR(vthread_t thr, vvp_code_t)
4645 {
4646 vvp_vector4_t val = thr->pop_vec4();
4647
4648 vvp_bit4_t lb = BIT4_0;
4649 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4650 vvp_bit4_t rb = val.value(idx);
4651 if (rb == BIT4_1) {
4652 lb = BIT4_1;
4653 break;
4654 }
4655
4656 if (rb != BIT4_0)
4657 lb = BIT4_X;
4658 }
4659
4660 vvp_vector4_t res (1, lb);
4661 thr->push_vec4(res);
4662 return true;
4663 }
4664
4665 /*
4666 * %xor/r
4667 */
of_XORR(vthread_t thr,vvp_code_t)4668 bool of_XORR(vthread_t thr, vvp_code_t)
4669 {
4670 vvp_vector4_t val = thr->pop_vec4();
4671
4672 vvp_bit4_t lb = BIT4_0;
4673 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4674
4675 vvp_bit4_t rb = val.value(idx);
4676 if (rb == BIT4_1)
4677 lb = ~lb;
4678 else if (rb != BIT4_0) {
4679 lb = BIT4_X;
4680 break;
4681 }
4682 }
4683
4684 vvp_vector4_t res (1, lb);
4685 thr->push_vec4(res);
4686 return true;
4687 }
4688
4689 /*
4690 * %xnor/r
4691 */
of_XNORR(vthread_t thr,vvp_code_t)4692 bool of_XNORR(vthread_t thr, vvp_code_t)
4693 {
4694 vvp_vector4_t val = thr->pop_vec4();
4695
4696 vvp_bit4_t lb = BIT4_1;
4697 for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4698
4699 vvp_bit4_t rb = val.value(idx);
4700 if (rb == BIT4_1)
4701 lb = ~lb;
4702 else if (rb != BIT4_0) {
4703 lb = BIT4_X;
4704 break;
4705 }
4706 }
4707
4708 vvp_vector4_t res (1, lb);
4709 thr->push_vec4(res);
4710 return true;
4711 }
4712
4713 /*
4714 * %or
4715 */
of_OR(vthread_t thr,vvp_code_t)4716 bool of_OR(vthread_t thr, vvp_code_t)
4717 {
4718 vvp_vector4_t valb = thr->pop_vec4();
4719 vvp_vector4_t&vala = thr->peek_vec4();
4720 vala |= valb;
4721 return true;
4722 }
4723
4724 /*
4725 * %nor
4726 */
of_NOR(vthread_t thr,vvp_code_t)4727 bool of_NOR(vthread_t thr, vvp_code_t)
4728 {
4729 vvp_vector4_t valr = thr->pop_vec4();
4730 vvp_vector4_t&vall = thr->peek_vec4();
4731 assert(vall.size() == valr.size());
4732 unsigned wid = vall.size();
4733
4734 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
4735 vvp_bit4_t lb = vall.value(idx);
4736 vvp_bit4_t rb = valr.value(idx);
4737 vall.set_bit(idx, ~(lb|rb));
4738 }
4739
4740 return true;
4741 }
4742
4743 /*
4744 * %pop/obj <num>, <skip>
4745 */
of_POP_OBJ(vthread_t thr,vvp_code_t cp)4746 bool of_POP_OBJ(vthread_t thr, vvp_code_t cp)
4747 {
4748 unsigned cnt = cp->bit_idx[0];
4749 unsigned skip = cp->bit_idx[1];
4750
4751 thr->pop_object(cnt, skip);
4752 return true;
4753 }
4754
4755 /*
4756 * %pop/real <number>
4757 */
of_POP_REAL(vthread_t thr,vvp_code_t cp)4758 bool of_POP_REAL(vthread_t thr, vvp_code_t cp)
4759 {
4760 unsigned cnt = cp->number;
4761 thr->pop_real(cnt);
4762 return true;
4763 }
4764
4765 /*
4766 * %pop/str <number>
4767 */
of_POP_STR(vthread_t thr,vvp_code_t cp)4768 bool of_POP_STR(vthread_t thr, vvp_code_t cp)
4769 {
4770 unsigned cnt = cp->number;
4771 thr->pop_str(cnt);
4772 return true;
4773 }
4774
4775 /*
4776 * %pop/vec4 <number>
4777 */
of_POP_VEC4(vthread_t thr,vvp_code_t cp)4778 bool of_POP_VEC4(vthread_t thr, vvp_code_t cp)
4779 {
4780 unsigned cnt = cp->number;
4781 thr->pop_vec4(cnt);
4782 return true;
4783 }
4784
/*
 * %pow
 * %pow/s
 * Common implementation of the vector power operators. Pops the
 * exponent, then the base, from the vec4 stack, and pushes a result
 * with the width of the base operand.
 */
static bool of_POW_base(vthread_t thr, bool signed_flag)
{
      vvp_vector4_t valb = thr->pop_vec4();
      vvp_vector4_t vala = thr->pop_vec4();

      unsigned wid = vala.size();

	// Convert the operands to 2-state vectors for the arithmetic.
      vvp_vector2_t xv2 = vvp_vector2_t(vala, true);
      vvp_vector2_t yv2 = vvp_vector2_t(valb, true);


      /* If we have an X or Z in the arguments return X. */
      if (xv2.is_NaN() || yv2.is_NaN()) {
	    vvp_vector4_t tmp (wid, BIT4_X);
	    thr->push_vec4(tmp);
	    return true;
      }

      // Is the exponent negative? If so, table 5-6 in IEEE1364-2005
      // defines what value is returned.
      if (signed_flag && yv2.value(yv2.size()-1)) {
	    int a_val;
	    vvp_bit4_t pad = BIT4_0, lsb = BIT4_0;
	    if (vector2_to_value(xv2, a_val, true)) {
		    // 0 ** negative --> all X
		  if (a_val == 0) {
			pad = BIT4_X; lsb = BIT4_X;
		  }
		    // 1 ** negative --> 1
		  if (a_val == 1) {
			pad = BIT4_0; lsb = BIT4_1;
		  }
		    // -1 ** negative --> -1 if the exponent is odd,
		    // 1 if it is even (yv2.value(0) is the odd bit).
		  if (a_val == -1) {
			if (yv2.value(0)) {
			      pad = BIT4_1; lsb = BIT4_1;
			} else {
			      pad = BIT4_0; lsb = BIT4_1;
			}
		  }
	    }
	      // Any other base (|a| > 1) keeps the default result 0.
	    vvp_vector4_t tmp (wid, pad);
	    tmp.set_bit(0, lsb);
	    thr->push_vec4(tmp);
	    return true;
      }

      vvp_vector2_t result = pow(xv2, yv2);

      /* Copy only what we need of the result. If the result is too
	 small, zero-pad it. */
      for (unsigned jdx = 0; jdx < wid; jdx += 1) {
	    if (jdx >= result.size())
		  vala.set_bit(jdx, BIT4_0);
	    else
		  vala.set_bit(jdx, result.value(jdx) ? BIT4_1 : BIT4_0);
      }
      thr->push_vec4(vala);

      return true;
}
4847
/*
 * %pow
 * Unsigned flavor of the vector power operator.
 */
bool of_POW(vthread_t thr, vvp_code_t)
{
      return of_POW_base(thr, false);
}
4852
/*
 * %pow/s
 * Signed flavor of the vector power operator.
 */
bool of_POW_S(vthread_t thr, vvp_code_t)
{
      return of_POW_base(thr, true);
}
4857
of_POW_WR(vthread_t thr,vvp_code_t)4858 bool of_POW_WR(vthread_t thr, vvp_code_t)
4859 {
4860 double r = thr->pop_real();
4861 double l = thr->pop_real();
4862 thr->push_real(pow(l,r));
4863
4864 return true;
4865 }
4866
4867 /*
4868 * %prop/obj <pid>, <idx>
4869 *
4870 * Load an object value from the cobject and push it onto the object stack.
4871 */
of_PROP_OBJ(vthread_t thr,vvp_code_t cp)4872 bool of_PROP_OBJ(vthread_t thr, vvp_code_t cp)
4873 {
4874 unsigned pid = cp->number;
4875 unsigned idx = cp->bit_idx[0];
4876
4877 if (idx != 0) {
4878 assert(idx < vthread_s::WORDS_COUNT);
4879 idx = thr->words[idx].w_uint;
4880 }
4881
4882 vvp_object_t&obj = thr->peek_object();
4883 vvp_cobject*cobj = obj.peek<vvp_cobject>();
4884
4885 vvp_object_t val;
4886 cobj->get_object(pid, val, idx);
4887
4888 thr->push_object(val);
4889
4890 return true;
4891 }
4892
/*
 * Overloads that read property <pid> of a class object into the
 * matching value type. Real flavor.
 */
static void get_from_obj(unsigned pid, vvp_cobject*cobj, double&val)
{
      val = cobj->get_real(pid);
}
4897
/*
 * String flavor of the get_from_obj overload set.
 */
static void get_from_obj(unsigned pid, vvp_cobject*cobj, string&val)
{
      val = cobj->get_string(pid);
}
4902
/*
 * vec4 flavor of the get_from_obj overload set.
 */
static void get_from_obj(unsigned pid, vvp_cobject*cobj, vvp_vector4_t&val)
{
      cobj->get_vec4(pid, val);
}
4907
4908 template <typename ELEM>
prop(vthread_t thr,vvp_code_t cp)4909 static bool prop(vthread_t thr, vvp_code_t cp)
4910 {
4911 unsigned pid = cp->number;
4912
4913 vvp_object_t&obj = thr->peek_object();
4914 vvp_cobject*cobj = obj.peek<vvp_cobject>();
4915 assert(cobj);
4916
4917 ELEM val;
4918 get_from_obj(pid, cobj, val);
4919 vthread_push(thr, val);
4920
4921 return true;
4922 }
4923
/*
 * %prop/r <pid>
 *
 * Load a real value from the cobject and push it onto the real value
 * stack.
 */
bool of_PROP_R(vthread_t thr, vvp_code_t cp)
{
      return prop<double>(thr, cp);
}
4934
/*
 * %prop/str <pid>
 *
 * Load a string value from the cobject and push it onto the string
 * stack.
 */
bool of_PROP_STR(vthread_t thr, vvp_code_t cp)
{
      return prop<string>(thr, cp);
}
4945
/*
 * %prop/v <pid>
 *
 * Load a vector property <pid> from the cobject on the top of the
 * object stack and push it onto the vec4 stack.
 */
bool of_PROP_V(vthread_t thr, vvp_code_t cp)
{
      return prop<vvp_vector4_t>(thr, cp);
}
4956
/*
 * %pushi/real <mant>, <exp>
 *
 * Push an immediate real value, encoded as a 32-bit mantissa word and
 * an exponent word. Exponent bit 14 is the sign of the value, the low
 * 13 bits are the exponent biased by 0x1000, and a few exponent
 * patterns encode the infinities and NaN specially.
 */
bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp)
{
      double mant = cp->bit_idx[0];
      uint32_t imant = cp->bit_idx[0];
      int exp = cp->bit_idx[1];

      // Detect +infinity
      if (exp==0x3fff && imant==0) {
	    thr->push_real(INFINITY);
	    return true;
      }
      // Detect -infinity
      if (exp==0x7fff && imant==0) {
	    thr->push_real(-INFINITY);
	    return true;
      }
      // Detect NaN (exp==0x3fff with a non-zero mantissa, since the
      // +infinity test above already handled the zero mantissa).
      // NOTE(review): exp==0x7fff with a non-zero mantissa is not
      // special-cased and falls through to the ldexp path below --
      // confirm the code generator never emits that pattern.
      if (exp==0x3fff) {
	    thr->push_real(nan(""));
	    return true;
      }

	// Bit 14 of the exponent word is the sign of the value.
      double sign = (exp & 0x4000)? -1.0 : 1.0;

      exp &= 0x1fff;

	// The 13-bit exponent is biased by 0x1000.
      mant = sign * ldexp(mant, exp - 0x1000);
      thr->push_real(mant);
      return true;
}
4987
of_PUSHI_STR(vthread_t thr,vvp_code_t cp)4988 bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp)
4989 {
4990 const char*text = cp->text;
4991 thr->push_str(string(text));
4992 return true;
4993 }
4994
/*
 * %pushi/vec4 <vala>, <valb>, <wid>
 *
 * Push an immediate vec4 value of width <wid>. The low 32 bits are
 * encoded as two planes: bit pairs (a,b) taken from <vala>/<valb>
 * map to (0,0)=0, (1,0)=1, (0,1)=z, (1,1)=x. Bits above 32 are
 * implicitly zero.
 */
bool of_PUSHI_VEC4(vthread_t thr, vvp_code_t cp)
{
      uint32_t vala = cp->bit_idx[0];
      uint32_t valb = cp->bit_idx[1];
      unsigned wid = cp->number;

      // I expect that most of the bits of an immediate value are
      // going to be zero, so start the result vector with all zero
      // bits. Then we only need to replace the bits that are different.
      vvp_vector4_t val (wid, BIT4_0);

      // Special case: Immediate zero is super easy.
      if (vala==0 && valb==0) {
	    thr->push_vec4(val);
	    return true;
      }

      // Special case: If the value is defined (no X or Z) and fits
      // in an unsigned long, then use the setarray method to write
      // the value all in one shot.
      if ((valb==0) && (wid <= 8*sizeof(unsigned long))) {
	    unsigned long tmp = vala;
	    val.setarray(0, wid, &tmp);
	    thr->push_vec4(val);
	    return true;
      }

      // The %pushi/vec4 can create values bigger then 32 bits, but
      // only if the high bits are zero. So at most we need to run
      // through the loop below 32 times. Maybe less, if the target
      // width is less. We don't have to do anything special on that
      // because vala/valb bits will shift away so (vala|valb) will
      // turn to zero at or before 32 shifts.

      for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
	    uint32_t ba = 0;
	      // Convert the vala/valb bits to a ba number that can be
	      // used to select what goes into the value.
	    ba = (valb & 1) << 1;
	    ba |= vala & 1;

	    switch (ba) {
		case 1: // a=1, b=0: logic 1
		  val.set_bit(idx, BIT4_1);
		  break;
		case 2: // a=0, b=1: high impedance
		  val.set_bit(idx, BIT4_Z);
		  break;
		case 3: // a=1, b=1: unknown
		  val.set_bit(idx, BIT4_X);
		  break;
		default: // a=0, b=0: bit is already 0
		  break;
	    }

	    vala >>= 1;
	    valb >>= 1;
      }

      thr->push_vec4(val);

      return true;
}
5061
5062 /*
5063 * %pushv/str
5064 * Pops a vec4 value, and pushes a string.
5065 */
of_PUSHV_STR(vthread_t thr,vvp_code_t)5066 bool of_PUSHV_STR(vthread_t thr, vvp_code_t)
5067 {
5068 vvp_vector4_t vec = thr->pop_vec4();
5069
5070 size_t slen = (vec.size() + 7)/8;
5071 vector<char>buf;
5072 buf.reserve(slen);
5073
5074 for (size_t idx = 0 ; idx < vec.size() ; idx += 8) {
5075 char tmp = 0;
5076 size_t trans = 8;
5077 if (idx+trans > vec.size())
5078 trans = vec.size() - idx;
5079
5080 for (size_t bdx = 0 ; bdx < trans ; bdx += 1) {
5081 if (vec.value(idx+bdx) == BIT4_1)
5082 tmp |= 1 << bdx;
5083 }
5084
5085 if (tmp != 0)
5086 buf.push_back(tmp);
5087 }
5088
5089 string val;
5090 for (vector<char>::reverse_iterator cur = buf.rbegin()
5091 ; cur != buf.rend() ; ++cur) {
5092 val.push_back(*cur);
5093 }
5094
5095 thr->push_str(val);
5096
5097 return true;
5098 }
5099
/*
 * %putc/str/vec4 <var>, <mux>
 *
 * Pop an 8-bit vec4 value and write it as a character into the
 * string variable <var> at the index taken from word register <mux>
 * (register 0 means index 0). Negative or out-of-range indices and
 * NUL characters leave the string unchanged.
 */
bool of_PUTC_STR_VEC4(vthread_t thr, vvp_code_t cp)
{
      unsigned muxr = cp->bit_idx[0];
      int32_t mux = muxr? thr->words[muxr].w_int : 0;

	// Pop the value unconditionally, even if the index turns out
	// to be unusable below.
      vvp_vector4_t val = thr->pop_vec4();
      assert(val.size() == 8);

      if (mux < 0)
	    return true;

      /* Get the existing value of the string. If we find that the
	 index is too big for the string, then give up. */
      vvp_net_t*net = cp->net;
      vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
      assert(fun);

      string tmp = fun->get_string();
      if (tmp.size() <= (size_t)mux)
	    return true;

	// Convert the 8 vec4 bits to a char; X/Z bits read as 0.
      char val_str = 0;
      for (size_t idx = 0 ; idx < 8 ; idx += 1) {
	    if (val.value(idx)==BIT4_1)
		  val_str |= 1<<idx;
      }

      // It is a quirk of the Verilog standard that putc(..., 'h00)
      // has no effect. Test for that case here.
      if (val_str == 0)
	    return true;

      tmp[mux] = val_str;

	// Send the modified string back to the signal functor.
      vvp_send_string(vvp_net_ptr_t(cp->net, 0), tmp, thr->wt_context);
      return true;
}
5140
/*
 * Common implementation of the %qinsert/* instructions. Pops the
 * value to insert from the matching value stack and inserts it into
 * the queue attached to the instruction's net, at the index held in
 * word register 3. The maximum queue size comes from the register
 * named by bit_idx[0]. Negative indices, and indices flagged as
 * undefined (thread flag 4), are reported and the value is dropped.
 */
template <typename ELEM, class QTYPE>
static bool qinsert(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      int64_t idx = thr->words[3].w_int;
      ELEM value;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      if (idx < 0) {
	    cerr << thr->get_fileline()
		 << "Warning: cannot insert at a negative "
		 << get_queue_type(value)
		 << " index (" << idx << "). ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else if (thr->flags[4] != BIT4_0) {
	    cerr << thr->get_fileline()
		 << "Warning: cannot insert at an undefined "
		 << get_queue_type(value) << " index. ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else
	    queue->insert(idx, value, max_size);
      return true;
}
5169
/*
 * %qinsert/real <var-label>
 * Insert a real value into a queue. See qinsert above.
 */
bool of_QINSERT_REAL(vthread_t thr, vvp_code_t cp)
{
      return qinsert<double, vvp_queue_real>(thr, cp);
}
5177
/*
 * %qinsert/str <var-label>
 * Insert a string value into a queue. See qinsert above.
 */
bool of_QINSERT_STR(vthread_t thr, vvp_code_t cp)
{
      return qinsert<string, vvp_queue_string>(thr, cp);
}
5185
/*
 * %qinsert/v <var-label>
 * Insert a vec4 value into a queue; the vector width comes from
 * bit_idx[1]. See qinsert above.
 */
bool of_QINSERT_V(vthread_t thr, vvp_code_t cp)
{
      return qinsert<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}
5193
/*
 * Helper functions used in the queue pop templates: push a popped
 * value onto the stack that matches its type. Real flavor; the width
 * argument is unused.
 */
inline void push_value(vthread_t thr, double value, unsigned)
{
      thr->push_real(value);
}
5201
push_value(vthread_t thr,string value,unsigned)5202 inline void push_value(vthread_t thr, string value, unsigned)
5203 {
5204 thr->push_str(value);
5205 }
5206
push_value(vthread_t thr,vvp_vector4_t value,unsigned wid)5207 inline void push_value(vthread_t thr, vvp_vector4_t value, unsigned wid)
5208 {
5209 assert(wid == value.size());
5210 thr->push_vec4(value);
5211 }
5212
/*
 * Common implementation of the %qpop/* instructions. <get_val_func>
 * extracts (and removes) one element from the queue, <loc> names the
 * end being popped for the warning message, and <wid> is the vector
 * width (0 for real/string). The popped value -- or a default value
 * if the queue is empty -- is pushed onto the matching stack.
 */
template <typename ELEM, class QTYPE>
static bool q_pop(vthread_t thr, vvp_code_t cp,
		  void (*get_val_func)(vvp_queue*, ELEM&),
		  const char*loc, unsigned wid)
{
      vvp_net_t*net = cp->net;

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);

      size_t size = queue->get_size();

      ELEM value;
      if (size) {
	    get_val_func(queue, value);
      } else {
	      // Popping an empty queue yields a default value and a
	      // run time warning.
	    dq_default(value, wid);
	    cerr << thr->get_fileline()
		 << "Warning: pop_" << loc << "() on empty "
		 << get_queue_type(value) << "." << endl;
      }

      push_value(thr, value, wid);
      return true;
}
5238
/*
 * Read the last element of the queue into <value>, then remove it.
 */
template <typename ELEM>
static void get_back_value(vvp_queue*queue, ELEM&value)
{
      queue->get_word(queue->get_size()-1, value);
      queue->pop_back();
}
5245
/*
 * Shared driver for the %qpop/b/* instructions: pop from the back of
 * the queue.
 */
template <typename ELEM, class QTYPE>
static bool qpop_b(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      return q_pop<ELEM, QTYPE>(thr, cp, get_back_value<ELEM>, "back", wid);
}
5251
/*
 * %qpop/b/real <var-label>
 * Pop a real value from the back of a queue.
 */
bool of_QPOP_B_REAL(vthread_t thr, vvp_code_t cp)
{
      return qpop_b<double, vvp_queue_real>(thr, cp);
}
5259
/*
 * %qpop/b/str <var-label>
 * Pop a string value from the back of a queue.
 */
bool of_QPOP_B_STR(vthread_t thr, vvp_code_t cp)
{
      return qpop_b<string, vvp_queue_string>(thr, cp);
}
5267
/*
 * %qpop/b/v <var-label>
 * Pop a vec4 value from the back of a queue; the vector width comes
 * from bit_idx[0].
 */
bool of_QPOP_B_V(vthread_t thr, vvp_code_t cp)
{
      return qpop_b<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[0]);
}
5275
/*
 * Read the first element of the queue into <value>, then remove it.
 */
template <typename ELEM>
static void get_front_value(vvp_queue*queue, ELEM&value)
{
      queue->get_word(0, value);
      queue->pop_front();
}
5282
/*
 * Shared driver for the %qpop/f/* instructions: pop from the front
 * of the queue.
 */
template <typename ELEM, class QTYPE>
static bool qpop_f(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      return q_pop<ELEM, QTYPE>(thr, cp, get_front_value<ELEM>, "front", wid);
}
5288
5289
/*
 * %qpop/f/real <var-label>
 * Pop a real value from the front of a queue.
 */
bool of_QPOP_F_REAL(vthread_t thr, vvp_code_t cp)
{
      return qpop_f<double, vvp_queue_real>(thr, cp);
}
5297
/*
 * %qpop/f/str <var-label>
 * Pop a string value from the front of a queue.
 */
bool of_QPOP_F_STR(vthread_t thr, vvp_code_t cp)
{
      return qpop_f<string, vvp_queue_string>(thr, cp);
}
5305
/*
 * %qpop/f/v <var-label>
 * Pop a vec4 value from the front of a queue; the vector width comes
 * from bit_idx[0].
 */
bool of_QPOP_F_V(vthread_t thr, vvp_code_t cp)
{
      return qpop_f<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[0]);
}
5313
/*
 * These implement the %release/net and %release/reg instructions. The
 * %release/net instruction applies to a net kind of functor by
 * sending the release/net command to the command port. (See vvp_net.h
 * for details.) The %release/reg instruction is the same, but sends
 * the release/reg command instead. These are very similar to the
 * %deassign instruction.
 */
static bool do_release_vec(vvp_code_t cp, bool net_flag)
{
      vvp_net_t*net = cp->net;
      unsigned base = cp->bit_idx[0];
      unsigned width = cp->bit_idx[1];

      assert(net->fil);

	// Clip the release range against the filter size. A base past
	// the end means there is nothing to release at all.
      if (base >= net->fil->filter_size()) return true;
      if (base+width > net->fil->filter_size())
	    width = net->fil->filter_size() - base;

      bool full_sig = base == 0 && width == net->fil->filter_size();

      // XXXX Can't really do this if this is a partial release?
      net->fil->force_unlink();

      /* Do we release all or part of the net? */
      vvp_net_ptr_t ptr (net, 0);
      if (full_sig) {
	    net->fil->release(ptr, net_flag);
      } else {
	    net->fil->release_pv(ptr, base, width, net_flag);
      }
      net->fun->force_flag(false);

      return true;
}
5350
/*
 * %release/net <label>, <base>, <wid>
 * Net flavor of the release instruction. See do_release_vec.
 */
bool of_RELEASE_NET(vthread_t, vvp_code_t cp)
{
      return do_release_vec(cp, true);
}
5355
5356
/*
 * %release/reg <label>, <base>, <wid>
 * Register flavor of the release instruction. See do_release_vec.
 */
bool of_RELEASE_REG(vthread_t, vvp_code_t cp)
{
      return do_release_vec(cp, false);
}
5361
5362 /* The type is 1 for registers and 0 for everything else. */
of_RELEASE_WR(vthread_t,vvp_code_t cp)5363 bool of_RELEASE_WR(vthread_t, vvp_code_t cp)
5364 {
5365 vvp_net_t*net = cp->net;
5366 unsigned type = cp->bit_idx[0];
5367
5368 assert(net->fil);
5369 net->fil->force_unlink();
5370
5371 // Send a command to this signal to unforce itself.
5372 vvp_net_ptr_t ptr (net, 0);
5373 net->fil->release(ptr, type==0);
5374 return true;
5375 }
5376
of_REPLICATE(vthread_t thr,vvp_code_t cp)5377 bool of_REPLICATE(vthread_t thr, vvp_code_t cp)
5378 {
5379 int rept = cp->number;
5380 vvp_vector4_t val = thr->pop_vec4();
5381 vvp_vector4_t res (val.size() * rept, BIT4_X);
5382
5383 for (int idx = 0 ; idx < rept ; idx += 1) {
5384 res.set_vec(idx * val.size(), val);
5385 }
5386
5387 thr->push_vec4(res);
5388
5389 return true;
5390 }
5391
/*
 * Helpers for the %ret/* instructions: write a return value into the
 * calling (parent) thread's stack at the given depth. Real flavor.
 */
static void poke_val(vthread_t fun_thr, unsigned depth, double val)
{
      fun_thr->parent->poke_real(depth, val);
}
5396
poke_val(vthread_t fun_thr,unsigned depth,string val)5397 static void poke_val(vthread_t fun_thr, unsigned depth, string val)
5398 {
5399 fun_thr->parent->poke_str(depth, val);
5400 }
5401
/*
 * Overloads returning how many return-value depth slots of the given
 * type are recorded on the function thread. Real flavor.
 */
static size_t get_max(vthread_t fun_thr, double&)
{
      return fun_thr->args_real.size();
}
5406
/*
 * String flavor of the get_max overload set.
 */
static size_t get_max(vthread_t fun_thr, string&)
{
      return fun_thr->args_str.size();
}
5411
/*
 * vec4 flavor of the get_max overload set.
 */
static size_t get_max(vthread_t fun_thr, vvp_vector4_t&)
{
      return fun_thr->args_vec4.size();
}
5416
/*
 * Overloads returning the parent-stack depth recorded for argument
 * <index>; the last parameter only selects the overload. Real flavor.
 */
static unsigned get_depth(vthread_t fun_thr, size_t index, double&)
{
      return fun_thr->args_real[index];
}
5421
/*
 * String flavor of the get_depth overload set.
 */
static unsigned get_depth(vthread_t fun_thr, size_t index, string&)
{
      return fun_thr->args_str[index];
}
5426
/*
 * vec4 flavor of the get_depth overload set.
 */
static unsigned get_depth(vthread_t fun_thr, size_t index, vvp_vector4_t&)
{
      return fun_thr->args_vec4[index];
}
5431
get_func(vthread_t thr)5432 static vthread_t get_func(vthread_t thr)
5433 {
5434 vthread_t fun_thr = thr;
5435
5436 while (fun_thr->parent_scope->get_type_code() != vpiFunction) {
5437 assert(fun_thr->parent);
5438 fun_thr = fun_thr->parent;
5439 }
5440
5441 return fun_thr;
5442 }
5443
5444 template <typename ELEM>
ret(vthread_t thr,vvp_code_t cp)5445 static bool ret(vthread_t thr, vvp_code_t cp)
5446 {
5447 size_t index = cp->number;
5448 ELEM val;
5449 pop_value(thr, val, 0);
5450
5451 vthread_t fun_thr = get_func(thr);
5452 assert(index < get_max(fun_thr, val));
5453
5454 unsigned depth = get_depth(fun_thr, index, val);
5455 // Use the depth to put the value into the stack of
5456 // the parent thread.
5457 poke_val(fun_thr, depth, val);
5458 return true;
5459 }
5460
/*
 * %ret/real <index>
 * Return a real value to the caller. See the ret template.
 */
bool of_RET_REAL(vthread_t thr, vvp_code_t cp)
{
      return ret<double>(thr, cp);
}
5468
/*
 * %ret/str <index>
 * Return a string value to the caller. See the ret template.
 */
bool of_RET_STR(vthread_t thr, vvp_code_t cp)
{
      return ret<string>(thr, cp);
}
5476
/*
 * %ret/vec4 <index>, <offset>, <wid>
 *
 * Write the vec4 value from the top of the stack into the caller's
 * return slot for argument <index>, as a part select of <wid> bits
 * starting at the offset in word register <offset> (register 0
 * means offset 0). Selects that miss the target entirely, or whose
 * offset is flagged undefined, silently drop the value.
 */
bool of_RET_VEC4(vthread_t thr, vvp_code_t cp)
{
      size_t index = cp->number;
      unsigned off_index = cp->bit_idx[0];
      int wid = cp->bit_idx[1];
      vvp_vector4_t&val = thr->peek_vec4();

      vthread_t fun_thr = get_func(thr);
      assert(index < get_max(fun_thr, val));
      unsigned depth = get_depth(fun_thr, index, val);

      int off = off_index? thr->words[off_index].w_int : 0;
      const int sig_value_size = fun_thr->parent->peek_vec4(depth).size();

      unsigned val_size = val.size();

	// An undefined offset (thread flag 4) drops the value.
      if (off_index!=0 && thr->flags[4] == BIT4_1) {
	    thr->pop_vec4(1);
	    return true;
      }

	// Select entirely below the target: nothing to write.
      if (off <= -wid) {
	    thr->pop_vec4(1);
	    return true;
      }

	// Select entirely above the target: nothing to write.
      if (off >= sig_value_size) {
	    thr->pop_vec4(1);
	    return true;
      }

      // If the index is below the vector, then only assign the high
      // bits that overlap with the target
      if (off < 0) {
	    int use_off = -off;
	    wid -= use_off;
	    val = val.subvalue(use_off, wid);
	    val_size = wid;
	    off = 0;
      }

      // If the value is partly above the target, then only assign
      // the bits that overlap
      if ((off+wid) > sig_value_size) {
	    wid = sig_value_size - off;
	    val = val.subvalue(0, wid);
	    val.resize(wid);
	    val_size = wid;
      }

	// Full-width write replaces the slot outright; a part select
	// does a read-modify-write of the caller's value.
      if (off==0 && val_size==(unsigned)sig_value_size) {
	    fun_thr->parent->poke_vec4(depth, val);

      } else {
	    vvp_vector4_t tmp_dst = fun_thr->parent->peek_vec4(depth);
	    assert(wid>=0 && val.size() == (unsigned)wid);
	    tmp_dst.set_vec(off, val);
	    fun_thr->parent->poke_vec4(depth, tmp_dst);
      }

      thr->pop_vec4(1);
      return true;
}
5543
/*
 * Overloads that copy the value at <depth> in the parent thread's
 * stack onto this thread's stack; the last parameter only selects
 * the overload. Real flavor.
 */
static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, double&)
{
      thr->push_real(fun_thr->parent->peek_real(depth));
}
5548
/*
 * String flavor of the push_from_parent overload set.
 */
static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, string&)
{
      thr->push_str(fun_thr->parent->peek_str(depth));
}
5553
/*
 * vec4 flavor of the push_from_parent overload set.
 */
static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, vvp_vector4_t&)
{
      thr->push_vec4(fun_thr->parent->peek_vec4(depth));
}
5558
5559 template <typename ELEM>
retload(vthread_t thr,vvp_code_t cp)5560 static bool retload(vthread_t thr, vvp_code_t cp)
5561 {
5562 size_t index = cp->number;
5563 ELEM type;
5564
5565 vthread_t fun_thr = get_func(thr);
5566 assert(index < get_max(fun_thr, type));
5567
5568 unsigned depth = get_depth(fun_thr, index, type);
5569 // Use the depth to extract the values from the stack
5570 // of the parent thread.
5571 push_from_parent(thr, fun_thr, depth, type);
5572 return true;
5573 }
5574
/*
 * %retload/real <index>
 * Load a real return value from the caller. See the retload template.
 */
bool of_RETLOAD_REAL(vthread_t thr, vvp_code_t cp)
{
      return retload<double>(thr, cp);
}
5582
/*
 * %retload/str <index>
 * Load a string return value from the caller. See the retload template.
 */
bool of_RETLOAD_STR(vthread_t thr, vvp_code_t cp)
{
      return retload<string>(thr, cp);
}
5590
/*
 * %retload/vec4 <index>
 * Load a vec4 return value from the caller. See the retload template.
 */
bool of_RETLOAD_VEC4(vthread_t thr, vvp_code_t cp)
{
      return retload<vvp_vector4_t>(thr, cp);
}
5598
of_SCOPY(vthread_t thr,vvp_code_t)5599 bool of_SCOPY(vthread_t thr, vvp_code_t)
5600 {
5601 vvp_object_t tmp;
5602 thr->pop_object(tmp);
5603
5604 vvp_object_t&dest = thr->peek_object();
5605 dest.shallow_copy(tmp);
5606
5607 return true;
5608 }
5609
/*
 * Overloads that peek the top of the stack matching the value type,
 * without popping. Real flavor.
 */
static void thread_peek(vthread_t thr, double&value)
{
      value = thr->peek_real(0);
}
5614
/*
 * String flavor of the thread_peek overload set.
 */
static void thread_peek(vthread_t thr, string&value)
{
      value = thr->peek_str(0);
}
5619
// Peek (without popping) the top of the thread's vec4 stack.
static void thread_peek(vthread_t thr, vvp_vector4_t&value)
{
      value = thr->peek_vec4(0);
}
5624
/*
 * Common implementation for the %set/dar/obj/* opcodes. Peek (do not
 * pop) a value from the type-matching value stack and write it into
 * word <adr> of the darray on the top of the object stack. The address
 * comes from the index register named by <cp->number>.
 */
template <typename ELEM>
static bool set_dar_obj(vthread_t thr, vvp_code_t cp)
{
      unsigned adr = thr->words[cp->number].w_int;

      ELEM value;
      thread_peek(thr, value);

	// The target darray stays on the object stack.
      vvp_object_t&top = thr->peek_object();
      vvp_darray*darray = top.peek<vvp_darray>();
      assert(darray);

      darray->set_word(adr, value);
      return true;
}
5640
/*
 * %set/dar/obj/real <index>
 */
bool of_SET_DAR_OBJ_REAL(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<double>(thr, cp);
}
5648
/*
 * %set/dar/obj/str <index>
 */
bool of_SET_DAR_OBJ_STR(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<string>(thr, cp);
}
5656
/*
 * %set/dar/obj/vec4 <index>
 */
bool of_SET_DAR_OBJ_VEC4(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<vvp_vector4_t>(thr, cp);
}
5664
/*
 * %shiftl <idx>
 *
 * Left-shift the vec4 on the top of the stack in place. The shift
 * amount comes from index register <idx>. Flag bit 4 reports whether
 * the shift amount calculation was invalid.
 */
bool of_SHIFTL(vthread_t thr, vvp_code_t cp)
{
      int use_index = cp->number;
      uint64_t shift = thr->words[use_index].w_uint;

	// Operate on the stack top in place; nothing is pushed/popped.
      vvp_vector4_t&val = thr->peek_vec4();
      unsigned wid = val.size();

      if (thr->flags[4] == BIT4_1) {
	      // The result is 'bx if the shift amount is undefined
	    val = vvp_vector4_t(wid, BIT4_X);

      } else if (thr->flags[4] == BIT4_X || shift >= wid) {
	      // Shift is so big that all value is shifted out. Write
	      // a constant 0 result.
	    val = vvp_vector4_t(wid, BIT4_0);

      } else if (shift > 0) {
	      // Move the low wid-shift bits up and zero-fill the lsb.
	    vvp_vector4_t blk = val.subvalue(0, wid-shift);
	    vvp_vector4_t tmp (shift, BIT4_0);
	    val.set_vec(0, tmp);
	    val.set_vec(shift, blk);
      }

      return true;
}
5696
5697 /*
5698 * %shiftr <idx>
5699 * This is an unsigned right shift. The <idx> is a number that selects
5700 * the index register with the amount of the shift. This instruction
5701 * checks flag bit 4, which will be true if the shift is invalid.
5702 */
of_SHIFTR(vthread_t thr,vvp_code_t cp)5703 bool of_SHIFTR(vthread_t thr, vvp_code_t cp)
5704 {
5705 int use_index = cp->number;
5706 uint64_t shift = thr->words[use_index].w_uint;
5707
5708 vvp_vector4_t val = thr->pop_vec4();
5709 unsigned wid = val.size();
5710
5711 if (thr->flags[4] == BIT4_1) {
5712 val = vvp_vector4_t(wid, BIT4_X);
5713
5714 } else if (thr->flags[4] == BIT4_X || shift > wid) {
5715 val = vvp_vector4_t(wid, BIT4_0);
5716
5717 } else if (shift > 0) {
5718 vvp_vector4_t blk = val.subvalue(shift, wid-shift);
5719 vvp_vector4_t tmp (shift, BIT4_0);
5720 val.set_vec(0, blk);
5721 val.set_vec(wid-shift, tmp);
5722 }
5723
5724 thr->push_vec4(val);
5725 return true;
5726 }
5727
5728 /*
5729 * %shiftr/s <wid>
5730 */
of_SHIFTR_S(vthread_t thr,vvp_code_t cp)5731 bool of_SHIFTR_S(vthread_t thr, vvp_code_t cp)
5732 {
5733 int use_index = cp->number;
5734 uint64_t shift = thr->words[use_index].w_uint;
5735
5736 vvp_vector4_t val = thr->pop_vec4();
5737 unsigned wid = val.size();
5738
5739 vvp_bit4_t sign_bit = val.value(val.size()-1);
5740
5741 if (thr->flags[4] == BIT4_1) {
5742 val = vvp_vector4_t(wid, BIT4_X);
5743
5744 } else if (thr->flags[4] == BIT4_X || shift > wid) {
5745 val = vvp_vector4_t(wid, sign_bit);
5746
5747 } else if (shift > 0) {
5748 vvp_vector4_t blk = val.subvalue(shift, wid-shift);
5749 vvp_vector4_t tmp (shift, sign_bit);
5750 val.set_vec(0, blk);
5751 val.set_vec(wid-shift, tmp);
5752 }
5753
5754 thr->push_vec4(val);
5755 return true;
5756 }
5757
/*
 * %split/vec4 <wid>
 * Pop 1 value,
 * Take <wid> bits from the lsb,
 * Push the remaining msb,
 * Push the lsb.
 */
bool of_SPLIT_VEC4(vthread_t thr, vvp_code_t cp)
{
      unsigned lsb_wid = cp->number;

	// Replace the stack top with its msb part in place...
      vvp_vector4_t&val = thr->peek_vec4();
      assert(lsb_wid < val.size());

      vvp_vector4_t lsb = val.subvalue(0, lsb_wid);
      val = val.subvalue(lsb_wid, val.size()-lsb_wid);

	// ...then push the lsb part on top of it.
      thr->push_vec4(lsb);
      return true;
}
5778
/*
 * The following are used to allow the darray templates to print correctly.
 */
// Human-readable type name for warnings about real darrays.
inline static string get_darray_type(double&)
{
      return "darray<real>";
}
5786
// Human-readable type name for warnings about string darrays.
inline static string get_darray_type(string&)
{
      return "darray<string>";
}
5791
get_darray_type(vvp_vector4_t value)5792 inline static string get_darray_type(vvp_vector4_t value)
5793 {
5794 ostringstream buf;
5795 buf << "darray<vector[" << value.size() << "]>";
5796 string res = buf.str();
5797 return res;
5798 }
5799
/*
 * The following are used to allow a common template to be written for
 * darray real/string/vec4 operations
 */
// Pop the top of the real stack into value.
inline static void dar_pop_value(vthread_t thr, double&value)
{
      value = thr->pop_real();
}
5808
// Pop the top of the string stack into value.
inline static void dar_pop_value(vthread_t thr, string&value)
{
      value = thr->pop_str();
}
5813
// Pop the top of the vec4 stack into value.
inline static void dar_pop_value(vthread_t thr, vvp_vector4_t&value)
{
      value = thr->pop_vec4();
}
5818
/*
 * Common implementation for the %store/dar/* opcodes. Pop a value of
 * the ELEM type and store it into the darray variable <cp->net> at the
 * index held in index register 3. Invalid indices (negative, or
 * flagged undefined via flags[4]) and a nil darray produce warnings
 * instead of a store.
 */
template <typename ELEM>
static bool store_dar(vthread_t thr, vvp_code_t cp)
{
	// Index register 3 holds the darray word address.
      int64_t adr = thr->words[3].w_int;
      ELEM value;
      // FIXME: Can we get the size of the underlying array element
      // and then use the normal pop_value?
	// Pop unconditionally so the stack stays balanced even when
	// the store is skipped below.
      dar_pop_value(thr, value);

      vvp_net_t*net = cp->net;
      assert(net);

      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

	// May be nil if the darray was never constructed.
      vvp_darray*darray = obj->get_object().peek<vvp_darray>();

      if (adr < 0)
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to a negative " << get_darray_type(value)
	         << " index (" << adr << ")." << endl;
      else if (thr->flags[4] != BIT4_0)
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to an undefined " << get_darray_type(value)
	         << " index." << endl;
      else if (darray)
	    darray->set_word(adr, value);
      else
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to an undefined " << get_darray_type(value)
	         << "." << endl;

      return true;
}
5853
/*
 * %store/dar/real <var>
 */
bool of_STORE_DAR_R(vthread_t thr, vvp_code_t cp)
{
      return store_dar<double>(thr, cp);
}
5861
/*
 * %store/dar/str <var>
 */
bool of_STORE_DAR_STR(vthread_t thr, vvp_code_t cp)
{
      return store_dar<string>(thr, cp);
}
5869
/*
 * %store/dar/vec4 <var>
 */
bool of_STORE_DAR_VEC4(vthread_t thr, vvp_code_t cp)
{
      return store_dar<vvp_vector4_t>(thr, cp);
}
5877
of_STORE_OBJ(vthread_t thr,vvp_code_t cp)5878 bool of_STORE_OBJ(vthread_t thr, vvp_code_t cp)
5879 {
5880 /* set the value into port 0 of the destination. */
5881 vvp_net_ptr_t ptr (cp->net, 0);
5882
5883 vvp_object_t val;
5884 thr->pop_object(val);
5885
5886 vvp_send_object(ptr, val, thr->wt_context);
5887
5888 return true;
5889 }
5890
5891 /*
5892 * %store/obja <array-label> <index>
5893 */
of_STORE_OBJA(vthread_t thr,vvp_code_t cp)5894 bool of_STORE_OBJA(vthread_t thr, vvp_code_t cp)
5895 {
5896 unsigned idx = cp->bit_idx[0];
5897 unsigned adr = thr->words[idx].w_int;
5898
5899 vvp_object_t val;
5900 thr->pop_object(val);
5901
5902 cp->array->set_word(adr, val);
5903
5904 return true;
5905 }
5906
5907
/*
 * %store/prop/obj <pid>, <idx>
 *
 * Pop an object value from the object stack, and store the value into
 * the property of the object references by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_OBJ(vthread_t thr, vvp_code_t cp)
{
      size_t pid = cp->number;
      unsigned idx = cp->bit_idx[0];

	// A non-zero <idx> names the index register holding the
	// element index within the property.
      if (idx != 0) {
	    assert(idx < vthread_s::WORDS_COUNT);
	    idx = thr->words[idx].w_uint;
      }

	// The value to store...
      vvp_object_t val;
      thr->pop_object(val);

	// ...and the target object, left on the stack.
      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      cobj->set_object(pid, val, idx);

      return true;
}
5936
// Pop a property value from the real stack (width is unused).
static void pop_prop_val(vthread_t thr, double&val, unsigned)
{
      val = thr->pop_real();
}
5941
// Pop a property value from the string stack (width is unused).
static void pop_prop_val(vthread_t thr, string&val, unsigned)
{
      val = thr->pop_str();
}
5946
// Pop a property value from the vec4 stack and trim it down to the
// property width.
static void pop_prop_val(vthread_t thr, vvp_vector4_t&val, unsigned wid)
{
      val = thr->pop_vec4();
      assert(val.size() >= wid);
      val.resize(wid);
}
5953
// Store a real value into property pid of the class object.
static void set_val(vvp_cobject*cobj, size_t pid, double&val)
{
      cobj->set_real(pid, val);
}
5958
// Store a string value into property pid of the class object.
static void set_val(vvp_cobject*cobj, size_t pid, string&val)
{
      cobj->set_string(pid, val);
}
5963
// Store a vec4 value into property pid of the class object.
static void set_val(vvp_cobject*cobj, size_t pid, vvp_vector4_t&val)
{
      cobj->set_vec4(pid, val);
}
5968
/*
 * Common implementation for the %store/prop/* opcodes. Pop a value of
 * the ELEM type and store it into property <cp->number> of the class
 * object on the top of the object stack. The object stack is NOT
 * popped. <wid> is only meaningful for the vec4 flavor.
 */
template <typename ELEM>
static bool store_prop(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      size_t pid = cp->number;
      ELEM val;
      pop_prop_val(thr, val, wid); // Pop the value to store.

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      set_val(cobj, pid, val);

      return true;
}
5984
/*
 * %store/prop/r <id>
 *
 * Pop a real value from the real stack, and store the value into the
 * property of the object references by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_R(vthread_t thr, vvp_code_t cp)
{
      return store_prop<double>(thr, cp);
}
5996
/*
 * %store/prop/str <id>
 *
 * Pop a string value from the string stack, and store the value into
 * the property of the object references by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_STR(vthread_t thr, vvp_code_t cp)
{
      return store_prop<string>(thr, cp);
}
6008
/*
 * %store/prop/v <pid>, <wid>
 *
 * Store vector value into property <id> of cobject in the top of the
 * stack. Do NOT pop the object stack.
 */
bool of_STORE_PROP_V(vthread_t thr, vvp_code_t cp)
{
	// bit_idx[0] carries the property width for the vec4 flavor.
      return store_prop<vvp_vector4_t>(thr, cp, cp->bit_idx[0]);
}
6019
/*
 * Common implementation for the %store/qb/* opcodes: pop a value of
 * the ELEM type and push it onto the BACK of the queue variable
 * <cp->net>, bounded by the max size from the named index register.
 * QTYPE selects the concrete queue class.
 */
template <typename ELEM, class QTYPE>
static bool store_qb(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      ELEM value;
      vvp_net_t*net = cp->net;
	// Maximum queue size, from the index register named by <max-idx>.
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      queue->push_back(value, max_size);
      return true;
}
6033
/*
 * %store/qb/r <var-label>, <max-idx>
 */
bool of_STORE_QB_R(vthread_t thr, vvp_code_t cp)
{
      return store_qb<double, vvp_queue_real>(thr, cp);
}
6041
/*
 * %store/qb/str <var-label>, <max-idx>
 */
bool of_STORE_QB_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qb<string, vvp_queue_string>(thr, cp);
}
6049
/*
 * %store/qb/v <var-label>, <max-idx>, <wid>
 */
bool of_STORE_QB_V(vthread_t thr, vvp_code_t cp)
{
	// bit_idx[1] carries the vector width.
      return store_qb<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}
6057
/*
 * Common implementation for the %store/qdar/* opcodes: pop a value of
 * the ELEM type and write it into the queue variable <cp->net> at the
 * index held in index register 3. Negative or undefined indices
 * produce warnings instead of a store.
 */
template <typename ELEM, class QTYPE>
static bool store_qdar(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
	// Index register 3 holds the element index.
      int64_t idx = thr->words[3].w_int;
      ELEM value;
      vvp_net_t*net = cp->net;
	// Maximum queue size, from the index register named by <max-idx>.
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
	// Pop unconditionally so the stack stays balanced even when
	// the store is skipped below.
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      if (idx < 0) {
	    cerr << thr->get_fileline()
	         << "Warning: cannot assign to a negative "
	         << get_queue_type(value)
	         << " index (" << idx << "). ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else if (thr->flags[4] != BIT4_0) {
	    cerr << thr->get_fileline()
	         << "Warning: cannot assign to an undefined "
	         << get_queue_type(value) << " index. ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else
	    queue->set_word_max(idx, value, max_size);
      return true;
}
6086
/*
 * %store/qdar/r <var>, idx
 */
bool of_STORE_QDAR_R(vthread_t thr, vvp_code_t cp)
{
      return store_qdar<double, vvp_queue_real>(thr, cp);
}
6094
/*
 * %store/qdar/str <var>, idx
 */
bool of_STORE_QDAR_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qdar<string, vvp_queue_string>(thr, cp);
}
6102
/*
 * %store/qdar/v <var>, idx
 */
bool of_STORE_QDAR_V(vthread_t thr, vvp_code_t cp)
{
	// bit_idx[1] carries the vector width.
      return store_qdar<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}
6110
/*
 * Common implementation for the %store/qf/* opcodes: pop a value of
 * the ELEM type and push it onto the FRONT of the queue variable
 * <cp->net>, bounded by the max size from the named index register.
 */
template <typename ELEM, class QTYPE>
static bool store_qf(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      ELEM value;
      vvp_net_t*net = cp->net;
	// Maximum queue size, from the index register named by <max-idx>.
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      queue->push_front(value, max_size);
      return true;
}
/*
 * %store/qf/r <var-label>, <max-idx>
 */
bool of_STORE_QF_R(vthread_t thr, vvp_code_t cp)
{
      return store_qf<double, vvp_queue_real>(thr, cp);
}
6131
/*
 * %store/qf/str <var-label>, <max-idx>
 */
bool of_STORE_QF_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qf<string, vvp_queue_string>(thr, cp);
}
6139
/*
 * %store/qf/v <var-label>, <max-idx>, <wid>
 * (The comment previously said %store/qb/v, but this is the
 * queue-front flavor.)
 */
bool of_STORE_QF_V(vthread_t thr, vvp_code_t cp)
{
      return store_qf<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}
6147
/*
 * Common implementation for the %store/qobj/* opcodes: pop an object
 * (expected to hold queue elements) and copy its elements into the
 * queue variable <cp->net>, bounded by the max size from the named
 * index register.
 */
template <typename ELEM, class QTYPE>
static bool store_qobj(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      // FIXME: Can we actually use wid here?
      (void)wid;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);

      vvp_object_t src;
      thr->pop_object(src);

      queue->copy_elems(src, max_size);
      return true;
}
6165
// %store/qobj/r <var-label>, <max-idx> (name inferred from the
// of_STORE_QB_R/of_STORE_QF_R pattern above -- confirm in opcodes doc)
bool of_STORE_QOBJ_R(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<double, vvp_queue_real>(thr, cp);
}
6170
// %store/qobj/str flavor of store_qobj.
bool of_STORE_QOBJ_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<string, vvp_queue_string>(thr, cp);
}
6175
// %store/qobj/v flavor of store_qobj; bit_idx[1] carries the width.
bool of_STORE_QOBJ_V(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}
6180
// Overload adapter: deliver a real value through the net pointer,
// using the thread's write context.
static void vvp_send(vthread_t thr, vvp_net_ptr_t ptr, double&val)
{
      vvp_send_real(ptr, val, thr->wt_context);
}
6185
// Overload adapter: deliver a string value through the net pointer,
// using the thread's write context.
static void vvp_send(vthread_t thr, vvp_net_ptr_t ptr, string&val)
{
      vvp_send_string(ptr, val, thr->wt_context);
}
6190
/*
 * Common implementation for simple stores (%store/real, %store/str):
 * pop a value of the ELEM type and send it to the target variable.
 */
template <typename ELEM>
static bool store(vthread_t thr, vvp_code_t cp)
{
      ELEM val;
      pop_value(thr, val, 0);
      /* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);
      vvp_send(thr, ptr, val);
      return true;
}
6201
// %store/real <var-label>
bool of_STORE_REAL(vthread_t thr, vvp_code_t cp)
{
      return store<double>(thr, cp);
}
6206
/*
 * Common implementation for array word stores (%store/reala,
 * %store/stra): pop a value and store it into the addressed array
 * word. The store is skipped when flags[4] says the address
 * calculation was invalid, but the value is still popped.
 */
template <typename ELEM>
static bool storea(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;
      ELEM val;
      pop_value(thr, val, 0);

	// Skip the write if the index was flagged undefined.
      if (thr->flags[4] != BIT4_1)
	    cp->array->set_word(adr, val);

      return true;
}
6220
/*
 * %store/reala <var-label> <index>
 */
bool of_STORE_REALA(vthread_t thr, vvp_code_t cp)
{
      return storea<double>(thr, cp);
}
6228
// %store/str <var-label>
bool of_STORE_STR(vthread_t thr, vvp_code_t cp)
{
      return store<string>(thr, cp);
}
6233
/*
 * %store/stra <array-label> <index>
 */
bool of_STORE_STRA(vthread_t thr, vvp_code_t cp)
{
      return storea<string>(thr, cp);
}
6241
6242 /*
6243 * %store/vec4 <var-label>, <offset>, <wid>
6244 *
6245 * <offset> is the index register that contains the base offset into
6246 * the destination. If zero, the offset of 0 is used instead of index
6247 * register zero. The offset value is SIGNED, and can be negative.
6248 *
6249 * <wid> is the actual width, an unsigned number.
6250 *
6251 * This function tests flag bit 4. If that flag is set, and <offset>
6252 * is an actual index register (not zero) then this assumes that the
6253 * calculation of the <offset> contents failed, and the store is
6254 * aborted.
6255 *
6256 * NOTE: This instruction may loose the <wid> argument because it is
6257 * not consistent with the %store/vec4/<etc> instructions which have
6258 * no <wid>.
6259 */
bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
{
      vvp_net_ptr_t ptr(cp->net, 0);
      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
      unsigned off_index = cp->bit_idx[0];
      int wid = cp->bit_idx[1];

	// <offset>==0 means "no index register", i.e. a base offset of 0.
      int off = off_index? thr->words[off_index].w_int : 0;
      const int sig_value_size = sig->value_size();

      vvp_vector4_t&val = thr->peek_vec4();
      unsigned val_size = val.size();

      if ((int)val_size < wid) {
	    cerr << thr->get_fileline()
	         << "XXXX Internal error: val.size()=" << val_size
	         << ", expecting >= " << wid << endl;
      }
      assert((int)val_size >= wid);
	// Trim a wider stack value down to the store width.
      if ((int)val_size > wid) {
	    val.resize(wid);
	    val_size = wid;
      }

      // If there is a problem loading the index register, flags-4
      // will be set to 1, and we know here to skip the actual assignment.
      if (off_index!=0 && thr->flags[4] == BIT4_1) {
	    thr->pop_vec4(1);
	    return true;
      }

	// Value lies entirely below the target: nothing to assign.
      if (off <= -wid) {
	    thr->pop_vec4(1);
	    return true;
      }
	// Value lies entirely above the target: nothing to assign.
      if (off >= sig_value_size) {
	    thr->pop_vec4(1);
	    return true;
      }

      // If the index is below the vector, then only assign the high
      // bits that overlap with the target.
      if (off < 0) {
	    int use_off = -off;
	    wid -= use_off;
	    val = val.subvalue(use_off, wid);
	    val_size = wid;
	    off = 0;
      }

      // If the value is partly above the target, then only assign
      // the bits that overlap.
      if ((off+wid) > sig_value_size) {
	    wid = sig_value_size - off;
	    val = val.subvalue(0, wid);
	    val.resize(wid);
	    val_size = wid;
      }

	// Full-width store uses the simple send; otherwise a part
	// (offset/width) send.
      if (off==0 && val_size==(unsigned)sig_value_size)
	    vvp_send_vec4(ptr, val, thr->wt_context);
      else
	    vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context);

      thr->pop_vec4(1);
      return true;
}
6328
6329 /*
6330 * %store/vec4a <var-label>, <addr>, <offset>
6331 */
of_STORE_VEC4A(vthread_t thr,vvp_code_t cp)6332 bool of_STORE_VEC4A(vthread_t thr, vvp_code_t cp)
6333 {
6334 unsigned adr_index = cp->bit_idx[0];
6335 unsigned off_index = cp->bit_idx[1];
6336
6337 vvp_vector4_t&value = thr->peek_vec4();
6338
6339 long adr = adr_index? thr->words[adr_index].w_int : 0;
6340 long off = off_index? thr->words[off_index].w_int : 0;
6341
6342 // Suppress action if flags-4 is true.
6343 if (thr->flags[4] == BIT4_1) {
6344 thr->pop_vec4(1);
6345 return true;
6346 }
6347
6348 cp->array->set_word(adr, off, value);
6349
6350 thr->pop_vec4(1);
6351 return true;
6352 }
6353
6354 /*
6355 * %sub
6356 * pop r;
6357 * pop l;
6358 * push l-r;
6359 */
of_SUB(vthread_t thr,vvp_code_t)6360 bool of_SUB(vthread_t thr, vvp_code_t)
6361 {
6362 vvp_vector4_t r = thr->pop_vec4();
6363 vvp_vector4_t&l = thr->peek_vec4();
6364
6365 l.sub(r);
6366 return true;
6367 }
6368
/*
 * %subi <vala>, <valb>, <wid>
 *
 * Pop1 operand, get the other operand from the arguments, and push
 * the result.
 */
bool of_SUBI(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = cp->number;

	// The left operand stays on the stack and is updated in place.
      vvp_vector4_t&l = thr->peek_vec4();

      // I expect that most of the bits of an immediate value are
      // going to be zero, so start the result vector with all zero
      // bits. Then we only need to replace the bits that are different.
      vvp_vector4_t r (wid, BIT4_0);
      get_immediate_rval (cp, r);

      l.sub(r);

      return true;

}
6392
of_SUB_WR(vthread_t thr,vvp_code_t)6393 bool of_SUB_WR(vthread_t thr, vvp_code_t)
6394 {
6395 double r = thr->pop_real();
6396 double l = thr->pop_real();
6397 thr->push_real(l - r);
6398 return true;
6399 }
6400
/*
 * %substr <first>, <last>
 * Pop a string, take the substring (SystemVerilog style), and return
 * the result to the stack. This opcode actually works by editing the
 * string in place.
 */
bool of_SUBSTR(vthread_t thr, vvp_code_t cp)
{
	// <first>/<last> name the index registers with the bounds.
      int32_t first = thr->words[cp->bit_idx[0]].w_int;
      int32_t last = thr->words[cp->bit_idx[1]].w_int;
      string&val = thr->peek_str(0);

	// Out-of-range bounds yield the empty string (SystemVerilog
	// substr() semantics).
      if (first < 0 || last < first || last >= (int32_t)val.size()) {
	    val = string("");
	    return true;
      }

	// Inclusive bounds, hence the +1.
      val = val.substr(first, last-first+1);
      return true;
}
6421
6422 /*
6423 * %substr/vec4 <index>, <wid>
6424 */
of_SUBSTR_VEC4(vthread_t thr,vvp_code_t cp)6425 bool of_SUBSTR_VEC4(vthread_t thr, vvp_code_t cp)
6426 {
6427 unsigned sel_idx = cp->bit_idx[0];
6428 unsigned wid = cp->bit_idx[1];
6429
6430 int32_t sel = thr->words[sel_idx].w_int;
6431 string&val = thr->peek_str(0);
6432
6433 assert(wid%8 == 0);
6434
6435 if (sel < 0 || sel >= (int32_t)val.size()) {
6436 vvp_vector4_t res (wid, BIT4_0);
6437 thr->push_vec4(res);
6438 return true;
6439 }
6440
6441 vvp_vector4_t res (wid, BIT4_0);
6442
6443 assert(wid==8);
6444 unsigned char tmp = val[sel];
6445 for (int idx = 0 ; idx < 8 ; idx += 1) {
6446 if (tmp & (1<<idx))
6447 res.set_bit(idx, BIT4_1);
6448 }
6449
6450 thr->push_vec4(res);
6451 return true;
6452 }
6453
/*
 * %file_line
 * Record the current source file/line (from the VPI handle) in the
 * thread, and optionally echo the statement description.
 */
bool of_FILE_LINE(vthread_t thr, vvp_code_t cp)
{
      vpiHandle handle = cp->handle;

      /* When it is available, keep the file/line information in the
         thread for error/warning messages. */
      thr->set_fileline(vpi_get_str(vpiFile, handle),
                        vpi_get(vpiLineNo, handle));

	// Trace mode: print each statement as it is reached.
      if (show_file_line)
	    cerr << thr->get_fileline()
	         << vpi_get_str(_vpiDescription, handle) << endl;

      return true;
}
6469
6470 /*
6471 * %test_nul <var-label>;
6472 * Test if the object at the specified variable is nil. If so, write
6473 * "1" into flags[4], otherwise write "0" into flags[4].
6474 */
of_TEST_NUL(vthread_t thr,vvp_code_t cp)6475 bool of_TEST_NUL(vthread_t thr, vvp_code_t cp)
6476 {
6477 vvp_net_t*net = cp->net;
6478
6479 assert(net);
6480 vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
6481 assert(obj);
6482
6483 if (obj->get_object().test_nil())
6484 thr->flags[4] = BIT4_1;
6485 else
6486 thr->flags[4] = BIT4_0;
6487
6488 return true;
6489 }
6490
/*
 * %test_nul/a <array-label>, <index>
 * Test if the addressed array word is nil; set flags[4] accordingly.
 * An undefined address (flags[4] already 1) leaves the flag set.
 */
bool of_TEST_NUL_A(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;
      vvp_object_t word;

      /* If the address is undefined, return true. */
      if (thr->flags[4] == BIT4_1) {
	    return true;
      }

      cp->array->get_word_obj(adr, word);
      if (word.test_nil())
	    thr->flags[4] = BIT4_1;
      else
	    thr->flags[4] = BIT4_0;

      return true;
}
6510
of_TEST_NUL_OBJ(vthread_t thr,vvp_code_t)6511 bool of_TEST_NUL_OBJ(vthread_t thr, vvp_code_t)
6512 {
6513 if (thr->peek_object().test_nil())
6514 thr->flags[4] = BIT4_1;
6515 else
6516 thr->flags[4] = BIT4_0;
6517 return true;
6518 }
6519
6520 /*
6521 * %test_nul/prop <pid>, <idx>
6522 */
of_TEST_NUL_PROP(vthread_t thr,vvp_code_t cp)6523 bool of_TEST_NUL_PROP(vthread_t thr, vvp_code_t cp)
6524 {
6525 unsigned pid = cp->number;
6526 unsigned idx = cp->bit_idx[0];
6527
6528 if (idx != 0) {
6529 assert(idx < vthread_s::WORDS_COUNT);
6530 idx = thr->words[idx].w_uint;
6531 }
6532
6533 vvp_object_t&obj = thr->peek_object();
6534 vvp_cobject*cobj = obj.peek<vvp_cobject>();
6535
6536 vvp_object_t val;
6537 cobj->get_object(pid, val, idx);
6538
6539 if (val.test_nil())
6540 thr->flags[4] = BIT4_1;
6541 else
6542 thr->flags[4] = BIT4_0;
6543
6544 return true;
6545 }
6546
/*
 * %vpi_call ...
 * Execute a VPI (system task) call. Returning false suspends this
 * thread.
 */
bool of_VPI_CALL(vthread_t thr, vvp_code_t cp)
{
      vpip_execute_vpi_call(thr, cp->handle);

	// If the call triggered $stop, reschedule this thread so it
	// resumes after the interactive pause (unless finishing).
      if (schedule_stopped()) {
	    if (! schedule_finished())
		  schedule_vthread(thr, 0, false);

	    return false;
      }

	// If the call triggered $finish, stop running this thread.
      return schedule_finished()? false : true;
}
6560
/* %wait <label>;
 * Implement the wait by locating the vvp_net_T for the event, and
 * adding this thread to the threads list for the event. The some
 * argument is the reference to the functor to wait for. This must be
 * an event object of some sort.
 */
bool of_WAIT(vthread_t thr, vvp_code_t cp)
{
	// Functions may not block on events.
      assert(! thr->i_am_in_function);
      assert(! thr->waiting_for_event);
      thr->waiting_for_event = 1;

      /* Add this thread to the list in the event. */
      waitable_hooks_s*ep = dynamic_cast<waitable_hooks_s*> (cp->net->fun);
      assert(ep);
      thr->wait_next = ep->add_waiting_thread(thr);

      /* Return false to suspend this thread. */
      return false;
}
6581
/*
 * Implement the %wait/fork (SystemVerilog) instruction by suspending
 * the current thread until all the detached children have finished.
 */
bool of_WAIT_FORK(vthread_t thr, vvp_code_t)
{
      /* If a %wait/fork is being executed then the parent thread
       * cannot be waiting in a join or already waiting. */
      assert(! thr->i_am_in_function);
      assert(! thr->i_am_joining);
      assert(! thr->i_am_waiting);

      /* There should be no active children when waiting. */
      assert(thr->children.empty());

      /* If there are no detached children then there is nothing to
       * wait for. */
      if (thr->detached_children.empty()) return true;

      /* Flag that this process is waiting for the detached children
       * to finish and suspend it. */
      thr->i_am_waiting = 1;
      return false;
}
6606
6607 /*
6608 * %xnor
6609 */
of_XNOR(vthread_t thr,vvp_code_t)6610 bool of_XNOR(vthread_t thr, vvp_code_t)
6611 {
6612 vvp_vector4_t valr = thr->pop_vec4();
6613 vvp_vector4_t&vall = thr->peek_vec4();
6614 assert(vall.size() == valr.size());
6615 unsigned wid = vall.size();
6616
6617 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
6618
6619 vvp_bit4_t lb = vall.value(idx);
6620 vvp_bit4_t rb = valr.value(idx);
6621 vall.set_bit(idx, ~(lb ^ rb));
6622 }
6623
6624 return true;
6625 }
6626
6627 /*
6628 * %xor
6629 */
of_XOR(vthread_t thr,vvp_code_t)6630 bool of_XOR(vthread_t thr, vvp_code_t)
6631 {
6632 vvp_vector4_t valr = thr->pop_vec4();
6633 vvp_vector4_t&vall = thr->peek_vec4();
6634 assert(vall.size() == valr.size());
6635 unsigned wid = vall.size();
6636
6637 for (unsigned idx = 0 ; idx < wid ; idx += 1) {
6638
6639 vvp_bit4_t lb = vall.value(idx);
6640 vvp_bit4_t rb = valr.value(idx);
6641 vall.set_bit(idx, lb ^ rb);
6642 }
6643
6644 return true;
6645 }
6646
6647
/*
 * %zombie
 * Final opcode of a thread: clear the program counter and reclaim the
 * thread if nothing still references it.
 */
bool of_ZOMBIE(vthread_t thr, vvp_code_t)
{
      thr->pc = codespace_null();
	// Only delete if no parent or children still reference us.
      if ((thr->parent == 0) && (thr->children.empty())) {
	    if (thr->delay_delete)
		  schedule_del_thr(thr);
	    else
		  vthread_delete(thr);
      }
	// Always suspend: this thread is done.
      return false;
}
6659
/*
 * This is a phantom opcode used to call user defined functions. It
 * is used in code generated by the .ufunc statement. It contains a
 * pointer to the executable code of the function and a pointer to
 * a ufunc_core object that has all the port information about the
 * function.
 *
 * The child thread has already been created (and its result slot
 * seeded) by the of_EXEC_UFUNC_* caller. Run it synchronously; if it
 * completes immediately, join it, otherwise suspend this thread until
 * the child ends.
 */
static bool do_exec_ufunc(vthread_t thr, vvp_code_t cp, vthread_t child)
{
      __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

      assert(child_scope->get_type_code() == vpiFunction);
      assert(thr->children.empty());


      /* We can take a number of shortcuts because we know that a
         continuous assignment can only occur in a static scope. */
      assert(thr->wt_context == 0);
      assert(thr->rd_context == 0);

      /* If an automatic function, allocate a context for this call. */
      vvp_context_t child_context = 0;
      if (child_scope->is_automatic()) {
            child_context = vthread_alloc_context(child_scope);
            thr->wt_context = child_context;
            thr->rd_context = child_context;
      }

      child->wt_context = child_context;
      child->rd_context = child_context;

      /* Copy all the inputs to the ufunc object to the port
         variables of the function. This copies all the values
         atomically. */
      cp->ufunc_core_ptr->assign_bits_to_ports(child_context);
	// The of_REAP_UFUNC step still needs the child after it ends.
      child->delay_delete = 1;

      child->parent = thr;
      thr->children.insert(child);
      // This should be the only child
      assert(thr->children.size()==1);

	// Run the function body to completion (or until it blocks).
      child->is_scheduled = 1;
      child->i_am_in_function = 1;
      vthread_run(child);
      running_thread = thr;

	// If the function finished synchronously, join it now;
	// otherwise suspend and join when it ends.
      if (child->i_have_ended) {
	    do_join(thr, child);
	    return true;
      } else {
	    thr->i_am_joining = 1;
	    return false;
      }
}
6716
of_EXEC_UFUNC_REAL(vthread_t thr,vvp_code_t cp)6717 bool of_EXEC_UFUNC_REAL(vthread_t thr, vvp_code_t cp)
6718 {
6719 __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
6720 assert(child_scope);
6721
6722 /* Create a temporary thread and run it immediately. */
6723 vthread_t child = vthread_new(cp->cptr, child_scope);
6724 thr->push_real(0.0);
6725 child->args_real.push_back(0);
6726
6727 return do_exec_ufunc(thr, cp, child);
6728 }
6729
of_EXEC_UFUNC_VEC4(vthread_t thr,vvp_code_t cp)6730 bool of_EXEC_UFUNC_VEC4(vthread_t thr, vvp_code_t cp)
6731 {
6732 __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
6733 assert(child_scope);
6734
6735 vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(child_scope);
6736 assert(scope_func);
6737
6738 /* Create a temporary thread and run it immediately. */
6739 vthread_t child = vthread_new(cp->cptr, child_scope);
6740 thr->push_vec4(vvp_vector4_t(scope_func->get_func_width(), scope_func->get_func_init_val()));
6741 child->args_vec4.push_back(0);
6742
6743 return do_exec_ufunc(thr, cp, child);
6744 }
6745
6746 /*
6747 * This is a phantom opcode used to harvest the result of calling a user
6748 * defined function. It is used in code generated by the .ufunc statement.
6749 */
of_REAP_UFUNC(vthread_t thr,vvp_code_t cp)6750 bool of_REAP_UFUNC(vthread_t thr, vvp_code_t cp)
6751 {
6752 __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
6753 assert(child_scope);
6754
6755 /* Copy the output from the result variable to the output
6756 ports of the .ufunc device. */
6757 cp->ufunc_core_ptr->finish_thread();
6758
6759 /* If an automatic function, free the context for this call. */
6760 if (child_scope->is_automatic()) {
6761 vthread_free_context(thr->rd_context, child_scope);
6762 thr->wt_context = 0;
6763 thr->rd_context = 0;
6764 }
6765
6766 return true;
6767 }
6768