1 /*
2  * Copyright (c) 2001-2020 Stephen Williams (steve@icarus.com)
3  *
4  *    This source code is free software; you can redistribute it
5  *    and/or modify it in source code form under the terms of the GNU
6  *    General Public License as published by the Free Software
7  *    Foundation; either version 2 of the License, or (at your option)
8  *    any later version.
9  *
10  *    This program is distributed in the hope that it will be useful,
11  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *    GNU General Public License for more details.
14  *
15  *    You should have received a copy of the GNU General Public License
16  *    along with this program; if not, write to the Free Software
17  *    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18  */
19 
20 # include  "config.h"
21 # include  "vthread.h"
22 # include  "codes.h"
23 # include  "schedule.h"
24 # include  "ufunc.h"
25 # include  "event.h"
26 # include  "vpi_priv.h"
27 # include  "vvp_net_sig.h"
28 # include  "vvp_cobject.h"
29 # include  "vvp_darray.h"
30 # include  "class_type.h"
31 #ifdef CHECK_WITH_VALGRIND
32 # include  "vvp_cleanup.h"
33 #endif
34 # include  <set>
35 # include  <typeinfo>
36 # include  <vector>
37 # include  <cstdlib>
38 # include  <climits>
39 # include  <cstring>
40 # include  <cmath>
41 # include  <cassert>
42 
43 # include  <iostream>
44 # include  <sstream>
45 # include  <cstdio>
46 
47 using namespace std;
48 
49 /* This is the size of an unsigned long in bits. This is just a
50    convenience macro. */
51 # define CPU_WORD_BITS (8*sizeof(unsigned long))
52 # define TOP_BIT (1UL << (CPU_WORD_BITS-1))
53 
54 /*
55  * This vthread_s structure describes all there is to know about a
56  * thread, including its program counter, all the private bits it
57  * holds, and its place in other lists.
58  *
59  *
60  * ** Notes On The Interactions of %fork/%join/%end:
61  *
62  * The %fork instruction creates a new thread and pushes that into a
63  * set of children for the thread. This new thread, then, becomes a
64  * child of the current thread, and the current thread a parent of the
65  * new thread. Any child can be reaped by a %join.
66  *
67  * Children that are detached with %join/detach need to have a different
68  * parent/child relationship since the parent can still affect them if
69  * it uses the %disable/fork or %wait/fork opcodes. The i_am_detached
70  * flag and detached_children set are used for this relationship.
71  *
72  * It is a programming error for a thread that created threads to not
73  * %join (or %join/detach) as many as it created before it %ends. The
74  * children set will get messed up otherwise.
75  *
76  * The i_am_joining flag is a clue to children that the parent is
77  * blocked in a %join and may need to be scheduled. The %end
78  * instruction will check this flag in the parent to see if it should
79  * notify the parent that something interesting has happened.
80  *
81  * The i_have_ended flag, on the other hand, is used by threads to
82  * tell their parents that they are already dead. A thread that
83  * executes %end will set its own i_have_ended flag and let its parent
84  * reap it when the parent does the %join. If a thread has its
85  * schedule_parent_on_end flag set already when it %ends, then it
86  * reaps itself and simply schedules its parent. If a child has its
87  * i_have_ended flag set when a thread executes %join, then it is free
88  * to reap the child immediately.
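 *
 * As an illustration only (a hand-written sketch, not generated
 * compiler output), a parent that forks two children and waits for
 * both of them might execute a sequence along these lines:
 *
 *       %fork t_0, S_child0
 *       %fork t_1, S_child1
 *       %join
 *       %join
 *       %end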
89  */
90 
91 struct vthread_s {
92       vthread_s();
93 
94       void debug_dump(ostream&fd, const char*label_text);
95 
96 	/* This is the program counter. */
97       vvp_code_t pc;
98 	/* These hold the private thread bits. */
99       enum { FLAGS_COUNT = 512, WORDS_COUNT = 16 };
100       vvp_bit4_t flags[FLAGS_COUNT];
101 
102 	/* These are the word registers. */
103       union {
104 	    int64_t  w_int;
105 	    uint64_t w_uint;
106       } words[WORDS_COUNT];
107 
108 	// These vectors hold depths into the parent thread's
109 	// corresponding stacks. This is how the %ret/* instructions
110 	// get at parent thread arguments.
111       vector<unsigned> args_real;
112       vector<unsigned> args_str;
113       vector<unsigned> args_vec4;
114 
115     private:
116       vector<vvp_vector4_t>stack_vec4_;
117     public:
118       inline vvp_vector4_t pop_vec4(void)
119       {
120 	    assert(! stack_vec4_.empty());
121 	    vvp_vector4_t val = stack_vec4_.back();
122 	    stack_vec4_.pop_back();
123 	    return val;
124       }
125       inline void push_vec4(const vvp_vector4_t&val)
126       {
127 	    stack_vec4_.push_back(val);
128       }
129       inline const vvp_vector4_t& peek_vec4(unsigned depth)
130       {
131 	    unsigned size = stack_vec4_.size();
132 	    assert(depth < size);
133 	    unsigned use_index = size-1-depth;
134 	    return stack_vec4_[use_index];
135       }
136       inline vvp_vector4_t& peek_vec4(void)
137       {
138 	    unsigned use_index = stack_vec4_.size();
139 	    assert(use_index >= 1);
140 	    return stack_vec4_[use_index-1];
141       }
142       inline void poke_vec4(unsigned depth, const vvp_vector4_t&val)
143       {
144 	    assert(depth < stack_vec4_.size());
145 	    unsigned use_index = stack_vec4_.size()-1-depth;
146 	    stack_vec4_[use_index] = val;
147       }
148       inline void pop_vec4(unsigned cnt)
149       {
150 	    while (cnt > 0) {
151 		  stack_vec4_.pop_back();
152 		  cnt -= 1;
153 	    }
154       }
155 
156 
157     private:
158       vector<double> stack_real_;
159     public:
160       inline double pop_real(void)
161       {
162 	    assert(! stack_real_.empty());
163 	    double val = stack_real_.back();
164 	    stack_real_.pop_back();
165 	    return val;
166       }
167       inline void push_real(double val)
168       {
169 	    stack_real_.push_back(val);
170       }
171       inline double peek_real(unsigned depth)
172       {
173 	    assert(depth < stack_real_.size());
174 	    unsigned use_index = stack_real_.size()-1-depth;
175 	    return stack_real_[use_index];
176       }
177       inline void poke_real(unsigned depth, double val)
178       {
179 	    assert(depth < stack_real_.size());
180 	    unsigned use_index = stack_real_.size()-1-depth;
181 	    stack_real_[use_index] = val;
182       }
183       inline void pop_real(unsigned cnt)
184       {
185 	    while (cnt > 0) {
186 		  stack_real_.pop_back();
187 		  cnt -= 1;
188 	    }
189       }
190 
191 	/* Strings are operated on using a Forth-like operator
192 	   set. Items at the top of the stack (back()) are the objects
193 	   operated on except for special cases. New objects are
194 	   pushed onto the top (back()) and pulled from the top
195 	   (back()) only. */
196     private:
197       vector<string> stack_str_;
198     public:
199       inline string pop_str(void)
200       {
201 	    assert(! stack_str_.empty());
202 	    string val = stack_str_.back();
203 	    stack_str_.pop_back();
204 	    return val;
205       }
206       inline void push_str(const string&val)
207       {
208 	    stack_str_.push_back(val);
209       }
210       inline string&peek_str(unsigned depth)
211       {
212 	    assert(depth<stack_str_.size());
213 	    unsigned use_index = stack_str_.size()-1-depth;
214 	    return stack_str_[use_index];
215       }
216       inline void poke_str(unsigned depth, const string&val)
217       {
218 	    assert(depth < stack_str_.size());
219 	    unsigned use_index = stack_str_.size()-1-depth;
220 	    stack_str_[use_index] = val;
221       }
222       inline void pop_str(unsigned cnt)
223       {
224 	    while (cnt > 0) {
225 		  stack_str_.pop_back();
226 		  cnt -= 1;
227 	    }
228       }
229 
230 	/* Objects are also operated on in a stack. */
231     private:
232       enum { STACK_OBJ_MAX_SIZE = 32 };
233       vvp_object_t stack_obj_[STACK_OBJ_MAX_SIZE];
234       unsigned stack_obj_size_;
235     public:
236       inline vvp_object_t& peek_object(void)
237       {
238 	    assert(stack_obj_size_ > 0);
239 	    return stack_obj_[stack_obj_size_-1];
240       }
241       inline void pop_object(vvp_object_t&obj)
242       {
243 	    assert(stack_obj_size_ > 0);
244 	    stack_obj_size_ -= 1;
245 	    obj = stack_obj_[stack_obj_size_];
246 	    stack_obj_[stack_obj_size_].reset(0);
247       }
248       inline void pop_object(unsigned cnt, unsigned skip =0)
249       {
250 	    assert((cnt+skip) <= stack_obj_size_);
251 	    for (size_t idx = stack_obj_size_-skip-cnt ; idx < stack_obj_size_-skip ; idx += 1)
252 		  stack_obj_[idx].reset(0);
253 	    stack_obj_size_ -= cnt;
254 	    for (size_t idx = stack_obj_size_-skip ; idx < stack_obj_size_ ; idx += 1)
255 		  stack_obj_[idx] = stack_obj_[idx+skip];
256 	    for (size_t idx = stack_obj_size_ ; idx < stack_obj_size_+skip ; idx += 1)
257 		  stack_obj_[idx].reset(0);
258       }
259       inline void push_object(const vvp_object_t&obj)
260       {
261 	    assert(stack_obj_size_ < STACK_OBJ_MAX_SIZE);
262 	    stack_obj_[stack_obj_size_] = obj;
263 	    stack_obj_size_ += 1;
264       }
265 
266 	/* My parent sets this when it wants me to wake it up. */
267       unsigned i_am_joining      :1;
268       unsigned i_am_detached     :1;
269       unsigned i_am_waiting      :1;
270       unsigned i_am_in_function  :1; // True if running function code
271       unsigned i_have_ended      :1;
272       unsigned i_was_disabled    :1;
273       unsigned waiting_for_event :1;
274       unsigned is_scheduled      :1;
275       unsigned delay_delete      :1;
276 	/* This points to the children of the thread. */
277       set<struct vthread_s*>children;
278 	/* This points to the detached children of the thread. */
279       set<struct vthread_s*>detached_children;
280 	/* This points to my parent, if I have one. */
281       struct vthread_s*parent;
282 	/* This points to the containing scope. */
283       __vpiScope*parent_scope;
284 	/* This is used for keeping wait queues. */
285       struct vthread_s*wait_next;
286 	/* These are used to access automatically allocated items. */
287       vvp_context_t wt_context, rd_context;
288 	/* These are used to pass non-blocking event control information. */
289       vvp_net_t*event;
290       uint64_t ecount;
291 	/* Save the file/line information when available. */
292     private:
293       char *filenm_;
294       unsigned lineno_;
295     public:
296       void set_fileline(char *filenm, unsigned lineno);
297       string get_fileline();
298 
299       inline void cleanup()
300       {
301 	    if (i_was_disabled) {
302 		  stack_vec4_.clear();
303 		  stack_real_.clear();
304 		  stack_str_.clear();
305 		  pop_object(stack_obj_size_);
306 	    }
307 	    free(filenm_);
308 	    filenm_ = 0;
309 	    assert(stack_vec4_.empty());
310 	    assert(stack_real_.empty());
311 	    assert(stack_str_.empty());
312 	    assert(stack_obj_size_ == 0);
313       }
314 };
315 
316 inline vthread_s::vthread_s()
317 {
318       stack_obj_size_ = 0;
319       filenm_ = 0;
320       lineno_ = 0;
321 }
322 
323 void vthread_s::set_fileline(char *filenm, unsigned lineno)
324 {
325       assert(filenm);
326       if (!filenm_ || (strcmp(filenm_, filenm) != 0)) {
327 	    free(filenm_);
328 	    filenm_ = strdup(filenm);
329       }
330       lineno_ = lineno;
331 }
332 
333 inline string vthread_s::get_fileline()
334 {
335       ostringstream buf;
336       if (filenm_) {
337 	    buf << filenm_ << ":" << lineno_ << ": ";
338       }
339       string res = buf.str();
340       return res;
341 }
342 
343 void vthread_s::debug_dump(ostream&fd, const char*label)
344 {
345       fd << "**** " << label << endl;
346       fd << "**** ThreadId: " << this << ", parent id: " << parent << endl;
347 
348       fd << "**** Flags: ";
349       for (int idx = 0 ; idx < FLAGS_COUNT ; idx += 1)
350 	    fd << flags[idx];
351       fd << endl;
352       fd << "**** vec4 stack..." << endl;
353       for (size_t idx = stack_vec4_.size() ; idx > 0 ; idx -= 1)
354 	    fd << "    " << (stack_vec4_.size()-idx) << ": " << stack_vec4_[idx-1] << endl;
355       fd << "**** str stack (" << stack_str_.size() << ")..." << endl;
356       fd << "**** obj stack (" << stack_obj_size_ << ")..." << endl;
357       fd << "**** args_vec4 array (" << args_vec4.size() << ")..." << endl;
358       for (size_t idx = 0 ; idx < args_vec4.size() ; idx += 1)
359 	    fd << "    " << idx << ": " << args_vec4[idx] << endl;
360       fd << "**** file/line (";
361       if (filenm_) fd << filenm_;
362       else fd << "<no file name>";
363       fd << ":" << lineno_ << ")" << endl;
364       fd << "**** Done ****" << endl;
365 }
366 
367 static void do_join(vthread_t thr, vthread_t child);
368 
369 __vpiScope* vthread_scope(struct vthread_s*thr)
370 {
371       return thr->parent_scope;
372 }
373 
374 struct vthread_s*running_thread = 0;
375 
376 string get_fileline()
377 {
378       return running_thread->get_fileline();
379 }
380 
381 void vthread_push(struct vthread_s*thr, double val)
382 {
383       thr->push_real(val);
384 }
385 
386 void vthread_push(struct vthread_s*thr, const string&val)
387 {
388       thr->push_str(val);
389 }
390 
391 void vthread_push(struct vthread_s*thr, const vvp_vector4_t&val)
392 {
393       thr->push_vec4(val);
394 }
395 
396 void vthread_pop_real(struct vthread_s*thr, unsigned depth)
397 {
398       thr->pop_real(depth);
399 }
400 
401 void vthread_pop_str(struct vthread_s*thr, unsigned depth)
402 {
403       thr->pop_str(depth);
404 }
405 
406 void vthread_pop_vec4(struct vthread_s*thr, unsigned depth)
407 {
408       thr->pop_vec4(depth);
409 }
410 
411 double vthread_get_real_stack(struct vthread_s*thr, unsigned depth)
412 {
413       return thr->peek_real(depth);
414 }
415 
416 const string&vthread_get_str_stack(struct vthread_s*thr, unsigned depth)
417 {
418       return thr->peek_str(depth);
419 }
420 
421 const vvp_vector4_t& vthread_get_vec4_stack(struct vthread_s*thr, unsigned depth)
422 {
423       return thr->peek_vec4(depth);
424 }
425 
426 /*
427  * Some thread management functions
428  */
429 /*
430  * This is a function to get a vvp_queue handle from the variable
431  * referenced by "net". If the queue is nil, then allocate it and
432  * assign the value to the net. Note that this function is
433  * parameterized by the queue type so that we can create the right
434  * derived type of queue object.
435  */
436 template <class VVP_QUEUE> static vvp_queue*get_queue_object(vthread_t thr, vvp_net_t*net)
437 {
438       vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
439       assert(obj);
440 
441       vvp_queue*queue = obj->get_object().peek<vvp_queue>();
442       if (queue == 0) {
443 	    assert(obj->get_object().test_nil());
444 	    queue = new VVP_QUEUE;
445 	    vvp_object_t val (queue);
446 	    vvp_net_ptr_t ptr (net, 0);
447 	    vvp_send_object(ptr, val, thr->wt_context);
448       }
449 
450       return queue;
451 }
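
/* A usage sketch (illustrative only; "vvp_queue_real" is a hypothetical
 * derived queue type standing in for whatever concrete type the caller
 * needs):
 *
 *       vvp_queue*queue = get_queue_object<vvp_queue_real>(thr, net);
 *
 * After this call the variable behind "net" holds a queue object, newly
 * created and installed if the variable was nil. */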
452 
453 /*
454  * The following are used to allow a common template to be written for
455  * queue real/string/vec4 operations
456  */
457 inline static void pop_value(vthread_t thr, double&value, unsigned)
458 {
459       value = thr->pop_real();
460 }
461 
462 inline static void pop_value(vthread_t thr, string&value, unsigned)
463 {
464       value = thr->pop_str();
465 }
466 
467 inline static void pop_value(vthread_t thr, vvp_vector4_t&value, unsigned wid)
468 {
469       value = thr->pop_vec4();
470       assert(value.size() == wid);
471 }
472 
473 /*
474  * The following are used to allow the queue templates to print correctly.
475  */
476 inline static string get_queue_type(double&)
477 {
478       return "queue<real>";
479 }
480 
481 inline static string get_queue_type(string&)
482 {
483       return "queue<string>";
484 }
485 
486 inline static string get_queue_type(vvp_vector4_t value)
487 {
488       ostringstream buf;
489       buf << "queue<vector[" << value.size() << "]>";
490       string res = buf.str();
491       return res;
492 }
493 
494 inline static void print_queue_value(double value)
495 {
496       cerr << value;
497 }
498 
499 inline static void print_queue_value(string value)
500 {
501       cerr << "\"" << value << "\"";
502 }
503 
504 inline static void print_queue_value(vvp_vector4_t value)
505 {
506       cerr << value;
507 }
508 
509 /*
510  * The following are used to get a darray/queue default value.
511  */
512 inline static void dq_default(double&value, unsigned)
513 {
514       value = 0.0;
515 }
516 
517 inline static void dq_default(string&value, unsigned)
518 {
519       value = "";
520 }
521 
522 inline static void dq_default(vvp_vector4_t&value, unsigned wid)
523 {
524       value = vvp_vector4_t(wid);
525 }
526 
527 
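/*
 * Narrow a vector value to the given width by keeping only the low
 * "width" bits. The input is expected to be at least as wide as the
 * requested width.
 */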
528 template <class T> T coerce_to_width(const T&that, unsigned width)
529 {
530       if (that.size() == width)
531 	    return that;
532 
533       assert(that.size() > width);
534       T res (width);
535       for (unsigned idx = 0 ;  idx < width ;  idx += 1)
536 	    res.set_bit(idx, that.value(idx));
537 
538       return res;
539 }
540 
541 /* Explicitly instantiate the vvp_vector4_t version of coerce_to_width(). */
542 template vvp_vector4_t coerce_to_width(const vvp_vector4_t&that,
543                                        unsigned width);
544 
545 
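/*
 * Multiply a multi-word value by a single-word immediate. Both the
 * input and the result are arrays of "words" machine words, least
 * significant word first, with carries propagated into higher words.
 */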
546 static void multiply_array_imm(unsigned long*res, unsigned long*val,
547 			       unsigned words, unsigned long imm)
548 {
549       for (unsigned idx = 0 ; idx < words ; idx += 1)
550 	    res[idx] = 0;
551 
552       for (unsigned mul_idx = 0 ; mul_idx < words ; mul_idx += 1) {
553 	    unsigned long sum;
554 	    unsigned long tmp = multiply_with_carry(val[mul_idx], imm, sum);
555 
556 	    unsigned long carry = 0;
557 	    res[mul_idx] = add_with_carry(res[mul_idx], tmp, carry);
558 	    for (unsigned add_idx = mul_idx+1 ; add_idx < words ; add_idx += 1) {
559 		  res[add_idx] = add_with_carry(res[add_idx], sum, carry);
560 		  sum = 0;
561 	    }
562       }
563 }
564 
565 /*
566  * Allocate a context for use by a child thread. By preference, use
567  * the last freed context. If none available, create a new one. Add
568  * it to the list of live contexts in that scope.
569  */
570 static vvp_context_t vthread_alloc_context(__vpiScope*scope)
571 {
572       assert(scope->is_automatic());
573 
574       vvp_context_t context = scope->free_contexts;
575       if (context) {
576             scope->free_contexts = vvp_get_next_context(context);
577             for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
578                   scope->item[idx]->reset_instance(context);
579             }
580       } else {
581             context = vvp_allocate_context(scope->nitem);
582             for (unsigned idx = 0 ; idx < scope->nitem ; idx += 1) {
583                   scope->item[idx]->alloc_instance(context);
584             }
585       }
586 
587       vvp_set_next_context(context, scope->live_contexts);
588       scope->live_contexts = context;
589 
590       return context;
591 }
592 
593 /*
594  * Free a context previously allocated to a child thread by pushing it
595  * onto the freed context stack. Remove it from the list of live contexts
596  * in that scope.
597  */
598 static void vthread_free_context(vvp_context_t context, __vpiScope*scope)
599 {
600       assert(scope->is_automatic());
601       assert(context);
602 
603       if (context == scope->live_contexts) {
604             scope->live_contexts = vvp_get_next_context(context);
605       } else {
606             vvp_context_t tmp = scope->live_contexts;
607             while (context != vvp_get_next_context(tmp)) {
608                   assert(tmp);
609                   tmp = vvp_get_next_context(tmp);
610             }
611             vvp_set_next_context(tmp, vvp_get_next_context(context));
612       }
613 
614       vvp_set_next_context(context, scope->free_contexts);
615       scope->free_contexts = context;
616 }
617 
618 #ifdef CHECK_WITH_VALGRIND
619 void contexts_delete(struct __vpiScope*scope)
620 {
621       vvp_context_t context = scope->free_contexts;
622 
623       while (context) {
624 	    scope->free_contexts = vvp_get_next_context(context);
625 	    for (unsigned idx = 0; idx < scope->nitem; idx += 1) {
626 		  scope->item[idx]->free_instance(context);
627 	    }
628 	    free(context);
629 	    context = scope->free_contexts;
630       }
631       free(scope->item);
632 }
633 #endif
634 
635 /*
636  * Create a new thread with the given start address.
637  */
638 vthread_t vthread_new(vvp_code_t pc, __vpiScope*scope)
639 {
640       vthread_t thr = new struct vthread_s;
641       thr->pc     = pc;
642 	//thr->bits4  = vvp_vector4_t(32);
643       thr->parent = 0;
644       thr->parent_scope = scope;
645       thr->wait_next = 0;
646       thr->wt_context = 0;
647       thr->rd_context = 0;
648 
649       thr->i_am_joining  = 0;
650       thr->i_am_detached = 0;
651       thr->i_am_waiting  = 0;
652       thr->i_am_in_function = 0;
653       thr->is_scheduled  = 0;
654       thr->i_have_ended  = 0;
655       thr->i_was_disabled = 0;
656       thr->delay_delete  = 0;
657       thr->waiting_for_event = 0;
658       thr->event  = 0;
659       thr->ecount = 0;
660 
661       thr->flags[0] = BIT4_0;
662       thr->flags[1] = BIT4_1;
663       thr->flags[2] = BIT4_X;
664       thr->flags[3] = BIT4_Z;
665       for (int idx = 4 ; idx < 8 ; idx += 1)
666 	    thr->flags[idx] = BIT4_X;
667 
668       scope->threads .insert(thr);
669       return thr;
670 }
671 
672 #ifdef CHECK_WITH_VALGRIND
673 #if 0
674 /*
675  * These are not currently correct. If you use them you will get
676  * double delete messages. There is still a leak related to a
677  * waiting event that needs to be investigated.
678  */
679 
680 static void wait_next_delete(vthread_t base)
681 {
682       while (base) {
683 	    vthread_t tmp = base->wait_next;
684 	    delete base;
685 	    base = tmp;
686 	    if (base->waiting_for_event == 0) break;
687       }
688 }
689 
690 static void child_delete(vthread_t base)
691 {
692       while (base) {
693 	    vthread_t tmp = base->child;
694 	    delete base;
695 	    base = tmp;
696       }
697 }
698 #endif
699 
700 void vthreads_delete(struct __vpiScope*scope)
701 {
702       for (std::set<vthread_t>::iterator cur = scope->threads.begin()
703 		 ; cur != scope->threads.end() ; ++ cur ) {
704 	    delete *cur;
705       }
706       scope->threads.clear();
707 }
708 #endif
709 
710 /*
711  * Reaping pulls the thread out of the stack of threads. If I have a
712  * child, then hand it over to my parent or fully detach it.
713  */
714 static void vthread_reap(vthread_t thr)
715 {
716       if (! thr->children.empty()) {
717 	    for (set<vthread_t>::iterator cur = thr->children.begin()
718 		       ; cur != thr->children.end() ; ++cur) {
719 		  vthread_t child = *cur;
720 		  assert(child);
721 		  assert(child->parent == thr);
722 		  child->parent = thr->parent;
723 	    }
724       }
725       if (! thr->detached_children.empty()) {
726 	    for (set<vthread_t>::iterator cur = thr->detached_children.begin()
727 		       ; cur != thr->detached_children.end() ; ++cur) {
728 		  vthread_t child = *cur;
729 		  assert(child);
730 		  assert(child->parent == thr);
731 		  assert(child->i_am_detached);
732 		  child->parent = 0;
733 		  child->i_am_detached = 0;
734 	    }
735       }
736       if (thr->parent) {
737 	      /* assert that the given element was removed. */
738 	    if (thr->i_am_detached) {
739 		  size_t res = thr->parent->detached_children.erase(thr);
740 		  assert(res == 1);
741 	    } else {
742 		  size_t res = thr->parent->children.erase(thr);
743 		  assert(res == 1);
744 	    }
745       }
746 
747       thr->parent = 0;
748 
749 	// Remove myself from the containing scope if needed.
750       thr->parent_scope->threads.erase(thr);
751 
752       thr->pc = codespace_null();
753 
754 	/* If this thread is not scheduled, then it is safe to delete
755 	   it now. Otherwise, let the schedule event (which will
756 	   execute the thread at of_ZOMBIE) delete the object. */
757       if ((thr->is_scheduled == 0) && (thr->waiting_for_event == 0)) {
758 	    assert(thr->children.empty());
759 	    assert(thr->wait_next == 0);
760 	    if (thr->delay_delete)
761 		  schedule_del_thr(thr);
762 	    else
763 		  vthread_delete(thr);
764       }
765 }
766 
767 void vthread_delete(vthread_t thr)
768 {
769       thr->cleanup();
770       delete thr;
771 }
772 
773 void vthread_mark_scheduled(vthread_t thr)
774 {
775       while (thr != 0) {
776 	    assert(thr->is_scheduled == 0);
777 	    thr->is_scheduled = 1;
778 	    thr = thr->wait_next;
779       }
780 }
781 
782 void vthread_delay_delete()
783 {
784       if (running_thread)
785 	    running_thread->delay_delete = 1;
786 }
787 
788 /*
789  * This function runs each thread by fetching an instruction,
790  * incrementing the PC, and executing the instruction. The thread may
791  * be the head of a list, so each thread is run so far as possible.
792  */
793 void vthread_run(vthread_t thr)
794 {
795       while (thr != 0) {
796 	    vthread_t tmp = thr->wait_next;
797 	    thr->wait_next = 0;
798 
799 	    assert(thr->is_scheduled);
800 	    thr->is_scheduled = 0;
801 
802             running_thread = thr;
803 
804 	    for (;;) {
805 		  vvp_code_t cp = thr->pc;
806 		  thr->pc += 1;
807 
808 		    /* Run the opcode implementation. If the execution of
809 		       the opcode returns false, then the thread is meant to
810 		       be paused, so break out of the loop. */
811 		  bool rc = (cp->opcode)(thr, cp);
812 		  if (rc == false)
813 			break;
814 	    }
815 
816 	    thr = tmp;
817       }
818       running_thread = 0;
819 }
820 
821 /*
822  * The CHUNK_LINK instruction is a special next pointer for linking
823  * chunks of code space. It's like a simplified %jmp.
824  */
825 bool of_CHUNK_LINK(vthread_t thr, vvp_code_t code)
826 {
827       assert(code->cptr);
828       thr->pc = code->cptr;
829       return true;
830 }
831 
832 /*
833  * This is called by an event functor to wake up all the threads on
834  * its list. I in fact created that list in the %wait instruction, and
835  * I also am certain that the waiting_for_event flag is set.
836  */
837 void vthread_schedule_list(vthread_t thr)
838 {
839       for (vthread_t cur = thr ;  cur ;  cur = cur->wait_next) {
840 	    assert(cur->waiting_for_event);
841 	    cur->waiting_for_event = 0;
842       }
843 
844       schedule_vthread(thr, 0);
845 }
846 
847 vvp_context_t vthread_get_wt_context()
848 {
849       if (running_thread)
850             return running_thread->wt_context;
851       else
852             return 0;
853 }
854 
855 vvp_context_t vthread_get_rd_context()
856 {
857       if (running_thread)
858             return running_thread->rd_context;
859       else
860             return 0;
861 }
862 
863 vvp_context_item_t vthread_get_wt_context_item(unsigned context_idx)
864 {
865       assert(running_thread && running_thread->wt_context);
866       return vvp_get_context_item(running_thread->wt_context, context_idx);
867 }
868 
869 vvp_context_item_t vthread_get_rd_context_item(unsigned context_idx)
870 {
871       assert(running_thread && running_thread->rd_context);
872       return vvp_get_context_item(running_thread->rd_context, context_idx);
873 }
874 
875 /*
876  * %abs/wr
877  */
878 bool of_ABS_WR(vthread_t thr, vvp_code_t)
879 {
880       thr->push_real( fabs(thr->pop_real()) );
881       return true;
882 }
883 
884 bool of_ALLOC(vthread_t thr, vvp_code_t cp)
885 {
886         /* Allocate a context. */
887       vvp_context_t child_context = vthread_alloc_context(cp->scope);
888 
889         /* Push the allocated context onto the write context stack. */
890       vvp_set_stacked_context(child_context, thr->wt_context);
891       thr->wt_context = child_context;
892 
893       return true;
894 }
895 
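/*
 * %and
 *
 * Pop the top two vec4 values, AND them bit-wise, and push the
 * result. As elsewhere, this pops one operand and folds it into the
 * value left at the top of the stack.
 */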
896 bool of_AND(vthread_t thr, vvp_code_t)
897 {
898       vvp_vector4_t valb = thr->pop_vec4();
899       vvp_vector4_t&vala = thr->peek_vec4();
900       assert(vala.size() == valb.size());
901       vala &= valb;
902       return true;
903 }
904 
905 /*
906  * This function must ALWAYS be called with the val set to the right
907  * size, and initialized with BIT4_0 bits. Certain optimizations rely
908  * on that.
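 *
 * Reading the code below, the immediate is carried in two 32-bit
 * words: bit_idx[0] (vala) holds the low bit of each 4-state digit and
 * bit_idx[1] (valb) holds the high bit, so a set valb bit marks an X/Z
 * position and valb==0 means the value is purely two-state. Treat this
 * as a sketch of the encoding rather than a formal specification.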
909  */
910 static void get_immediate_rval(vvp_code_t cp, vvp_vector4_t&val)
911 {
912       uint32_t vala = cp->bit_idx[0];
913       uint32_t valb = cp->bit_idx[1];
914       unsigned wid  = cp->number;
915 
916       if (valb == 0) {
917 	      // Special case: if the value is zero, we are done
918 	      // before we start.
919 	    if (vala == 0) return;
920 
921 	      // Special case: The value has no X/Z bits, so we can
922 	      // use the setarray method to write the value all at once.
923 	    unsigned use_wid = 8*sizeof(unsigned long);
924 	    if (wid < use_wid)
925 		  use_wid = wid;
926 	    unsigned long tmp[1];
927 	    tmp[0] = vala;
928 	    val.setarray(0, use_wid, tmp);
929 	    return;
930       }
931 
932 	// The immediate value can be bigger than 32 bits, but
933 	// only if the high bits are zero. So at most we need to run
934 	// through the loop below 32 times. Maybe less, if the target
935 	// width is less. We don't have to do anything special on that
936 	// because vala/valb bits will shift away so (vala|valb) will
937 	// turn to zero at or before 32 shifts.
938 
939       for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
940 	    uint32_t ba = 0;
941 	      // Convert the vala/valb bits to a ba number that
942 	      // matches the encoding of the vvp_bit4_t enumeration.
943 	    ba = (valb & 1) << 1;
944 	    ba |= vala & 1;
945 
946 	      // Note that the val is already pre-filled with BIT4_0
947 	      // bits, so we only need to set non-zero bit values.
948 	    if (ba) val.set_bit(idx, (vvp_bit4_t)ba);
949 
950 	    vala >>= 1;
951 	    valb >>= 1;
952       }
953 }
954 
955 /*
956  * %add
957  *
958  * Pop r,
959  * Pop l,
960  * Push l+r
961  *
962  * Pop 2 and push 1 is the same as pop 1 and replace the remaining top
963  * of the stack with a new value. That is what we will do.
964  */
965 bool of_ADD(vthread_t thr, vvp_code_t)
966 {
967       vvp_vector4_t r = thr->pop_vec4();
968 	// Rather than pop l, use it directly from the stack. When we
969 	// assign to 'l', that will edit the top of the stack, which
970 	// replaces a pop and a push.
971       vvp_vector4_t&l = thr->peek_vec4();
972 
973       l.add(r);
974 
975       return true;
976 }
977 
978 /*
979  * %addi <vala>, <valb>, <wid>
980  *
981  * Pop 1 operand, get the other operand from the arguments, and push
982  * the result.
983  */
984 bool of_ADDI(vthread_t thr, vvp_code_t cp)
985 {
986       unsigned wid = cp->number;
987 
988       vvp_vector4_t&l = thr->peek_vec4();
989 
990 	// I expect that most of the bits of an immediate value are
991 	// going to be zero, so start the result vector with all zero
992 	// bits. Then we only need to replace the bits that are different.
993       vvp_vector4_t r (wid, BIT4_0);
994       get_immediate_rval (cp, r);
995 
996       l.add(r);
997 
998       return true;
999 }
1000 
1001 /*
1002  * %add/wr
1003  */
1004 bool of_ADD_WR(vthread_t thr, vvp_code_t)
1005 {
1006       double r = thr->pop_real();
1007       double l = thr->pop_real();
1008       thr->push_real(l + r);
1009       return true;
1010 }
1011 
1012 /* %assign/ar <array>, <delay>
1013  * Generate an assignment event to a real array. Index register 3
1014  * contains the canonical address of the word in the memory. <delay>
1015  * is the delay in simulation time. The value to assign is popped
1016  * from the real value stack.
1017  */
1018 bool of_ASSIGN_AR(vthread_t thr, vvp_code_t cp)
1019 {
1020       long adr = thr->words[3].w_int;
1021       unsigned delay = cp->bit_idx[0];
1022       double value = thr->pop_real();
1023 
1024       if (adr >= 0) {
1025 	    schedule_assign_array_word(cp->array, adr, value, delay);
1026       }
1027 
1028       return true;
1029 }
1030 
1031 /* %assign/ar/d <array>, <delay_idx>
1032  * Generate an assignment event to a real array. Index register 3
1033  * contains the canonical address of the word in the memory.
1034  * <delay_idx> is the integer register that contains the delay value.
1035  */
1036 bool of_ASSIGN_ARD(vthread_t thr, vvp_code_t cp)
1037 {
1038       long adr = thr->words[3].w_int;
1039       vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
1040       double value = thr->pop_real();
1041 
1042       if (adr >= 0) {
1043 	    schedule_assign_array_word(cp->array, adr, value, delay);
1044       }
1045 
1046       return true;
1047 }
1048 
1049 /* %assign/ar/e <array>
1050  * Generate an assignment event to a real array. Index register 3
1051  * contains the canonical address of the word in the memory. The
1052  * value to assign is popped from the real value stack. The event
1053  * information is contained in the thread event control registers
1054  * and is set with %evctl.
1055  */
1056 bool of_ASSIGN_ARE(vthread_t thr, vvp_code_t cp)
1057 {
1058       long adr = thr->words[3].w_int;
1059       double value = thr->pop_real();
1060 
1061       if (adr >= 0) {
1062 	    if (thr->ecount == 0) {
1063 		  schedule_assign_array_word(cp->array, adr, value, 0);
1064 	    } else {
1065 		  schedule_evctl(cp->array, adr, value, thr->event,
1066 		                 thr->ecount);
1067 	    }
1068       }
1069 
1070       return true;
1071 }
1072 
1073 /*
1074  * %assign/vec4 <var>, <delay>
1075  */
1076 bool of_ASSIGN_VEC4(vthread_t thr, vvp_code_t cp)
1077 {
1078       vvp_net_ptr_t ptr (cp->net, 0);
1079       unsigned delay = cp->bit_idx[0];
1080       vvp_vector4_t&val = thr->peek_vec4();
1081 
1082       schedule_assign_vector(ptr, 0, 0, val, delay);
1083       thr->pop_vec4(1);
1084       return true;
1085 }
1086 
1087 /*
1088  * %assign/vec4/a/d <arr>, <offx>, <delx>
1089  */
1090 bool of_ASSIGN_VEC4_A_D(vthread_t thr, vvp_code_t cp)
1091 {
1092       int off_idx = cp->bit_idx[0];
1093       int del_idx = cp->bit_idx[1];
1094       int adr_idx = 3;
1095 
1096       long     off = off_idx? thr->words[off_idx].w_int  : 0;
1097       vvp_time64_t del = del_idx? thr->words[del_idx].w_uint : 0;
1098       long     adr = thr->words[adr_idx].w_int;
1099 
1100       vvp_vector4_t val = thr->pop_vec4();
1101       unsigned wid = val.size();
1102       const unsigned array_wid = cp->array->get_word_size();
1103 
1104 	// Abort if flags[4] is set. This can happen if the calculation
1105 	// into an index register failed.
1106       if (thr->flags[4] == BIT4_1)
1107 	    return true;
1108 
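	// Clip the value to the addressed word: drop any part that falls
	// below bit 0 (negative offset) or beyond the word width, and do
	// nothing at all if none of the value lands in range.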
1109       if (off >= (long)array_wid)
1110 	    return true;
1111       if (off < 0) {
1112 	    if ((unsigned)-off >= array_wid)
1113 		  return true;
1114 
1115 	    int use_off = -off;
1116 	    assert(wid > (unsigned)use_off);
1117 	    unsigned use_wid = wid - use_off;
1118 	    val = val.subvalue(use_off, use_wid);
1119 	    off = 0;
1120 	    wid = use_wid;
1121       }
1122       if (off+wid > array_wid) {
1123 	    val = val.subvalue(0, array_wid-off);
1124 	    wid = val.size();
1125       }
1126 
1127       schedule_assign_array_word(cp->array, adr, off, val, del);
1128 
1129       return true;
1130 }
1131 
1132 /*
1133  * %assign/vec4/a/e <arr>, <offx>
1134  */
1135 bool of_ASSIGN_VEC4_A_E(vthread_t thr, vvp_code_t cp)
1136 {
1137       int off_idx = cp->bit_idx[0];
1138       int adr_idx = 3;
1139 
1140       long     off = off_idx? thr->words[off_idx].w_int  : 0;
1141       long     adr = thr->words[adr_idx].w_int;
1142 
1143       vvp_vector4_t val = thr->pop_vec4();
1144       unsigned wid = val.size();
1145       const unsigned array_wid = cp->array->get_word_size();
1146 
1147 	// Abort if flags[4] is set. This can happen if the calculation
1148 	// into an index register failed.
1149       if (thr->flags[4] == BIT4_1)
1150 	    return true;
1151 
1152       if (off >= (long)array_wid)
1153 	    return true;
1154       if (off < 0) {
1155 	    if ((unsigned)-off >= array_wid)
1156 		  return true;
1157 
1158 	    int use_off = -off;
1159 	    assert(wid > (unsigned)use_off);
1160 	    unsigned use_wid = wid - use_off;
1161 	    val = val.subvalue(use_off, use_wid);
1162 	    off = 0;
1163 	    wid = use_wid;
1164       }
1165       if (off+wid > array_wid) {
1166 	    val = val.subvalue(0, array_wid-off);
1167 	    wid = val.size();
1168       }
1169 
1170       if (thr->ecount == 0) {
1171 	    schedule_assign_array_word(cp->array, adr, off, val, 0);
1172       } else {
1173 	    schedule_evctl(cp->array, adr, val, off, thr->event, thr->ecount);
1174       }
1175 
1176       return true;
1177 }
1178 
1179 /*
1180  * %assign/vec4/off/d <var>, <off>, <del>
1181  */
1182 bool of_ASSIGN_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
1183 {
1184       vvp_net_ptr_t ptr (cp->net, 0);
1185       unsigned off_index = cp->bit_idx[0];
1186       unsigned del_index = cp->bit_idx[1];
1187       vvp_vector4_t val = thr->pop_vec4();
1188       unsigned wid = val.size();
1189 
1190       int off = thr->words[off_index].w_int;
1191       vvp_time64_t del = thr->words[del_index].w_uint;
1192 
1193 	// Abort if flags[4] is set. This can happen if the calculation
1194 	// into an index register failed.
1195       if (thr->flags[4] == BIT4_1)
1196 	    return true;
1197 
1198       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1199       assert(sig);
1200 
1201       if (off >= (long)sig->value_size())
1202 	    return true;
1203       if (off < 0) {
1204 	    if ((unsigned)-off >= wid)
1205 		  return true;
1206 
1207 	    int use_off = -off;
1208 	    assert(wid > (unsigned)use_off);
1209 	    unsigned use_wid = wid - use_off;
1210 	    val = val.subvalue(use_off, use_wid);
1211 	    off = 0;
1212 	    wid = use_wid;
1213       }
1214       if (off+wid > sig->value_size()) {
1215 	    val = val.subvalue(0, sig->value_size()-off);
1216 	    wid = val.size();
1217       }
1218 
1219       schedule_assign_vector(ptr, off, sig->value_size(), val, del);
1220       return true;
1221 }
1222 
1223 /*
1224  * %assign/vec4/off/e <var>, <off>
1225  */
1226 bool of_ASSIGN_VEC4_OFF_E(vthread_t thr, vvp_code_t cp)
1227 {
1228       vvp_net_ptr_t ptr (cp->net, 0);
1229       unsigned off_index = cp->bit_idx[0];
1230       vvp_vector4_t val = thr->pop_vec4();
1231       unsigned wid = val.size();
1232 
1233       int off = thr->words[off_index].w_int;
1234 
1235 	// Abort if flags[4] is set. This can happen if the calculation
1236 	// into an index register failed.
1237       if (thr->flags[4] == BIT4_1)
1238 	    return true;
1239 
1240       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1241       assert(sig);
1242 
1243       if (off >= (long)sig->value_size())
1244 	    return true;
1245       if (off < 0) {
1246 	    if ((unsigned)-off >= wid)
1247 		  return true;
1248 
1249 	    int use_off = -off;
1250 	    assert((int)wid > use_off);
1251 	    unsigned use_wid = wid - use_off;
1252 	    val = val.subvalue(use_off, use_wid);
1253 	    off = 0;
1254 	    wid = use_wid;
1255       }
1256       if (off+wid > sig->value_size()) {
1257 	    val = val.subvalue(0, sig->value_size()-off);
1258 	    wid = val.size();
1259       }
1260 
1261       if (thr->ecount == 0) {
1262 	    schedule_assign_vector(ptr, off, sig->value_size(), val, 0);
1263       } else {
1264 	    schedule_evctl(ptr, val, off, sig->value_size(), thr->event, thr->ecount);
1265       }
1266 
1267       return true;
1268 }
1269 
1270 /*
1271  * %assign/vec4/d <var-label> <delay>
1272  */
1273 bool of_ASSIGN_VEC4D(vthread_t thr, vvp_code_t cp)
1274 {
1275       vvp_net_ptr_t ptr (cp->net, 0);
1276       unsigned del_index = cp->bit_idx[0];
1277       vvp_time64_t del = thr->words[del_index].w_int;
1278 
1279       vvp_vector4_t value = thr->pop_vec4();
1280 
1281       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1282       assert(sig);
1283 
1284       schedule_assign_vector(ptr, 0, sig->value_size(), value, del);
1285 
1286       return true;
1287 }
1288 
1289 /*
1290  * %assign/vec4/e <var-label>
1291  */
1292 bool of_ASSIGN_VEC4E(vthread_t thr, vvp_code_t cp)
1293 {
1294       vvp_net_ptr_t ptr (cp->net, 0);
1295       vvp_vector4_t value = thr->pop_vec4();
1296 
1297       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
1298       assert(sig);
1299 
1300       if (thr->ecount == 0) {
1301 	    schedule_assign_vector(ptr, 0, sig->value_size(), value, 0);
1302       } else {
1303 	    schedule_evctl(ptr, value, 0, sig->value_size(), thr->event, thr->ecount);
1304       }
1305 
1306       thr->event = 0;
1307       thr->ecount = 0;
1308       return true;
1309 }
1310 
1311 /*
1312  * This is %assign/wr <vpi-label>, <delay>
1313  *
1314  * This assigns (after a delay) a value to a real variable. Use the
1315  * vpi_put_value function to do the assign, with the delay passed in
1316  * an s_vpi_time structure and applied as a transport delay.
1317  */
1318 bool of_ASSIGN_WR(vthread_t thr, vvp_code_t cp)
1319 {
1320       unsigned delay = cp->bit_idx[0];
1321       double value = thr->pop_real();
1322       s_vpi_time del;
1323 
1324       del.type = vpiSimTime;
1325       vpip_time_to_timestruct(&del, delay);
1326 
1327       __vpiHandle*tmp = cp->handle;
1328 
1329       t_vpi_value val;
1330       val.format = vpiRealVal;
1331       val.value.real = value;
1332       vpi_put_value(tmp, &val, &del, vpiTransportDelay);
1333 
1334       return true;
1335 }
1336 
1337 bool of_ASSIGN_WRD(vthread_t thr, vvp_code_t cp)
1338 {
1339       vvp_time64_t delay = thr->words[cp->bit_idx[0]].w_uint;
1340       double value = thr->pop_real();
1341       s_vpi_time del;
1342 
1343       del.type = vpiSimTime;
1344       vpip_time_to_timestruct(&del, delay);
1345 
1346       __vpiHandle*tmp = cp->handle;
1347 
1348       t_vpi_value val;
1349       val.format = vpiRealVal;
1350       val.value.real = value;
1351       vpi_put_value(tmp, &val, &del, vpiTransportDelay);
1352 
1353       return true;
1354 }
1355 
1356 bool of_ASSIGN_WRE(vthread_t thr, vvp_code_t cp)
1357 {
1358       assert(thr->event != 0);
1359       double value = thr->pop_real();
1360       __vpiHandle*tmp = cp->handle;
1361 
1362 	// If the count is zero then just put the value.
1363       if (thr->ecount == 0) {
1364 	    t_vpi_value val;
1365 
1366 	    val.format = vpiRealVal;
1367 	    val.value.real = value;
1368 	    vpi_put_value(tmp, &val, 0, vpiNoDelay);
1369       } else {
1370 	    schedule_evctl(tmp, value, thr->event, thr->ecount);
1371       }
1372 
1373       thr->event = 0;
1374       thr->ecount = 0;
1375 
1376       return true;
1377 }
1378 
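/*
 * %blend
 *
 * Pop two vec4 values of equal width and push a single result in
 * which bit positions that agree keep their common value and
 * positions that differ become X.
 */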
1379 bool of_BLEND(vthread_t thr, vvp_code_t)
1380 {
1381       vvp_vector4_t vala = thr->pop_vec4();
1382       vvp_vector4_t valb = thr->pop_vec4();
1383       assert(vala.size() == valb.size());
1384 
1385       for (unsigned idx = 0 ; idx < vala.size() ; idx += 1) {
1386 	    if (vala.value(idx) == valb.value(idx))
1387 		  continue;
1388 
1389 	    vala.set_bit(idx, BIT4_X);
1390       }
1391 
1392       thr->push_vec4(vala);
1393       return true;
1394 }
1395 
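/*
 * %blend/wr
 *
 * The real-valued variant of %blend: pop two reals and, if they are
 * equal, push that value; otherwise push 0.0 (reals have no X value,
 * so 0.0 stands in for the ambiguous case).
 */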
1396 bool of_BLEND_WR(vthread_t thr, vvp_code_t)
1397 {
1398       double f = thr->pop_real();
1399       double t = thr->pop_real();
1400       thr->push_real((t == f) ? t : 0.0);
1401       return true;
1402 }
1403 
1404 bool of_BREAKPOINT(vthread_t, vvp_code_t)
1405 {
1406       return true;
1407 }
1408 
1409 /*
1410  * %callf/void <code-label>, <scope-label>
1411  * Combine the %fork and %join steps for invoking a function.
1412  */
1413 static bool do_callf_void(vthread_t thr, vthread_t child)
1414 {
1415 
1416       if (child->parent_scope->is_automatic()) {
1417 	      /* The context allocated for this child is the top entry
1418 		 on the write context stack */
1419 	    child->wt_context = thr->wt_context;
1420 	    child->rd_context = thr->wt_context;
1421       }
1422 
1423         // Mark the function thread as a direct child of the current thread.
1424       child->parent = thr;
1425       thr->children.insert(child);
1426         // This should be the only child
1427       assert(thr->children.size()==1);
1428 
1429         // Execute the function. This SHOULD run the function to completion,
1430         // but there are some exceptional situations where it won't.
1431       assert(child->parent_scope->get_type_code() == vpiFunction);
1432       child->is_scheduled = 1;
1433       child->i_am_in_function = 1;
1434       vthread_run(child);
1435       running_thread = thr;
1436 
1437       if (child->i_have_ended) {
1438 	    do_join(thr, child);
1439 	    return true;
1440       } else {
1441 	    thr->i_am_joining = 1;
1442 	    return false;
1443       }
1444 }
1445 
1446 bool of_CALLF_OBJ(vthread_t thr, vvp_code_t cp)
1447 {
1448       vthread_t child = vthread_new(cp->cptr2, cp->scope);
1449       return do_callf_void(thr, child);
1450 
1451       // XXXX NOT IMPLEMENTED
1452 }
1453 
1454 bool of_CALLF_REAL(vthread_t thr, vvp_code_t cp)
1455 {
1456       vthread_t child = vthread_new(cp->cptr2, cp->scope);
1457 
1458 	// This is the return value. Push a place-holder value. The function
1459 	// will replace this with the actual value using a %ret/real instruction.
1460       thr->push_real(0.0);
1461       child->args_real.push_back(0);
1462 
1463       return do_callf_void(thr, child);
1464 }
1465 
1466 bool of_CALLF_STR(vthread_t thr, vvp_code_t cp)
1467 {
1468       vthread_t child = vthread_new(cp->cptr2, cp->scope);
1469 
1470       thr->push_str("");
1471       child->args_str.push_back(0);
1472 
1473       return do_callf_void(thr, child);
1474 }
1475 
1476 bool of_CALLF_VEC4(vthread_t thr, vvp_code_t cp)
1477 {
1478       vthread_t child = vthread_new(cp->cptr2, cp->scope);
1479 
1480       vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(cp->scope);
1481       assert(scope_func);
1482 
1483 	// This is the return value. Push a place-holder value. The function
1484 	// will replace this with the actual value using a %ret/vec4 instruction.
1485       thr->push_vec4(vvp_vector4_t(scope_func->get_func_width(), scope_func->get_func_init_val()));
1486       child->args_vec4.push_back(0);
1487 
1488       return do_callf_void(thr, child);
1489 }
1490 
1491 bool of_CALLF_VOID(vthread_t thr, vvp_code_t cp)
1492 {
1493       vthread_t child = vthread_new(cp->cptr2, cp->scope);
1494       return do_callf_void(thr, child);
1495 }
1496 
1497 /*
1498  * The %cassign/link instruction connects a source node to a
1499  * destination node. The destination node must be a signal, as it is
1500  * marked with the source of the cassign so that it may later be
1501  * unlinked without specifically knowing the source that this
1502  * instruction used.
1503  */
1504 bool of_CASSIGN_LINK(vthread_t, vvp_code_t cp)
1505 {
1506       vvp_net_t*dst = cp->net;
1507       vvp_net_t*src = cp->net2;
1508 
1509       vvp_fun_signal_base*sig
1510 	    = dynamic_cast<vvp_fun_signal_base*>(dst->fun);
1511       assert(sig);
1512 
1513 	/* Any previous continuous assign should have been removed already. */
1514       assert(sig->cassign_link == 0);
1515 
1516       sig->cassign_link = src;
1517 
1518 	/* Link the output of the src to the port[1] (the cassign
1519 	   port) of the destination. */
1520       vvp_net_ptr_t dst_ptr (dst, 1);
1521       src->link(dst_ptr);
1522 
1523       return true;
1524 }
1525 
1526 /*
1527  * If there is an existing continuous assign linked to the destination
1528  * node, unlink it. This must be done before applying a new continuous
1529  * assign, otherwise the initial assigned value will be propagated to
1530  * any other nodes driven by the old continuous assign source.
1531  */
1532 static void cassign_unlink(vvp_net_t*dst)
1533 {
1534       vvp_fun_signal_base*sig
1535 	    = dynamic_cast<vvp_fun_signal_base*>(dst->fun);
1536       assert(sig);
1537 
1538       if (sig->cassign_link == 0)
1539 	    return;
1540 
1541       vvp_net_ptr_t tmp (dst, 1);
1542       sig->cassign_link->unlink(tmp);
1543       sig->cassign_link = 0;
1544 }
1545 
1546 /*
1547  * The %cassign/v instruction invokes a continuous assign of a
1548  * constant value to a signal. The instruction arguments are:
1549  *
1550  *     %cassign/vec4 <net>;
1551  *
1552  * Where the <net> is the net label assembled into a vvp_net pointer.
1553  * The value to assign is popped from the vec4 stack.
1554  *
1555  * This instruction writes vvp_vector4_t values to port-1 of the
1556  * target signal.
1557  */
1558 bool of_CASSIGN_VEC4(vthread_t thr, vvp_code_t cp)
1559 {
1560       vvp_net_t*net = cp->net;
1561       vvp_vector4_t value = thr->pop_vec4();
1562 
1563 	/* Remove any previous continuous assign to this net. */
1564       cassign_unlink(net);
1565 
1566 	/* Set the value into port 1 of the destination. */
1567       vvp_net_ptr_t ptr (net, 1);
1568       vvp_send_vec4(ptr, value, 0);
1569 
1570       return true;
1571 }
1572 
1573 /*
1574  * %cassign/vec4/off <var>, <off>
1575  */
1576 bool of_CASSIGN_VEC4_OFF(vthread_t thr, vvp_code_t cp)
1577 {
1578       vvp_net_t*net = cp->net;
1579       unsigned base_idx = cp->bit_idx[0];
1580       long base = thr->words[base_idx].w_int;
1581       vvp_vector4_t value = thr->pop_vec4();
1582       unsigned wid = value.size();
1583 
1584       if (thr->flags[4] == BIT4_1)
1585 	    return true;
1586 
1587 	/* Remove any previous continuous assign to this net. */
1588       cassign_unlink(net);
1589 
1590       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
1591       assert(sig);
1592 
1593       if (base < 0 && (wid <= (unsigned)-base))
1594 	    return true;
1595 
1596       if (base >= (long)sig->value_size())
1597 	    return true;
1598 
1599       if (base < 0) {
1600 	    wid -= (unsigned) -base;
1601 	    base = 0;
1602 	    value.resize(wid);
1603       }
1604 
1605       if (base+wid > sig->value_size()) {
1606 	    wid = sig->value_size() - base;
1607 	    value.resize(wid);
1608       }
1609 
1610       vvp_net_ptr_t ptr (net, 1);
1611       vvp_send_vec4_pv(ptr, value, base, wid, sig->value_size(), 0);
1612       return true;
1613 }
1614 
1615 bool of_CASSIGN_WR(vthread_t thr, vvp_code_t cp)
1616 {
1617       vvp_net_t*net  = cp->net;
1618       double value = thr->pop_real();
1619 
1620 	/* Remove any previous continuous assign to this net. */
1621       cassign_unlink(net);
1622 
1623 	/* Set the value into port 1 of the destination. */
1624       vvp_net_ptr_t ptr (net, 1);
1625       vvp_send_real(ptr, value, 0);
1626 
1627       return true;
1628 }
1629 
1630 /*
1631  * %cast2
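 *
 * Convert the vec4 value at the top of the stack to a two-state value
 * in place, mapping X and Z bits to 0. For example, 4'b1x0z becomes
 * 4'b1000.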
1632  */
1633 bool of_CAST2(vthread_t thr, vvp_code_t)
1634 {
1635       vvp_vector4_t&val = thr->peek_vec4();
1636       unsigned wid = val.size();
1637 
1638       for (unsigned idx = 0 ; idx < wid ; idx += 1) {
1639 	    switch (val.value(idx)) {
1640 		case BIT4_0:
1641 		case BIT4_1:
1642 		  break;
1643 		default:
1644 		  val.set_bit(idx, BIT4_0);
1645 		  break;
1646 	    }
1647       }
1648 
1649       return true;
1650 }
1651 
1652 bool do_cast_vec_dar(vthread_t thr, vvp_code_t cp, bool as_vec4)
1653 {
1654       unsigned wid = cp->number;
1655 
1656       vvp_object_t obj;
1657       thr->pop_object(obj);
1658 
1659       vvp_darray*darray = obj.peek<vvp_darray>();
1660       assert(darray);
1661 
1662       vvp_vector4_t vec = darray->get_bitstream(as_vec4);
1663       if (vec.size() != wid) {
1664 	    cerr << thr->get_fileline()
1665 	         << "VVP error: size mismatch when casting dynamic array to vector." << endl;
1666             thr->push_vec4(vvp_vector4_t(wid));
1667             schedule_stop(0);
1668             return false;
1669       }
1670       thr->push_vec4(vec);
1671       return true;
1672 }
1673 
1674 /*
1675  * %cast/vec2/dar <wid>
1676  */
1677 bool of_CAST_VEC2_DAR(vthread_t thr, vvp_code_t cp)
1678 {
1679       return do_cast_vec_dar(thr, cp, false);
1680 }
1681 
1682 /*
1683  * %cast/vec4/dar <wid>
1684  */
1685 bool of_CAST_VEC4_DAR(vthread_t thr, vvp_code_t cp)
1686 {
1687       return do_cast_vec_dar(thr, cp, true);
1688 }
1689 
1690 /*
1691  * %cast/vec4/str <wid>
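 *
 * Pop a string and push it as a <wid>-bit vector, packing 8 bits per
 * character with the first character in the most significant byte.
 * For example, casting "ab" with <wid>=16 yields 16'h6162.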
1692  */
1693 bool of_CAST_VEC4_STR(vthread_t thr, vvp_code_t cp)
1694 {
1695       unsigned wid = cp->number;
1696       string str = thr->pop_str();
1697 
1698       vvp_vector4_t vec(wid, BIT4_0);
1699 
1700       if (wid != 8*str.length()) {
1701 	    cerr << thr->get_fileline()
1702 	         << "VVP error: size mismatch when casting string to vector." << endl;
1703             thr->push_vec4(vec);
1704             schedule_stop(0);
1705             return false;
1706       }
1707 
1708       unsigned sdx = 0;
1709       unsigned vdx = wid;
1710       while (vdx > 0) {
1711             char ch = str[sdx++];
1712             vdx -= 8;
1713             for (unsigned bdx = 0; bdx < 8; bdx += 1) {
1714                   if (ch & 1)
1715                         vec.set_bit(vdx+bdx, BIT4_1);
1716                   ch >>= 1;
1717             }
1718       }
1719 
1720       thr->push_vec4(vec);
1721       return true;
1722 }
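/*
 * Illustrative note (added commentary, not from the original source):
 * the loop above places the first character of the string in the most
 * significant byte of the result. Casting the string "AB" with
 * <wid>=16, for example, yields 16'h4142: 'A' (0x41) fills bits
 * [15:8] and 'B' (0x42) fills bits [7:0].
 */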
1723 
1724 static void do_CMPE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
1725 {
1726       assert(rval.size() == lval.size());
1727 
1728       if (lval.has_xz() || rval.has_xz()) {
1729 
1730 	    unsigned wid = lval.size();
1731 	    vvp_bit4_t eq  = BIT4_1;
1732 	    vvp_bit4_t eeq = BIT4_1;
1733 
1734 	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
1735 		  vvp_bit4_t lv = lval.value(idx);
1736 		  vvp_bit4_t rv = rval.value(idx);
1737 
1738 		  if (lv != rv)
1739 			eeq = BIT4_0;
1740 
1741 		  if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
1742 			eq = BIT4_X;
1743 		  if ((lv == BIT4_0) && (rv==BIT4_1))
1744 			eq = BIT4_0;
1745 		  if ((lv == BIT4_1) && (rv==BIT4_0))
1746 			eq = BIT4_0;
1747 
1748 		  if (eq == BIT4_0)
1749 			break;
1750 	    }
1751 
1752 	    thr->flags[4] = eq;
1753 	    thr->flags[6] = eeq;
1754 
1755       } else {
1756 	      // If there are no XZ bits anywhere, then the results of
1757 	      // == match the === test.
1758 	    thr->flags[4] = thr->flags[6] = (lval.eeq(rval)? BIT4_1 : BIT4_0);
1759       }
1760 }
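/*
 * Worked example (added for illustration, not in the original source):
 * comparing lval=4'b1010 with rval=4'b10x0 takes the X/Z branch above.
 * Bit 1 differs (1 vs. x), so eeq (flag 6) becomes 0, and since an x/z
 * bit was seen while eq was still 1, eq (flag 4) becomes X. Had there
 * also been a definite 0-vs-1 mismatch, eq would have dropped to 0.
 */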
1761 
1762 /*
1763  *  %cmp/e
1764  *
1765  * Pop the operands from the stack, and do not replace them. The
1766  * results are written to flag bits:
1767  *
1768  *	4: eq  (equal)
1769  *
1770  *	6: eeq (case equal)
1771  */
1772 bool of_CMPE(vthread_t thr, vvp_code_t)
1773 {
1774 	// We are going to pop these and push nothing in their
1775 	// place, but for now it is more efficient to use a constant
1776 	// reference. When we finish, pop the stack without copies.
1777       const vvp_vector4_t&rval = thr->peek_vec4(0);
1778       const vvp_vector4_t&lval = thr->peek_vec4(1);
1779 
1780       do_CMPE(thr, lval, rval);
1781 
1782       thr->pop_vec4(2);
1783       return true;
1784 }
1785 
1786 bool of_CMPNE(vthread_t thr, vvp_code_t)
1787 {
1788 	// We are going to pop these and push nothing in their
1789 	// place, but for now it is more efficient to use a constant
1790 	// reference. When we finish, pop the stack without copies.
1791       const vvp_vector4_t&rval = thr->peek_vec4(0);
1792       const vvp_vector4_t&lval = thr->peek_vec4(1);
1793 
1794       do_CMPE(thr, lval, rval);
1795 
1796       thr->flags[4] =  ~thr->flags[4];
1797       thr->flags[6] =  ~thr->flags[6];
1798 
1799       thr->pop_vec4(2);
1800       return true;
1801 }
1802 
1803 /*
1804  * %cmpi/e <vala>, <valb>, <wid>
1805  *
1806  * Pop one operand, get the other operand from the arguments.
1807  */
1808 bool of_CMPIE(vthread_t thr, vvp_code_t cp)
1809 {
1810       unsigned wid = cp->number;
1811 
1812       vvp_vector4_t&lval = thr->peek_vec4();
1813 
1814 	// I expect that most of the bits of an immediate value are
1815 	// going to be zero, so start the result vector with all zero
1816 	// bits. Then we only need to replace the bits that are different.
1817       vvp_vector4_t rval (wid, BIT4_0);
1818       get_immediate_rval (cp, rval);
1819 
1820       do_CMPE(thr, lval, rval);
1821 
1822       thr->pop_vec4(1);
1823       return true;
1824 }
1825 
1826 bool of_CMPINE(vthread_t thr, vvp_code_t cp)
1827 {
1828       unsigned wid = cp->number;
1829 
1830       vvp_vector4_t&lval = thr->peek_vec4();
1831 
1832 	// I expect that most of the bits of an immediate value are
1833 	// going to be zero, so start the result vector with all zero
1834 	// bits. Then we only need to replace the bits that are different.
1835       vvp_vector4_t rval (wid, BIT4_0);
1836       get_immediate_rval (cp, rval);
1837 
1838       do_CMPE(thr, lval, rval);
1839 
1840       thr->flags[4] =  ~thr->flags[4];
1841       thr->flags[6] =  ~thr->flags[6];
1842 
1843       thr->pop_vec4(1);
1844       return true;
1845 }
1846 
1847 
1848 
1849 static void do_CMPS(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
1850 {
1851       assert(rval.size() == lval.size());
1852 
1853 	// If either value has XZ bits, then the eq and lt values are
1854 	// known already to be X. Just calculate the eeq result as a
1855 	// special case and short circuit the rest of the compare.
1856       if (lval.has_xz() || rval.has_xz()) {
1857 	    thr->flags[4] = BIT4_X; // eq
1858 	    thr->flags[5] = BIT4_X; // lt
1859 	    thr->flags[6] = lval.eeq(rval)? BIT4_1 : BIT4_0;
1860 	    return;
1861       }
1862 
1863 	// Past this point, we know we are dealing only with fully
1864 	// defined values.
1865       unsigned wid = lval.size();
1866 
1867       const vvp_bit4_t sig1 = lval.value(wid-1);
1868       const vvp_bit4_t sig2 = rval.value(wid-1);
1869 
1870 	// If the lval is <0 and the rval is >=0, then we know the result.
1871       if ((sig1 == BIT4_1) && (sig2 == BIT4_0)) {
1872 	    thr->flags[4] = BIT4_0; // eq;
1873 	    thr->flags[5] = BIT4_1; // lt;
1874 	    thr->flags[6] = BIT4_0; // eeq
1875 	    return;
1876       }
1877 
1878 	// If the lval is >=0 and the rval is <0, then we know the result.
1879       if ((sig1 == BIT4_0) && (sig2 == BIT4_1)) {
1880 	    thr->flags[4] = BIT4_0; // eq;
1881 	    thr->flags[5] = BIT4_0; // lt;
1882 	    thr->flags[6] = BIT4_0; // eeq
1883 	    return;
1884       }
1885 
1886 	// The values have the same sign, so we have to look at the
1887 	// actual value. Scan from the MSB down. As soon as we find a
1888 	// bit that differs, we know the result.
1889 
1890       for (unsigned idx = 1 ;  idx < wid ;  idx += 1) {
1891 	    vvp_bit4_t lv = lval.value(wid-1-idx);
1892 	    vvp_bit4_t rv = rval.value(wid-1-idx);
1893 
1894 	    if (lv == rv)
1895 		  continue;
1896 
1897 	    thr->flags[4] = BIT4_0; // eq
1898 	    thr->flags[6] = BIT4_0; // eeq
1899 
1900 	    if (lv==BIT4_0) {
1901 		  thr->flags[5] = BIT4_1; // lt
1902 	    } else {
1903 		  thr->flags[5] = BIT4_0; // lt
1904 	    }
1905 	    return;
1906       }
1907 
1908 	// If we survive the loop above, then the values must be equal.
1909       thr->flags[4] = BIT4_1;
1910       thr->flags[5] = BIT4_0;
1911       thr->flags[6] = BIT4_1;
1912 }
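/*
 * Worked example (added for illustration): comparing the 4-bit signed
 * values lval=4'b1110 (-2) and rval=4'b0011 (+3) takes the sign-bit
 * shortcut above: sig1==1 and sig2==0, so eq=0, lt=1, eeq=0 without
 * scanning any further bits.
 */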
1913 
1914 /*
1915  *  %cmp/s
1916  *
1917  * Pop the operands from the stack, and do not replace them. The
1918  * results are written to flag bits:
1919  *
1920  *	4: eq  (equal)
1921  *	5: lt  (less than)
1922  *	6: eeq (case equal)
1923  */
1924 bool of_CMPS(vthread_t thr, vvp_code_t)
1925 {
1926 	// We are going to pop these and push nothing in their
1927 	// place, but for now it is more efficient to use a constant
1928 	// reference. When we finish, pop the stack without copies.
1929       const vvp_vector4_t&rval = thr->peek_vec4(0);
1930       const vvp_vector4_t&lval = thr->peek_vec4(1);
1931 
1932       do_CMPS(thr, lval, rval);
1933 
1934       thr->pop_vec4(2);
1935       return true;
1936 }
1937 
1938 /*
1939  * %cmpi/s <vala>, <valb>, <wid>
1940  *
1941  * Pop one operand, get the other operand from the arguments.
1942  */
1943 bool of_CMPIS(vthread_t thr, vvp_code_t cp)
1944 {
1945       unsigned wid = cp->number;
1946 
1947       vvp_vector4_t&lval = thr->peek_vec4();
1948 
1949 	// I expect that most of the bits of an immediate value are
1950 	// going to be zero, so start the result vector with all zero
1951 	// bits. Then we only need to replace the bits that are different.
1952       vvp_vector4_t rval (wid, BIT4_0);
1953       get_immediate_rval (cp, rval);
1954 
1955       do_CMPS(thr, lval, rval);
1956 
1957       thr->pop_vec4(1);
1958       return true;
1959 }
1960 
1961 bool of_CMPSTR(vthread_t thr, vvp_code_t)
1962 {
1963       string re = thr->pop_str();
1964       string le = thr->pop_str();
1965 
1966       int rc = strcmp(le.c_str(), re.c_str());
1967 
1968       vvp_bit4_t eq;
1969       vvp_bit4_t lt;
1970 
1971       if (rc == 0) {
1972 	    eq = BIT4_1;
1973 	    lt = BIT4_0;
1974       } else if (rc < 0) {
1975 	    eq = BIT4_0;
1976 	    lt = BIT4_1;
1977       } else {
1978 	    eq = BIT4_0;
1979 	    lt = BIT4_0;
1980       }
1981 
1982       thr->flags[4] = eq;
1983       thr->flags[5] = lt;
1984 
1985       return true;
1986 }
1987 
1988 static void of_CMPU_the_hard_way(vthread_t thr, unsigned wid,
1989 				 const vvp_vector4_t&lval,
1990 				 const vvp_vector4_t&rval)
1991 {
1992       vvp_bit4_t eq = BIT4_1;
1993       vvp_bit4_t eeq = BIT4_1;
1994 
1995       for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
1996 	    vvp_bit4_t lv = lval.value(idx);
1997 	    vvp_bit4_t rv = rval.value(idx);
1998 
1999 	    if (lv != rv)
2000 		  eeq = BIT4_0;
2001 
2002 	    if (eq==BIT4_1 && (bit4_is_xz(lv) || bit4_is_xz(rv)))
2003 		  eq = BIT4_X;
2004 	    if ((lv == BIT4_0) && (rv==BIT4_1))
2005 		  eq = BIT4_0;
2006 	    if ((lv == BIT4_1) && (rv==BIT4_0))
2007 		  eq = BIT4_0;
2008 
2009 	    if (eq == BIT4_0)
2010 		  break;
2011 
2012       }
2013 
2014       thr->flags[4] = eq;
2015       thr->flags[5] = BIT4_X;
2016       thr->flags[6] = eeq;
2017 }
2018 
2019 static void do_CMPU(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
2020 {
2021       vvp_bit4_t eq = BIT4_1;
2022       vvp_bit4_t lt = BIT4_0;
2023 
2024       if (rval.size() != lval.size()) {
2025 	    cerr << thr->get_fileline()
2026 	         << "VVP ERROR: %cmp/u operand width mismatch: lval=" << lval
2027 		 << ", rval=" << rval << endl;
2028       }
2029       assert(rval.size() == lval.size());
2030       unsigned wid = lval.size();
2031 
2032       unsigned long*larray = lval.subarray(0,wid);
2033       if (larray == 0) return of_CMPU_the_hard_way(thr, wid, lval, rval);
2034 
2035       unsigned long*rarray = rval.subarray(0,wid);
2036       if (rarray == 0) {
2037 	    delete[]larray;
2038 	    return of_CMPU_the_hard_way(thr, wid, lval, rval);
2039       }
2040 
2041       unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
2042 
2043       for (unsigned wdx = 0 ; wdx < words ; wdx += 1) {
2044 	    if (larray[wdx] == rarray[wdx])
2045 		  continue;
2046 
2047 	    eq = BIT4_0;
2048 	    if (larray[wdx] < rarray[wdx])
2049 		  lt = BIT4_1;
2050 	    else
2051 		  lt = BIT4_0;
2052       }
2053 
2054       delete[]larray;
2055       delete[]rarray;
2056 
2057       thr->flags[4] = eq;
2058       thr->flags[5] = lt;
2059       thr->flags[6] = eq;
2060 }
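/*
 * Note added for clarity: the word loop above intentionally does not
 * break out on the first mismatch. More significant words are visited
 * later and overwrite lt, so the final lt reflects the most
 * significant differing word, which is what an unsigned magnitude
 * compare needs.
 */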
2061 
2062 bool of_CMPU(vthread_t thr, vvp_code_t)
2063 {
2064 
2065       const vvp_vector4_t&rval = thr->peek_vec4(0);
2066       const vvp_vector4_t&lval = thr->peek_vec4(1);
2067 
2068       do_CMPU(thr, lval, rval);
2069 
2070       thr->pop_vec4(2);
2071       return true;
2072 }
2073 
2074 /*
2075  * %cmpi/u <vala>, <valb>, <wid>
2076  *
2077  * Pop one operand, get the other operand from the arguments.
2078  */
2079 bool of_CMPIU(vthread_t thr, vvp_code_t cp)
2080 {
2081       unsigned wid = cp->number;
2082 
2083       vvp_vector4_t&lval = thr->peek_vec4();
2084 
2085 	// I expect that most of the bits of an immediate value are
2086 	// going to be zero, so start the result vector with all zero
2087 	// bits. Then we only need to replace the bits that are different.
2088       vvp_vector4_t rval (wid, BIT4_0);
2089       get_immediate_rval (cp, rval);
2090 
2091       do_CMPU(thr, lval, rval);
2092 
2093       thr->pop_vec4(1);
2094       return true;
2095 }
2096 
2097 
2098 /*
2099  * %cmp/x
2100  */
2101 bool of_CMPX(vthread_t thr, vvp_code_t)
2102 {
2103       vvp_bit4_t eq = BIT4_1;
2104       vvp_vector4_t rval = thr->pop_vec4();
2105       vvp_vector4_t lval = thr->pop_vec4();
2106 
2107       assert(rval.size() == lval.size());
2108       unsigned wid = lval.size();
2109 
2110       for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2111 	    vvp_bit4_t lv = lval.value(idx);
2112 	    vvp_bit4_t rv = rval.value(idx);
2113 	    if ((lv != rv) && !bit4_is_xz(lv) && !bit4_is_xz(rv)) {
2114 		  eq = BIT4_0;
2115 		  break;
2116 	    }
2117       }
2118 
2119       thr->flags[4] = eq;
2120       return true;
2121 }
2122 
2123 static void do_CMPWE(vthread_t thr, const vvp_vector4_t&lval, const vvp_vector4_t&rval)
2124 {
2125       assert(rval.size() == lval.size());
2126 
2127       if (lval.has_xz() || rval.has_xz()) {
2128 
2129 	    unsigned wid = lval.size();
2130 	    vvp_bit4_t eq  = BIT4_1;
2131 
2132 	    for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2133 		  vvp_bit4_t lv = lval.value(idx);
2134 		  vvp_bit4_t rv = rval.value(idx);
2135 
2136 		  if (bit4_is_xz(rv))
2137 			continue;
2138 		  if ((eq == BIT4_1) && bit4_is_xz(lv))
2139 			eq = BIT4_X;
2140 		  if ((lv == BIT4_0) && (rv==BIT4_1))
2141 			eq = BIT4_0;
2142 		  if ((lv == BIT4_1) && (rv==BIT4_0))
2143 			eq = BIT4_0;
2144 
2145 		  if (eq == BIT4_0)
2146 			break;
2147 	    }
2148 
2149 	    thr->flags[4] = eq;
2150 
2151       } else {
2152 	      // If there are no XZ bits anywhere, then the results of
2153 	      // ==? match the === test.
2154 	    thr->flags[4] = (lval.eeq(rval)? BIT4_1 : BIT4_0);
2155       }
2156 }
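/*
 * Worked example (added for illustration): in this wildcard compare,
 * used for ==?, x/z bits in rval act as don't-care positions.
 * Comparing lval=4'b1010 with rval=4'bx01x skips bits 0 and 3 (rval
 * is x there) and matches bits 1 and 2, so flag 4 ends up BIT4_1. An
 * x/z bit in lval opposite a 0/1 bit in rval would instead leave
 * flag 4 at BIT4_X.
 */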
2157 
2158 bool of_CMPWE(vthread_t thr, vvp_code_t)
2159 {
2160 	// We are going to pop these and push nothing in their
2161 	// place, but for now it is more efficient to use a constant
2162 	// reference. When we finish, pop the stack without copies.
2163       const vvp_vector4_t&rval = thr->peek_vec4(0);
2164       const vvp_vector4_t&lval = thr->peek_vec4(1);
2165 
2166       do_CMPWE(thr, lval, rval);
2167 
2168       thr->pop_vec4(2);
2169       return true;
2170 }
2171 
2172 bool of_CMPWNE(vthread_t thr, vvp_code_t)
2173 {
2174 	// We are going to pop these and push nothing in their
2175 	// place, but for now it is more efficient to use a constant
2176 	// reference. When we finish, pop the stack without copies.
2177       const vvp_vector4_t&rval = thr->peek_vec4(0);
2178       const vvp_vector4_t&lval = thr->peek_vec4(1);
2179 
2180       do_CMPWE(thr, lval, rval);
2181 
2182       thr->flags[4] =  ~thr->flags[4];
2183 
2184       thr->pop_vec4(2);
2185       return true;
2186 }
2187 
2188 bool of_CMPWR(vthread_t thr, vvp_code_t)
2189 {
2190       double r = thr->pop_real();
2191       double l = thr->pop_real();
2192 
2193       vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2194       vvp_bit4_t lt = (l <  r)? BIT4_1 : BIT4_0;
2195 
2196       thr->flags[4] = eq;
2197       thr->flags[5] = lt;
2198 
2199       return true;
2200 }
2201 
2202 bool of_CMPWS(vthread_t thr, vvp_code_t cp)
2203 {
2204       int64_t l = thr->words[cp->bit_idx[0]].w_int;
2205       int64_t r = thr->words[cp->bit_idx[1]].w_int;
2206 
2207       vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2208       vvp_bit4_t lt = (l <  r)? BIT4_1 : BIT4_0;
2209 
2210       thr->flags[4] = eq;
2211       thr->flags[5] = lt;
2212 
2213       return true;
2214 }
2215 
2216 bool of_CMPWU(vthread_t thr, vvp_code_t cp)
2217 {
2218       uint64_t l = thr->words[cp->bit_idx[0]].w_uint;
2219       uint64_t r = thr->words[cp->bit_idx[1]].w_uint;
2220 
2221       vvp_bit4_t eq = (l == r)? BIT4_1 : BIT4_0;
2222       vvp_bit4_t lt = (l <  r)? BIT4_1 : BIT4_0;
2223 
2224       thr->flags[4] = eq;
2225       thr->flags[5] = lt;
2226 
2227       return true;
2228 }
2229 
2230 /*
2231  * %cmp/z
2232  */
2233 bool of_CMPZ(vthread_t thr, vvp_code_t)
2234 {
2235       vvp_bit4_t eq = BIT4_1;
2236       vvp_vector4_t rval = thr->pop_vec4();
2237       vvp_vector4_t lval = thr->pop_vec4();
2238 
2239       assert(rval.size() == lval.size());
2240       unsigned wid = lval.size();
2241 
2242       for (unsigned idx = 0 ; idx < wid ; idx += 1) {
2243 	    vvp_bit4_t lv = lval.value(idx);
2244 	    vvp_bit4_t rv = rval.value(idx);
2245 	    if ((lv != rv) && (rv != BIT4_Z) && (lv != BIT4_Z)) {
2246 		  eq = BIT4_0;
2247 		  break;
2248 	    }
2249       }
2250 
2251       thr->flags[4] = eq;
2252       return true;
2253 }
2254 
2255 /*
2256  *  %concat/str;
2257  */
2258 bool of_CONCAT_STR(vthread_t thr, vvp_code_t)
2259 {
2260       string text = thr->pop_str();
2261       thr->peek_str(0).append(text);
2262       return true;
2263 }
2264 
2265 /*
2266  *  %concati/str <string>;
2267  */
2268 bool of_CONCATI_STR(vthread_t thr, vvp_code_t cp)
2269 {
2270       const char*text = cp->text;
2271       thr->peek_str(0).append(text);
2272       return true;
2273 }
2274 
2275 /*
2276  * %concat/vec4
2277  */
2278 bool of_CONCAT_VEC4(vthread_t thr, vvp_code_t)
2279 {
2280       const vvp_vector4_t&lsb = thr->peek_vec4(0);
2281       const vvp_vector4_t&msb = thr->peek_vec4(1);
2282 
2283 	// The result size is the sum of the sizes of the top two vectors on the stack.
2284       vvp_vector4_t res (msb.size() + lsb.size(), BIT4_X);
2285 
2286 	// Build up the result.
2287       res.set_vec(0, lsb);
2288       res.set_vec(lsb.size(), msb);
2289 
2290 	// Rearrange the stack to pop the inputs and push the
2291 	// result. Do that by actually popping only 1 stack position
2292 	// and replacing the new top with the new value.
2293       thr->pop_vec4(1);
2294       thr->peek_vec4() = res;
2295 
2296       return true;
2297 }
2298 
2299 /*
2300  * %concati/vec4 <vala>, <valb>, <wid>
2301  *
2302  * Concat the immediate value to the LOW bits of the concatenation.
2303  * Get the HIGH bits from the top of the vec4 stack.
2304  */
2305 bool of_CONCATI_VEC4(vthread_t thr, vvp_code_t cp)
2306 {
2307       uint32_t vala = cp->bit_idx[0];
2308       uint32_t valb = cp->bit_idx[1];
2309       unsigned wid  = cp->number;
2310 
2311       vvp_vector4_t&msb = thr->peek_vec4();
2312 
2313 	// I expect that most of the bits of an immediate value are
2314 	// going to be zero, so start the result vector with all zero
2315 	// bits. Then we only need to replace the bits that are different.
2316       vvp_vector4_t lsb (wid, BIT4_0);
2317 
2318 	// The %concati/vec4 can create values bigger than 32 bits, but
2319 	// only if the high bits are zero. So at most we need to run
2320 	// through the loop below 32 times. Maybe less, if the target
2321 	// width is less. We don't have to do anything special on that
2322 	// because vala/valb bits will shift away so (vala|valb) will
2323 	// turn to zero at or before 32 shifts.
2324 
2325       for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
2326 	    uint32_t ba = 0;
2327 	      // Convert the vala/valb bits to a ba number that can be
2328 	      // used to select what goes into the value.
2329 	    ba = (valb & 1) << 1;
2330 	    ba |= vala & 1;
2331 
2332 	    switch (ba) {
2333 		case 1:
2334 		  lsb.set_bit(idx, BIT4_1);
2335 		  break;
2336 		case 2:
2337 		  lsb.set_bit(idx, BIT4_Z);
2338 		  break;
2339 		case 3:
2340 		  lsb.set_bit(idx, BIT4_X);
2341 		  break;
2342 		default:
2343 		  break;
2344 	    }
2345 
2346 	    vala >>= 1;
2347 	    valb >>= 1;
2348       }
2349 
2350       vvp_vector4_t res (msb.size()+lsb.size(), BIT4_X);
2351       res.set_vec(0, lsb);
2352       res.set_vec(lsb.size(), msb);
2353 
2354       msb = res;
2355       return true;
2356 }
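/*
 * Illustrative note (added commentary): vala and valb together encode
 * one 4-state bit per position, vala carrying the value plane and
 * valb the x/z plane, so ba==0 -> 0, ba==1 -> 1, ba==2 -> z and
 * ba==3 -> x. For example vala=0x5, valb=0x3 with <wid>=4 builds the
 * immediate lsb vector 4'b01zx.
 */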
2357 
2358 /*
2359  * %cvt/rv
2360  */
2361 bool of_CVT_RV(vthread_t thr, vvp_code_t)
2362 {
2363       double val;
2364       vvp_vector4_t val4 = thr->pop_vec4();
2365       vector4_to_value(val4, val, false);
2366       thr->push_real(val);
2367       return true;
2368 }
2369 
2370 /*
2371  * %cvt/rv/s
2372  */
2373 bool of_CVT_RV_S(vthread_t thr, vvp_code_t)
2374 {
2375       double val;
2376       vvp_vector4_t val4 = thr->pop_vec4();
2377       vector4_to_value(val4, val, true);
2378       thr->push_real(val);
2379       return true;
2380 }
2381 
2382 /*
2383  * %cvt/sr <idx>
2384  * Pop the top value from the real stack, convert it to a 64bit signed
2385  * and save it to the indexed register.
2386  */
2387 bool of_CVT_SR(vthread_t thr, vvp_code_t cp)
2388 {
2389       double r = thr->pop_real();
2390       thr->words[cp->bit_idx[0]].w_int = i64round(r);
2391 
2392       return true;
2393 }
2394 
2395 /*
2396  * %cvt/ur <idx>
2397  */
2398 bool of_CVT_UR(vthread_t thr, vvp_code_t cp)
2399 {
2400       double r = thr->pop_real();
2401       if (r >= 0.0)
2402 	    thr->words[cp->bit_idx[0]].w_uint = (uint64_t)floor(r+0.5);
2403       else
2404 	    thr->words[cp->bit_idx[0]].w_uint = (uint64_t)ceil(r-0.5);
2405 
2406       return true;
2407 }
2408 
2409 /*
2410  * %cvt/vr <wid>
2411  */
2412 bool of_CVT_VR(vthread_t thr, vvp_code_t cp)
2413 {
2414       double r = thr->pop_real();
2415       unsigned wid = cp->number;
2416 
2417       vvp_vector4_t tmp(wid, r);
2418       thr->push_vec4(tmp);
2419       return true;
2420 }
2421 
2422 /*
2423  * This implements the %deassign instruction. Locate the addressed
2424  * net and deassign the selected part of it. This turns off an active
2425  * continuous assign activated by %cassign/v
2426  */
2427 bool of_DEASSIGN(vthread_t, vvp_code_t cp)
2428 {
2429       vvp_net_t*net = cp->net;
2430       unsigned base  = cp->bit_idx[0];
2431       unsigned width = cp->bit_idx[1];
2432 
2433       vvp_signal_value*fil = dynamic_cast<vvp_signal_value*> (net->fil);
2434       assert(fil);
2435       vvp_fun_signal_vec*sig = dynamic_cast<vvp_fun_signal_vec*>(net->fun);
2436       assert(sig);
2437 
2438       if (base >= fil->value_size()) return true;
2439       if (base+width > fil->value_size()) width = fil->value_size() - base;
2440 
2441       bool full_sig = base == 0 && width == fil->value_size();
2442 
2443 	// This is the net that is forcing me...
2444       if (vvp_net_t*src = sig->cassign_link) {
2445 	    if (! full_sig) {
2446 		  fprintf(stderr, "Sorry: when a signal is assigning a "
2447 		          "register, I cannot deassign part of it.\n");
2448 		  exit(1);
2449 	    }
2450 	      // And this is the pointer to be removed.
2451 	    vvp_net_ptr_t dst_ptr (net, 1);
2452 	    src->unlink(dst_ptr);
2453 	    sig->cassign_link = 0;
2454       }
2455 
2456 	/* Do we deassign all or part of the net? */
2457       if (full_sig) {
2458 	    sig->deassign();
2459       } else {
2460 	    sig->deassign_pv(base, width);
2461       }
2462 
2463       return true;
2464 }
2465 
2466 bool of_DEASSIGN_WR(vthread_t, vvp_code_t cp)
2467 {
2468       vvp_net_t*net = cp->net;
2469 
2470       vvp_fun_signal_real*sig = dynamic_cast<vvp_fun_signal_real*>(net->fun);
2471       assert(sig);
2472 
2473 	// This is the net that is forcing me...
2474       if (vvp_net_t*src = sig->cassign_link) {
2475 	      // And this is the pointer to be removed.
2476 	    vvp_net_ptr_t dst_ptr (net, 1);
2477 	    src->unlink(dst_ptr);
2478 	    sig->cassign_link = 0;
2479       }
2480 
2481       sig->deassign();
2482 
2483       return true;
2484 }
2485 
2486 /*
2487  * %debug/thr
2488  */
2489 bool of_DEBUG_THR(vthread_t thr, vvp_code_t cp)
2490 {
2491       const char*text = cp->text;
2492       thr->debug_dump(cerr, text);
2493       return true;
2494 }
2495 
2496 /*
2497  * The delay takes two 32bit numbers to make up a 64bit time.
2498  *
2499  *   %delay <low>, <hig>
2500  */
2501 bool of_DELAY(vthread_t thr, vvp_code_t cp)
2502 {
2503       vvp_time64_t low = cp->bit_idx[0];
2504       vvp_time64_t hig = cp->bit_idx[1];
2505 
2506       vvp_time64_t delay = (hig << 32) | low;
2507 
2508       if (delay == 0) schedule_inactive(thr);
2509       else schedule_vthread(thr, delay);
2510       return false;
2511 }
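/*
 * Example (added for illustration): %delay 0xdeadbeef, 0x1 waits for
 * 0x1deadbeef simulation time ticks; the two 32-bit immediates are
 * simply glued together as (hig << 32) | low.
 */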
2512 
2513 bool of_DELAYX(vthread_t thr, vvp_code_t cp)
2514 {
2515       vvp_time64_t delay;
2516 
2517       assert(cp->number < vthread_s::WORDS_COUNT);
2518       delay = thr->words[cp->number].w_uint;
2519       if (delay == 0) schedule_inactive(thr);
2520       else schedule_vthread(thr, delay);
2521       return false;
2522 }
2523 
2524 bool of_DELETE_ELEM(vthread_t thr, vvp_code_t cp)
2525 {
2526       vvp_net_t*net = cp->net;
2527 
2528       int64_t idx_val = thr->words[3].w_int;
2529       if (thr->flags[4] == BIT4_1) {
2530 	    cerr << thr->get_fileline()
2531 	         << "Warning: skipping queue delete() with undefined index."
2532 	         << endl;
2533 	    return true;
2534       }
2535       if (idx_val < 0) {
2536 	    cerr << thr->get_fileline()
2537 	         << "Warning: skipping queue delete() with negative index."
2538 	         << endl;
2539 	    return true;
2540       }
2541       size_t idx = idx_val;
2542 
2543       vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
2544       assert(obj);
2545 
2546       vvp_queue*queue = obj->get_object().peek<vvp_queue>();
2547       if (queue == 0) {
2548 	    cerr << thr->get_fileline()
2549 	         << "Warning: skipping delete(" << idx
2550 	         << ") on empty queue." << endl;
2551       } else {
2552 	    size_t size = queue->get_size();
2553 	    if (idx >= size) {
2554 		  cerr << thr->get_fileline()
2555 		       << "Warning: skipping out of range delete(" << idx
2556 		       << ") on queue of size " << size << "." << endl;
2557 	    } else {
2558 		  queue->erase(idx);
2559 	    }
2560       }
2561 
2562       return true;
2563 }
2564 
2565 /* %delete/obj <label>
2566  *
2567  * This operator works by assigning a nil to the target object. This
2568  * causes any value that might be there to be garbage collected, thus
2569  * deleting the object.
2570  */
2571 bool of_DELETE_OBJ(vthread_t thr, vvp_code_t cp)
2572 {
2573 	/* set the value into port 0 of the destination. */
2574       vvp_net_ptr_t ptr (cp->net, 0);
2575       vvp_send_object(ptr, vvp_object_t(), thr->wt_context);
2576 
2577       return true;
2578 }
2579 
2580 /* %delete/tail <label>, idx
2581  *
2582  * Remove all elements after the one specified.
2583  */
2584 bool of_DELETE_TAIL(vthread_t thr, vvp_code_t cp)
2585 {
2586       vvp_net_t*net = cp->net;
2587 
2588       vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
2589       assert(obj);
2590 
2591       vvp_queue*queue = obj->get_object().peek<vvp_queue>();
2592       assert(queue);
2593 
2594       unsigned idx = thr->words[cp->bit_idx[0]].w_int;
2595       queue->erase_tail(idx);
2596 
2597       return true;
2598 }
2599 
2600 static bool do_disable(vthread_t thr, vthread_t match)
2601 {
2602       bool flag = false;
2603 
2604 	/* Pull the target thread out of its scope if needed. */
2605       thr->parent_scope->threads.erase(thr);
2606 
2607 	/* Turn the thread off by setting its program counter to
2608 	   zero and marking it as disabled and ended. */
2609       thr->pc = codespace_null();
2610       thr->i_was_disabled = 1;
2611       thr->i_have_ended = 1;
2612 
2613 	/* Turn off all the children of the thread. Simulate a %join
2614 	   for as many times as needed to clear the results of all the
2615 	   %forks that this thread has done. */
2616       while (! thr->children.empty()) {
2617 
2618 	    vthread_t tmp = *(thr->children.begin());
2619 	    assert(tmp);
2620 	    assert(tmp->parent == thr);
2621 	    thr->i_am_joining = 0;
2622 	    if (do_disable(tmp, match))
2623 		  flag = true;
2624 
2625 	    vthread_reap(tmp);
2626       }
2627 
2628       vthread_t parent = thr->parent;
2629       if (parent && parent->i_am_joining) {
2630 	      // If a parent is waiting in a %join, wake it up. Note
2631 	      // that it is possible to be waiting in a %join yet
2632 	      // already scheduled if multiple child threads are
2633 	      // ending. So check if the thread is already scheduled
2634 	      // before scheduling it again.
2635 	    parent->i_am_joining = 0;
2636 	    if (! parent->i_have_ended)
2637 		  schedule_vthread(parent, 0, true);
2638 
2639 	    do_join(parent, thr);
2640 
2641       } else if (parent) {
2642 	      /* If the parent is yet to %join me, let its %join
2643 		 do the reaping. */
2644 	      //assert(tmp->is_scheduled == 0);
2645 
2646       } else {
2647 	      /* No parent at all. Goodbye. */
2648 	    vthread_reap(thr);
2649       }
2650 
2651       return flag || (thr == match);
2652 }
2653 
2654 /*
2655  * Implement the %disable instruction by scanning the target scope for
2656  * all the target threads. Kill the target threads and wake up a
2657  * parent that is attempting a %join.
2658  */
2659 bool of_DISABLE(vthread_t thr, vvp_code_t cp)
2660 {
2661       __vpiScope*scope = (__vpiScope*)cp->handle;
2662 
2663       bool disabled_myself_flag = false;
2664 
2665       while (! scope->threads.empty()) {
2666 	    set<vthread_t>::iterator cur = scope->threads.begin();
2667 
2668 	    if (do_disable(*cur, thr))
2669 		  disabled_myself_flag = true;
2670       }
2671 
2672       return ! disabled_myself_flag;
2673 }
2674 
2675 /*
2676  * Implement the %disable/fork (SystemVerilog) instruction by disabling
2677  * all the detached children of the given thread.
2678  */
2679 bool of_DISABLE_FORK(vthread_t thr, vvp_code_t)
2680 {
2681 	/* If a %disable/fork is being executed then the parent thread
2682 	 * cannot be waiting in a join. */
2683       assert(! thr->i_am_joining);
2684 
2685 	/* There should be no active children to disable. */
2686       assert(thr->children.empty());
2687 
2688 	/* Disable any detached children. */
2689       while (! thr->detached_children.empty()) {
2690 	    vthread_t child = *(thr->detached_children.begin());
2691 	    assert(child);
2692 	    assert(child->parent == thr);
2693 	      /* Disabling the children can never match the parent thread. */
2694 	    bool res = do_disable(child, thr);
2695 	    assert(! res);
2696 	    vthread_reap(child);
2697       }
2698 
2699       return true;
2700 }
2701 
2702 /*
2703  * This function divides a 2-word number {high, a} by a 1-word
2704  * number. Assume that high < b.
2705  */
2706 static unsigned long divide2words(unsigned long a, unsigned long b,
2707 				  unsigned long high)
2708 {
2709       unsigned long result = 0;
2710       while (high > 0) {
2711 	    unsigned long tmp_result = ULONG_MAX / b;
2712 	    unsigned long remain = ULONG_MAX % b;
2713 
2714 	    remain += 1;
2715 	    if (remain >= b) {
2716 		  remain -= b;
2717 		  tmp_result += 1;
2718 	    }
2719 
2720 	      // Now 0x1_0...0 = b*tmp_result + remain
2721 	      // high*0x1_0...0 = high*(b*tmp_result + remain)
2722 	      // high*0x1_0...0 = high*b*tmp_result + high*remain
2723 
2724 	      // We know that high*0x1_0...0 >= high*b*tmp_result, and
2725 	      // we know that high*0x1_0...0 > high*remain. Use
2726 	      // high*remain as the remainder for another iteration,
2727 	      // and add tmp_result*high into the current estimate of
2728 	      // the result.
2729 	    result += tmp_result * high;
2730 
2731 	      // The new iteration starts with high*remain + a.
2732 	    remain = multiply_with_carry(high, remain, high);
2733 	    a += remain;
2734             if(a < remain)
2735               high += 1;
2736 
2737 	      // Now result*b + {high,a} == the input {high,a}. It is
2738 	      // possible that the new high >= 1. If so, it will
2739 	      // certainly be less than high from the previous
2740 	      // iteration. Do another iteration and it will shrink,
2741 	      // eventually to 0.
2742       }
2743 
2744 	// high is now 0, so a is the remaining remainder, so we can
2745 	// finish off the integer divide with a simple a/b.
2746 
2747       return result + a/b;
2748 }
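/*
 * Worked example (illustrative only, using artificially small 4-bit
 * "words"): dividing {high=0x2, a=0x5}, i.e. 0x25 == 37, by b=7 folds
 * one multiple of high into the running result per pass and shrinks
 * high until it reaches 0, at which point the final a/b division
 * completes the quotient: 37/7 == 5.
 */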
2749 
2750 static unsigned long* divide_bits(unsigned long*ap, unsigned long*bp, unsigned wid)
2751 {
2752 	// Do all our work a cpu-word at a time. The "words" variable
2753 	// is the number of CPU words needed to hold wid bits.
2754       unsigned words = (wid+CPU_WORD_BITS-1) / CPU_WORD_BITS;
2755 
2756       unsigned btop = words-1;
2757       while (btop > 0 && bp[btop] == 0)
2758 	    btop -= 1;
2759 
2760 	// Detect divide by 0, and exit.
2761       if (btop==0 && bp[0]==0)
2762 	    return 0;
2763 
2764 	// The result array will eventually accumulate the result. The
2765 	// diff array holds an intermediate difference used along the way.
2766       unsigned long*diff  = new unsigned long[words];
2767       unsigned long*result= new unsigned long[words];
2768       for (unsigned idx = 0 ; idx < words ; idx += 1)
2769 	    result[idx] = 0;
2770 
2771       for (unsigned cur = words-btop ; cur > 0 ; cur -= 1) {
2772 	    unsigned cur_ptr = cur-1;
2773 	    unsigned long cur_res;
2774 	    if (ap[cur_ptr+btop] >= bp[btop]) {
2775 		  unsigned long high = 0;
2776 		  if (cur_ptr+btop+1 < words)
2777 			high = ap[cur_ptr+btop+1];
2778 		  cur_res = divide2words(ap[cur_ptr+btop], bp[btop], high);
2779 
2780 	    } else if (cur_ptr+btop+1 >= words) {
2781 		  continue;
2782 
2783 	    } else if (ap[cur_ptr+btop+1] == 0) {
2784 		  continue;
2785 
2786 	    } else {
2787 		  cur_res = divide2words(ap[cur_ptr+btop], bp[btop],
2788 					 ap[cur_ptr+btop+1]);
2789 	    }
2790 
2791 	      // cur_res is a guesstimate of the result this far. It
2792 	      // may be 1 too big. (But it will also be >0) Try it,
2793 	      // and if the difference comes out negative, then adjust.
2794 
2795 	      // diff = (bp * cur_res)  << cur_ptr;
2796 	    multiply_array_imm(diff+cur_ptr, bp, words-cur_ptr, cur_res);
2797 	      // ap -= diff
2798 	    unsigned long carry = 1;
2799 	    for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
2800 		  ap[idx] = add_with_carry(ap[idx], ~diff[idx], carry);
2801 
2802 	      // ap has the diff subtracted out of it. If cur_res was
2803 	      // too large, then ap will turn negative. (We easily
2804 	      // tell that ap turned negative by looking at
2805 	      // carry&1. If it is 0, then it is *negative*.) In that
2806 	      // case, we know that cur_res was too large. Correct by
2807 	      // adding b back in and reducing cur_res.
2808 	    if ((carry&1) == 0) {
2809 		    // Keep adding b back in until the remainder
2810 		    // becomes positive again.
2811 		  do {
2812 			cur_res -= 1;
2813 			carry = 0;
2814 			for (unsigned idx = cur_ptr ; idx < words ; idx += 1)
2815 			      ap[idx] = add_with_carry(ap[idx], bp[idx-cur_ptr], carry);
2816 		  } while (carry == 0);
2817 	    }
2818 
2819 	    result[cur_ptr] = cur_res;
2820       }
2821 
2822 	// Now ap contains the remainder and result contains the
2823 	// desired result. We should find that:
2824 	//  input-a = bp * result + ap;
2825 
2826       delete[]diff;
2827       return result;
2828 }
2829 
2830 /*
2831  * %div
2832  */
2833 bool of_DIV(vthread_t thr, vvp_code_t)
2834 {
2835       vvp_vector4_t valb = thr->pop_vec4();
2836       vvp_vector4_t vala = thr->pop_vec4();
2837 
2838       assert(vala.size()== valb.size());
2839       unsigned wid = vala.size();
2840 
2841       unsigned long*ap = vala.subarray(0, wid);
2842       if (ap == 0) {
2843 	    vvp_vector4_t tmp(wid, BIT4_X);
2844 	    thr->push_vec4(tmp);
2845 	    return true;
2846       }
2847 
2848       unsigned long*bp = valb.subarray(0, wid);
2849       if (bp == 0) {
2850 	    delete[]ap;
2851 	    vvp_vector4_t tmp(wid, BIT4_X);
2852 	    thr->push_vec4(tmp);
2853 	    return true;
2854       }
2855 
2856 	// If the value fits in a single CPU word, then do it the easy way.
2857       if (wid <= CPU_WORD_BITS) {
2858 	    if (bp[0] == 0) {
2859 		  vvp_vector4_t tmp(wid, BIT4_X);
2860 		  thr->push_vec4(tmp);
2861 	    } else {
2862 		  ap[0] /= bp[0];
2863 		  vala.setarray(0, wid, ap);
2864 		  thr->push_vec4(vala);
2865 	    }
2866 	    delete[]ap;
2867 	    delete[]bp;
2868 	    return true;
2869       }
2870 
2871       unsigned long*result = divide_bits(ap, bp, wid);
2872       if (result == 0) {
2873 	    delete[]ap;
2874 	    delete[]bp;
2875 	    vvp_vector4_t tmp(wid, BIT4_X);
2876 	    thr->push_vec4(tmp);
2877 	    return true;
2878       }
2879 
2880 	// Now ap contains the remainder and result contains the
2881 	// desired result. We should find that:
2882 	//  input-a = bp * result + ap;
2883 
2884       vala.setarray(0, wid, result);
2885       thr->push_vec4(vala);
2886       delete[]ap;
2887       delete[]bp;
2888       delete[]result;
2889 
2890       return true;
2891 }
2892 
2893 
2894 static void negate_words(unsigned long*val, unsigned words)
2895 {
2896       unsigned long carry = 1;
2897       for (unsigned idx = 0 ; idx < words ; idx += 1)
2898 	    val[idx] = add_with_carry(0, ~val[idx], carry);
2899 }
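/*
 * Illustrative note (added commentary): this is a multi-word two's
 * complement negation. With words==1 and val[0]==5 the result is
 * ~5UL + 1, i.e. (unsigned long)-5; for wider values the carry out of
 * each add_with_carry() ripples the "+1" into the higher words.
 */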
2900 
2901 /*
2902  * %div/s
2903  */
2904 bool of_DIV_S(vthread_t thr, vvp_code_t)
2905 {
2906       vvp_vector4_t valb = thr->pop_vec4();
2907       vvp_vector4_t&vala = thr->peek_vec4();
2908 
2909       assert(vala.size()== valb.size());
2910       unsigned wid = vala.size();
2911       unsigned words = (wid + CPU_WORD_BITS - 1) / CPU_WORD_BITS;
2912 
2913 	// Get the values, left and right, in binary form. If there is
2914 	// a problem with either (caused by an X or Z bit) then we
2915 	// know right away that the entire result is X.
2916       unsigned long*ap = vala.subarray(0, wid);
2917       if (ap == 0) {
2918 	    vvp_vector4_t tmp(wid, BIT4_X);
2919 	    vala = tmp;
2920 	    return true;
2921       }
2922 
2923       unsigned long*bp = valb.subarray(0, wid);
2924       if (bp == 0) {
2925 	    delete[]ap;
2926 	    vvp_vector4_t tmp(wid, BIT4_X);
2927 	    vala = tmp;
2928 	    return true;
2929       }
2930 
2931 	// Sign extend the bits in the array to fill out the array.
2932       unsigned long sign_mask = 0;
2933       if (unsigned long sign_bits = (words*CPU_WORD_BITS) - wid) {
2934 	    sign_mask = -1UL << (CPU_WORD_BITS-sign_bits);
2935 	    if (ap[words-1] & (sign_mask>>1))
2936 		  ap[words-1] |= sign_mask;
2937 	    if (bp[words-1] & (sign_mask>>1))
2938 		  bp[words-1] |= sign_mask;
2939       }
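	// Example of the sign extension above (added for illustration):
	// with 64-bit CPU words and wid==70, words==2 and sign_bits==58,
	// so sign_mask covers the top 58 bits of ap[1] and bp[1]. Filling
	// those bits with copies of the sign bit lets the sign tests and
	// divides below treat the arrays as full-width signed values.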
2940 
2941 	// If the value fits in a single word, then use the native divide.
2942       if (wid <= CPU_WORD_BITS) {
2943 	    if (bp[0] == 0) {
2944 		  vvp_vector4_t tmp(wid, BIT4_X);
2945 		  vala = tmp;
2946 	    } else if (((long)ap[0] == LONG_MIN) && ((long)bp[0] == -1)) {
2947 		  vvp_vector4_t tmp(wid, BIT4_0);
2948 		  tmp.set_bit(wid-1, BIT4_1);
2949 		  vala = tmp;
2950 	    } else {
2951 		  long tmpa = (long) ap[0];
2952 		  long tmpb = (long) bp[0];
2953 		  long res = tmpa / tmpb;
2954 		  ap[0] = ((unsigned long)res) & ~sign_mask;
2955 		  vala.setarray(0, wid, ap);
2956 	    }
2957 	    delete[]ap;
2958 	    delete[]bp;
2959 	    return true;
2960       }
2961 
2962 	// We need to do the actual division on positive integers. Make
2963 	// them positive here, and remember the negations.
2964       bool negate_flag = false;
2965       if ( ((long) ap[words-1]) < 0 ) {
2966 	    negate_flag = true;
2967 	    negate_words(ap, words);
2968       }
2969       if ( ((long) bp[words-1]) < 0 ) {
2970 	    negate_flag ^= true;
2971 	    negate_words(bp, words);
2972       }
2973 
2974       unsigned long*result = divide_bits(ap, bp, wid);
2975       if (result == 0) {
2976 	    delete[]ap;
2977 	    delete[]bp;
2978 	    vvp_vector4_t tmp(wid, BIT4_X);
2979 	    vala = tmp;
2980 	    return true;
2981       }
2982 
2983       if (negate_flag) {
2984 	    negate_words(result, words);
2985       }
2986 
2987       result[words-1] &= ~sign_mask;
2988 
2989       vala.setarray(0, wid, result);
2990       delete[]ap;
2991       delete[]bp;
2992       delete[]result;
2993       return true;
2994 }
2995 
2996 bool of_DIV_WR(vthread_t thr, vvp_code_t)
2997 {
2998       double r = thr->pop_real();
2999       double l = thr->pop_real();
3000       thr->push_real(l / r);
3001 
3002       return true;
3003 }
3004 
3005 bool of_DUP_REAL(vthread_t thr, vvp_code_t)
3006 {
3007       thr->push_real(thr->peek_real(0));
3008       return true;
3009 }
3010 
3011 bool of_DUP_VEC4(vthread_t thr, vvp_code_t)
3012 {
3013       thr->push_vec4(thr->peek_vec4(0));
3014       return true;
3015 }
3016 
3017 /*
3018  * This terminates the current thread. If there is a parent who is
3019  * waiting for me to die, then I schedule it. At any rate, I mark
3020  * myself as a zombie by setting my pc to 0.
3021  */
3022 bool of_END(vthread_t thr, vvp_code_t)
3023 {
3024       assert(! thr->waiting_for_event);
3025       thr->i_have_ended = 1;
3026       thr->pc = codespace_null();
3027 
3028 	/* Fully detach any detached children. */
3029       while (! thr->detached_children.empty()) {
3030 	    vthread_t child = *(thr->detached_children.begin());
3031 	    assert(child);
3032 	    assert(child->parent == thr);
3033 	    assert(child->i_am_detached);
3034 	    child->parent = 0;
3035 	    child->i_am_detached = 0;
3036 	    thr->detached_children.erase(thr->detached_children.begin());
3037       }
3038 
3039 	/* It is an error to still have active children running at this
3040 	 * point in time. They should have all been detached or joined. */
3041       assert(thr->children.empty());
3042 
3043 	/* If I have a parent who is waiting for me, then mark that I
3044 	   have ended, and schedule that parent. Also, finish the
3045 	   %join for the parent. */
3046       if (!thr->i_am_detached && thr->parent && thr->parent->i_am_joining) {
3047 	    vthread_t tmp = thr->parent;
3048 	    assert(! thr->i_am_detached);
3049 
3050 	    tmp->i_am_joining = 0;
3051 	    schedule_vthread(tmp, 0, true);
3052 	    do_join(tmp, thr);
3053 	    return false;
3054       }
3055 
3056 	/* If this thread is detached, but not yet fully detached, remove
3057 	 * it from the parent's detached_children set so it can be reaped below. */
3058       if (thr->i_am_detached) {
3059 	    vthread_t tmp = thr->parent;
3060 	    assert(tmp);
3061 	    size_t res = tmp->detached_children.erase(thr);
3062 	    assert(res == 1);
3063 	      /* If the parent is waiting for the detached children to
3064 	       * finish then the last detached child needs to tell the
3065 	       * parent to wake up when it is finished. */
3066 	    if (tmp->i_am_waiting && tmp->detached_children.empty()) {
3067 		  tmp->i_am_waiting = 0;
3068 		  schedule_vthread(tmp, 0, true);
3069 	    }
3070 	      /* Fully detach this thread so it will be reaped below. */
3071 	    thr->i_am_detached = 0;
3072 	    thr->parent = 0;
3073       }
3074 
3075 	/* If I have no parent, then no one can %join me and there is
3076 	 * no reason to stick around. This can happen, for example if
3077 	 * I am an ``initial'' thread. */
3078       if (thr->parent == 0) {
3079 	    vthread_reap(thr);
3080 	    return false;
3081       }
3082 
3083 	/* If I make it this far, then I have a parent who may wish
3084 	   to %join me. Remain a zombie so that it can. */
3085 
3086       return false;
3087 }
3088 
3089 /*
3090  * %event <var-label>
3091  */
3092 bool of_EVENT(vthread_t thr, vvp_code_t cp)
3093 {
3094       vvp_net_ptr_t ptr (cp->net, 0);
3095       vvp_vector4_t tmp (1, BIT4_X);
3096       vvp_send_vec4(ptr, tmp, thr->wt_context);
3097       return true;
3098 }
3099 
3100 bool of_EVCTL(vthread_t thr, vvp_code_t cp)
3101 {
3102       assert(thr->event == 0 && thr->ecount == 0);
3103       thr->event = cp->net;
3104       thr->ecount = thr->words[cp->bit_idx[0]].w_uint;
3105       return true;
3106 }
3107 bool of_EVCTLC(vthread_t thr, vvp_code_t)
3108 {
3109       thr->event = 0;
3110       thr->ecount = 0;
3111       return true;
3112 }
3113 
3114 bool of_EVCTLI(vthread_t thr, vvp_code_t cp)
3115 {
3116       assert(thr->event == 0 && thr->ecount == 0);
3117       thr->event = cp->net;
3118       thr->ecount = cp->bit_idx[0];
3119       return true;
3120 }
3121 
3122 bool of_EVCTLS(vthread_t thr, vvp_code_t cp)
3123 {
3124       assert(thr->event == 0 && thr->ecount == 0);
3125       thr->event = cp->net;
3126       int64_t val = thr->words[cp->bit_idx[0]].w_int;
3127       if (val < 0) val = 0;
3128       thr->ecount = val;
3129       return true;
3130 }
3131 
3132 bool of_FLAG_GET_VEC4(vthread_t thr, vvp_code_t cp)
3133 {
3134       int flag = cp->number;
3135       assert(flag < vthread_s::FLAGS_COUNT);
3136 
3137       vvp_vector4_t val (1, thr->flags[flag]);
3138       thr->push_vec4(val);
3139 
3140       return true;
3141 }
3142 
3143 /*
3144  * %flag_inv <flag1>
3145  */
3146 bool of_FLAG_INV(vthread_t thr, vvp_code_t cp)
3147 {
3148       int flag1 = cp->bit_idx[0];
3149 
3150       thr->flags[flag1] = ~ thr->flags[flag1];
3151       return true;
3152 }
3153 
3154 /*
3155  * %flag_mov <flag1>, <flag2>
3156  */
3157 bool of_FLAG_MOV(vthread_t thr, vvp_code_t cp)
3158 {
3159       int flag1 = cp->bit_idx[0];
3160       int flag2 = cp->bit_idx[1];
3161 
3162       thr->flags[flag1] = thr->flags[flag2];
3163       return true;
3164 }
3165 
3166 /*
3167  * %flag_or <flag1>, <flag2>
3168  */
3169 bool of_FLAG_OR(vthread_t thr, vvp_code_t cp)
3170 {
3171       int flag1 = cp->bit_idx[0];
3172       int flag2 = cp->bit_idx[1];
3173 
3174       thr->flags[flag1] = thr->flags[flag1] | thr->flags[flag2];
3175       return true;
3176 }
3177 
3178 bool of_FLAG_SET_IMM(vthread_t thr, vvp_code_t cp)
3179 {
3180       int flag = cp->number;
3181       int vali = cp->bit_idx[0];
3182 
3183       assert(flag < vthread_s::FLAGS_COUNT);
3184       assert(vali >= 0 && vali < 4);
3185 
3186       static vvp_bit4_t map_bit[4] = {BIT4_0, BIT4_1, BIT4_Z, BIT4_X};
3187       thr->flags[flag] = map_bit[vali];
3188       return true;
3189 }
3190 
3191 bool of_FLAG_SET_VEC4(vthread_t thr, vvp_code_t cp)
3192 {
3193       int flag = cp->number;
3194       assert(flag < vthread_s::FLAGS_COUNT);
3195 
3196       const vvp_vector4_t&val = thr->peek_vec4();
3197       thr->flags[flag] = val.value(0);
3198       thr->pop_vec4(1);
3199 
3200       return true;
3201 }
3202 
3203 /*
3204  * the %force/link instruction connects a source node to a
3205  * destination node. The destination node must be a signal, as it is
3206  * marked with the source of the force so that it may later be
3207  * unlinked without specifically knowing the source that this
3208  * instruction used.
3209  */
3210 bool of_FORCE_LINK(vthread_t, vvp_code_t cp)
3211 {
3212       vvp_net_t*dst = cp->net;
3213       vvp_net_t*src = cp->net2;
3214 
3215       assert(dst->fil);
3216       dst->fil->force_link(dst, src);
3217 
3218       return true;
3219 }
3220 
3221 /*
3222  * The %force/vec4 instruction invokes a force assign of a constant value
3223  * to a signal. The instruction arguments are:
3224  *
3225  *     %force/vec4 <net> ;
3226  *
3227  * where the <net> is the net label assembled into a vvp_net pointer,
3228  * and the value to be forced is popped from the vec4 stack.
3229  *
3230  * The instruction writes a vvp_vector4_t value to port-2 of the
3231  * target signal.
3232  */
3233 bool of_FORCE_VEC4(vthread_t thr, vvp_code_t cp)
3234 {
3235       vvp_net_t*net = cp->net;
3236 
3237       vvp_vector4_t value = thr->pop_vec4();
3238 
3239 	/* Send the force value to the filter on the node. */
3240 
3241       assert(net->fil);
3242       if (value.size() != net->fil->filter_size())
3243 	    value = coerce_to_width(value, net->fil->filter_size());
3244 
3245       net->force_vec4(value, vvp_vector2_t(vvp_vector2_t::FILL1, net->fil->filter_size()));
3246 
3247       return true;
3248 }
3249 
3250 /*
3251  * %force/vec4/off <net>, <off>
3252  */
3253 bool of_FORCE_VEC4_OFF(vthread_t thr, vvp_code_t cp)
3254 {
3255       vvp_net_t*net = cp->net;
3256       unsigned base_idx = cp->bit_idx[0];
3257       long base = thr->words[base_idx].w_int;
3258       vvp_vector4_t value = thr->pop_vec4();
3259       unsigned wid = value.size();
3260 
3261       assert(net->fil);
3262 
3263       if (thr->flags[4] == BIT4_1)
3264 	    return true;
3265 
3266 	// This is the width of the target vector.
3267       unsigned use_size = net->fil->filter_size();
3268 
3269       if (base >= (long)use_size)
3270 	    return true;
3271       if (base < -(long)use_size)
3272 	    return true;
3273 
3274       if ((base + wid) > use_size)
3275 	    wid = use_size - base;
3276 
3277 	// Make a mask of which bits are to be forced, 0 for unforced
3278 	// bits and 1 for forced bits.
3279       vvp_vector2_t mask (vvp_vector2_t::FILL0, use_size);
3280       for (unsigned idx = 0 ; idx < wid ; idx += 1)
3281 	    mask.set_bit(base+idx, 1);
3282 
3283       vvp_vector4_t tmp (use_size, BIT4_Z);
3284 
3285 	// vvp_net_t::force_vec4 propagates all the bits of the
3286 	// forced vector value, regardless of the mask. This
3287 	// ensures the unforced bits retain their current value.
3288       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
3289       assert(sig);
3290       sig->vec4_value(tmp);
3291 
3292       tmp.set_vec(base, value);
3293 
3294       net->force_vec4(tmp, mask);
3295       return true;
3296 }
3297 
3298 /*
3299  * %force/vec4/off/d <net>, <off>, <del>
3300  */
3301 bool of_FORCE_VEC4_OFF_D(vthread_t thr, vvp_code_t cp)
3302 {
3303       vvp_net_t*net = cp->net;
3304 
3305       unsigned base_idx = cp->bit_idx[0];
3306       long base = thr->words[base_idx].w_int;
3307 
3308       unsigned delay_idx = cp->bit_idx[1];
3309       vvp_time64_t delay = thr->words[delay_idx].w_uint;
3310 
3311       vvp_vector4_t value = thr->pop_vec4();
3312 
3313       assert(net->fil);
3314 
3315       if (thr->flags[4] == BIT4_1)
3316 	    return true;
3317 
3318 	// This is the width of the target vector.
3319       unsigned use_size = net->fil->filter_size();
3320 
3321       if (base >= (long)use_size)
3322 	    return true;
3323       if (base < -(long)use_size)
3324 	    return true;
3325 
3326       schedule_force_vector(net, base, use_size, value, delay);
3327       return true;
3328 }
3329 
3330 bool of_FORCE_WR(vthread_t thr, vvp_code_t cp)
3331 {
3332       vvp_net_t*net  = cp->net;
3333       double value = thr->pop_real();
3334 
3335       net->force_real(value, vvp_vector2_t(vvp_vector2_t::FILL1, 1));
3336 
3337       return true;
3338 }
3339 
3340 /*
3341  * The %fork instruction causes a new child to be created and pushed
3342  * in front of any existing child. This causes the new child to be
3343  * added to the list of children, and for me to be the parent of the
3344  * new child.
3345  */
3346 bool of_FORK(vthread_t thr, vvp_code_t cp)
3347 {
3348       vthread_t child = vthread_new(cp->cptr2, cp->scope);
3349 
3350       if (cp->scope->is_automatic()) {
3351               /* The context allocated for this child is the top entry
3352                  on the write context stack. */
3353             child->wt_context = thr->wt_context;
3354             child->rd_context = thr->wt_context;
3355       }
3356 
3357       child->parent = thr;
3358       thr->children.insert(child);
3359 
3360       if (thr->i_am_in_function) {
3361 	    child->is_scheduled = 1;
3362 	    child->i_am_in_function = 1;
3363 	    vthread_run(child);
3364 	    running_thread = thr;
3365       } else {
3366 	    schedule_vthread(child, 0, true);
3367       }
3368       return true;
3369 }
3370 
3371 bool of_FREE(vthread_t thr, vvp_code_t cp)
3372 {
3373         /* Pop the child context from the read context stack. */
3374       vvp_context_t child_context = thr->rd_context;
3375       thr->rd_context = vvp_get_stacked_context(child_context);
3376 
3377         /* Free the context. */
3378       vthread_free_context(child_context, cp->scope);
3379 
3380       return true;
3381 }
3382 
3383 /*
3384  * %inv
3385  *
3386  * Logically, this pops a value, inverts it (Verilog style, with Z and
3387  * X converted to X) and pushes the result. We can more efficiently
3388  * just do the invert in place.
3389  */
3390 bool of_INV(vthread_t thr, vvp_code_t)
3391 {
3392       vvp_vector4_t&val = thr->peek_vec4();
3393       val.invert();
3394       return true;
3395 }
3396 
3397 
3398 /*
3399  * Index registers, arithmetic.
3400  */
3401 
3402 static inline int64_t get_as_64_bit(uint32_t low_32, uint32_t high_32)
3403 {
3404       int64_t low = low_32;
3405       int64_t res = high_32;
3406 
3407       res <<= 32;
3408       res |= low;
3409       return res;
3410 }
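/*
 * Example (added for illustration): the immediate operands of the
 * %ix/... opcodes are split into two 32-bit halves, so
 * get_as_64_bit(0x00000001, 0x00000002) reassembles them into the
 * 64-bit value 0x0000000200000001.
 */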
3411 
3412 bool of_IX_ADD(vthread_t thr, vvp_code_t cp)
3413 {
3414       thr->words[cp->number].w_int += get_as_64_bit(cp->bit_idx[0],
3415                                                     cp->bit_idx[1]);
3416       return true;
3417 }
3418 
3419 bool of_IX_SUB(vthread_t thr, vvp_code_t cp)
3420 {
3421       thr->words[cp->number].w_int -= get_as_64_bit(cp->bit_idx[0],
3422                                                     cp->bit_idx[1]);
3423       return true;
3424 }
3425 
3426 bool of_IX_MUL(vthread_t thr, vvp_code_t cp)
3427 {
3428       thr->words[cp->number].w_int *= get_as_64_bit(cp->bit_idx[0],
3429                                                     cp->bit_idx[1]);
3430       return true;
3431 }
3432 
3433 bool of_IX_LOAD(vthread_t thr, vvp_code_t cp)
3434 {
3435       thr->words[cp->number].w_int = get_as_64_bit(cp->bit_idx[0],
3436                                                    cp->bit_idx[1]);
3437       return true;
3438 }
3439 
3440 bool of_IX_MOV(vthread_t thr, vvp_code_t cp)
3441 {
3442       thr->words[cp->bit_idx[0]].w_int = thr->words[cp->bit_idx[1]].w_int;
3443       return true;
3444 }
3445 
3446 bool of_IX_GETV(vthread_t thr, vvp_code_t cp)
3447 {
3448       unsigned index = cp->bit_idx[0];
3449       vvp_net_t*net = cp->net;
3450 
3451       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
3452       if (sig == 0) {
3453 	    assert(net->fil);
3454 	    cerr << thr->get_fileline()
3455 	         << "%%ix/getv error: Net arg not a vector signal? "
3456 		 << typeid(*net->fil).name() << endl;
3457       }
3458       assert(sig);
3459 
3460       vvp_vector4_t vec;
3461       sig->vec4_value(vec);
3462       bool overflow_flag;
3463       uint64_t val;
3464       bool known_flag = vector4_to_value(vec, overflow_flag, val);
3465 
3466       if (known_flag)
3467 	    thr->words[index].w_uint = val;
3468       else
3469 	    thr->words[index].w_uint = 0;
3470 
3471 	/* Set flag bit 4: 0 for a good value, X on overflow, 1 if the input is unknown. */
3472       thr->flags[4] = known_flag ? (overflow_flag ? BIT4_X : BIT4_0) : BIT4_1;
3473 
3474       return true;
3475 }
3476 
3477 bool of_IX_GETV_S(vthread_t thr, vvp_code_t cp)
3478 {
3479       unsigned index = cp->bit_idx[0];
3480       vvp_net_t*net = cp->net;
3481 
3482       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*>(net->fil);
3483       if (sig == 0) {
3484 	    assert(net->fil);
3485 	    cerr << thr->get_fileline()
3486 	         << "%ix/getv/s error: Net arg not a vector signal? "
3487 		 << "fun=" << typeid(*net->fun).name()
3488 		 << ", fil=" << (net->fil? typeid(*net->fil).name() : "<>")
3489 		 << endl;
3490       }
3491       assert(sig);
3492 
3493       vvp_vector4_t vec;
3494       sig->vec4_value(vec);
3495       int64_t val;
3496       bool known_flag = vector4_to_value(vec, val, true, true);
3497 
3498       if (known_flag)
3499 	    thr->words[index].w_int = val;
3500       else
3501 	    thr->words[index].w_int = 0;
3502 
3503 	/* Set bit 4 as a flag if the input is unknown. */
3504       thr->flags[4] = known_flag? BIT4_0 : BIT4_1;
3505 
3506       return true;
3507 }
3508 
3509 static uint64_t vec4_to_index(vthread_t thr, bool signed_flag)
3510 {
3511 	// Get all the information we need about the vec4 vector, then
3512 	// pop it away. We only need the bool bits and the length.
3513       const vvp_vector4_t&val = thr->peek_vec4();
3514       unsigned val_size = val.size();
3515       unsigned long*bits = val.subarray(0, val_size, false);
3516       thr->pop_vec4(1);
3517 
3518 	// If there are X/Z bits, then the subarray will give us a nil
3519 	// pointer. Set a flag to indicate the error, and give up.
3520       if (bits == 0) {
3521 	    thr->flags[4] = BIT4_1;
3522 	    return 0;
3523       }
3524 
3525       uint64_t v = 0;
3526       thr->flags[4] = BIT4_0;
3527 
3528       assert(sizeof(bits[0]) <= sizeof(v));
3529 
3530       v = 0;
3531       for (unsigned idx = 0 ; idx < val_size ; idx += 8*sizeof(bits[0])) {
3532 	    uint64_t tmp = bits[idx/8/sizeof(bits[0])];
3533 	    if (idx < 8*sizeof(v)) {
3534 		  v |= tmp << idx;
3535 	    } else {
3536 		  bool overflow = signed_flag && (v >> 63) ? ~tmp != 0 : tmp != 0;
3537 		  if (overflow) {
3538 			thr->flags[4] = BIT4_X;
3539 			break;
3540 		  }
3541 	    }
3542       }
3543 
3544 	// Set the high bits that are not necessarily filled in by the
3545 	// subarray function.
3546       if (val_size < 8*sizeof(v)) {
3547 	    if (signed_flag && (v & (static_cast<uint64_t>(1)<<(val_size-1)))) {
3548 		    // Propagate the sign bit...
3549 		  v |= (~static_cast<uint64_t>(0)) << val_size;
3550 
3551 	    } else {
3552 		    // Fill with zeros.
3553 		  v &= ~((~static_cast<uint64_t>(0)) << val_size);
3554 	    }
3555 
3556       }
3557 
3558       delete[]bits;
3559       return v;
3560 }
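
/*
 * A quick sketch of what vec4_to_index computes: given a 4-bit value
 * 4'b1110 on the vec4 stack, the unsigned conversion yields 14, while
 * the signed conversion propagates the top bit and yields
 * 0xfffffffffffffffe, i.e. -2 when used as a signed index. Any X or Z
 * bit in the input instead sets flags[4] to 1 and returns 0.
 */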
3561 
3562 /*
3563  * %ix/vec4 <idx>
3564  */
3565 bool of_IX_VEC4(vthread_t thr, vvp_code_t cp)
3566 {
3567       unsigned use_idx = cp->number;
3568       thr->words[use_idx].w_uint = vec4_to_index(thr, false);
3569       return true;
3570 }
3571 
3572 /*
3573  * %ix/vec4/s <idx>
3574  */
3575 bool of_IX_VEC4_S(vthread_t thr, vvp_code_t cp)
3576 {
3577       unsigned use_idx = cp->number;
3578       thr->words[use_idx].w_uint = vec4_to_index(thr, true);
3579       return true;
3580 }
3581 
3582 /*
3583  * The various JMP instructions work simply by pulling the new program
3584  * counter from the instruction and resuming. If the jump is
3585  * conditional, then test the bit for the expected value first.
3586  */
3587 bool of_JMP(vthread_t thr, vvp_code_t cp)
3588 {
3589       thr->pc = cp->cptr;
3590 
3591 	/* Normally, this returns true so that the processor just
3592 	   keeps going to the next instruction. However, if there was
3593 	   a $stop or vpiStop, returning false here can break the
3594 	   simulation out of a hung loop. */
3595       if (schedule_stopped()) {
3596 	    schedule_vthread(thr, 0, false);
3597 	    return false;
3598       }
3599 
3600       return true;
3601 }
3602 
3603 /*
3604  * %jmp/0 <pc>, <flag>
3605  */
3606 bool of_JMP0(vthread_t thr, vvp_code_t cp)
3607 {
3608       if (thr->flags[cp->bit_idx[0]] == BIT4_0)
3609 	    thr->pc = cp->cptr;
3610 
3611 	/* Normally, this returns true so that the processor just
3612 	   keeps going to the next instruction. However, if there was
3613 	   a $stop or vpiStop, returning false here can break the
3614 	   simulation out of a hung loop. */
3615       if (schedule_stopped()) {
3616 	    schedule_vthread(thr, 0, false);
3617 	    return false;
3618       }
3619 
3620       return true;
3621 }
3622 
3623 /*
3624  * %jmp/0xz <pc>, <flag>
3625  */
3626 bool of_JMP0XZ(vthread_t thr, vvp_code_t cp)
3627 {
3628       if (thr->flags[cp->bit_idx[0]] != BIT4_1)
3629 	    thr->pc = cp->cptr;
3630 
3631 	/* Normally, this returns true so that the processor just
3632 	   keeps going to the next instruction. However, if there was
3633 	   a $stop or vpiStop, returning false here can break the
3634 	   simulation out of a hung loop. */
3635       if (schedule_stopped()) {
3636 	    schedule_vthread(thr, 0, false);
3637 	    return false;
3638       }
3639 
3640       return true;
3641 }
3642 
3643 /*
3644  * %jmp/1 <pc>, <flag>
3645  */
3646 bool of_JMP1(vthread_t thr, vvp_code_t cp)
3647 {
3648       if (thr->flags[cp->bit_idx[0]] == BIT4_1)
3649 	    thr->pc = cp->cptr;
3650 
3651 	/* Normally, this returns true so that the processor just
3652 	   keeps going to the next instruction. However, if there was
3653 	   a $stop or vpiStop, returning false here can break the
3654 	   simulation out of a hung loop. */
3655       if (schedule_stopped()) {
3656 	    schedule_vthread(thr, 0, false);
3657 	    return false;
3658       }
3659 
3660       return true;
3661 }
3662 
3663 /*
3664  * %jmp/1xz <pc>, <flag>
3665  */
3666 bool of_JMP1XZ(vthread_t thr, vvp_code_t cp)
3667 {
3668       if (thr->flags[cp->bit_idx[0]] != BIT4_0)
3669 	    thr->pc = cp->cptr;
3670 
3671 	/* Normally, this returns true so that the processor just
3672 	   keeps going to the next instruction. However, if there was
3673 	   a $stop or vpiStop, returning false here can break the
3674 	   simulation out of a hung loop. */
3675       if (schedule_stopped()) {
3676 	    schedule_vthread(thr, 0, false);
3677 	    return false;
3678       }
3679 
3680       return true;
3681 }
3682 
3683 /*
3684  * The %join instruction causes the thread to wait for one child
3685  * to die.  If a child is already dead (and a zombie) then I reap
3686  * it and go on. Otherwise, I mark myself as waiting in a join so that
3687  * children know to wake me when they finish.
3688  */
3689 
3690 static void do_join(vthread_t thr, vthread_t child)
3691 {
3692       assert(child->parent == thr);
3693 
3694         /* If the immediate child thread is in an automatic scope... */
3695       if (child->wt_context) {
3696               /* and is the top level task/function thread... */
3697             if (thr->wt_context != thr->rd_context) {
3698                     /* Pop the child context from the write context stack. */
3699                   vvp_context_t child_context = thr->wt_context;
3700                   thr->wt_context = vvp_get_stacked_context(child_context);
3701 
3702                     /* Push the child context onto the read context stack */
3703                   vvp_set_stacked_context(child_context, thr->rd_context);
3704                   thr->rd_context = child_context;
3705             }
3706       }
3707 
3708       vthread_reap(child);
3709 }
3710 
3711 static bool do_join_opcode(vthread_t thr)
3712 {
3713       assert( !thr->i_am_joining );
3714       assert( !thr->children.empty());
3715 
3716 	// Are there any children that have already ended? If so, then
3717 	// join with that one.
3718       for (set<vthread_t>::iterator cur = thr->children.begin()
3719 		 ; cur != thr->children.end() ; ++cur) {
3720 	    vthread_t curp = *cur;
3721 	    if (! curp->i_have_ended)
3722 		  continue;
3723 
3724 	      // found something!
3725 	    do_join(thr, curp);
3726 	    return true;
3727       }
3728 
3729 	// Otherwise, tell my children to awaken me when they end,
3730 	// then pause.
3731       thr->i_am_joining = 1;
3732       return false;
3733 }
3734 
3735 bool of_JOIN(vthread_t thr, vvp_code_t)
3736 {
3737       return do_join_opcode(thr);
3738 }
3739 
3740 /*
3741  * This %join/detach <n> instruction causes the thread to detach
3742  * threads that were created by an earlier %fork.
3743  */
3744 bool of_JOIN_DETACH(vthread_t thr, vvp_code_t cp)
3745 {
3746       unsigned long count = cp->number;
3747 
3748       assert(count == thr->children.size());
3749 
3750       while (! thr->children.empty()) {
3751 	    vthread_t child = *thr->children.begin();
3752 	    assert(child->parent == thr);
3753 
3754 	      // We cannot detach automatic tasks/functions within an
3755 	      // automatic scope. If we try to do that, we might make
3756 	      // a mess of the allocation of the context. Note that it
3757 	      // is OK if the child context is distinct (See %exec_ufunc.)
3758 	    assert(child->wt_context==0 || thr->wt_context!=child->wt_context);
3759 	    if (child->i_have_ended) {
3760 		    // If the child has already ended, then reap it.
3761 		  vthread_reap(child);
3762 
3763 	    } else {
3764 		  size_t res = child->parent->children.erase(child);
3765 		  assert(res == 1);
3766 		  child->i_am_detached = 1;
3767 		  thr->detached_children.insert(child);
3768 	    }
3769       }
3770 
3771       return true;
3772 }
3773 
3774 /*
3775  * %load/ar <array-label>, <index>;
3776  */
3777 bool of_LOAD_AR(vthread_t thr, vvp_code_t cp)
3778 {
3779       unsigned idx = cp->bit_idx[0];
3780       unsigned adr = thr->words[idx].w_int;
3781       double word;
3782 
3783 	/* The result is 0.0 if the address is undefined. */
3784       if (thr->flags[4] == BIT4_1) {
3785 	    word = 0.0;
3786       } else {
3787 	    word = cp->array->get_word_r(adr);
3788       }
3789 
3790       thr->push_real(word);
3791       return true;
3792 }
3793 
3794 template <typename ELEM>
3795 static bool load_dar(vthread_t thr, vvp_code_t cp)
3796 {
3797       int64_t adr = thr->words[3].w_int;
3798       vvp_net_t*net = cp->net;
3799       assert(net);
3800 
3801       vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
3802       assert(obj);
3803 
3804       vvp_darray*darray = obj->get_object().peek<vvp_darray>();
3805 
3806       ELEM word;
3807       if (darray &&
3808           (adr >= 0) && (thr->flags[4] == BIT4_0)) // A defined address >= 0
3809 	    darray->get_word(adr, word);
3810       else
3811 	    dq_default(word, obj->size());
3812 
3813       vthread_push(thr, word);
3814       return true;
3815 }
3816 
3817 /*
3818  * %load/dar/r <array-label>;
3819  */
3820 bool of_LOAD_DAR_R(vthread_t thr, vvp_code_t cp)
3821 {
3822       return load_dar<double>(thr, cp);
3823 }
3824 
3825 /*
3826  * %load/dar/str <array-label>;
3827  */
3828 bool of_LOAD_DAR_STR(vthread_t thr, vvp_code_t cp)
3829 {
3830       return load_dar<string>(thr, cp);
3831 }
3832 
3833 /*
3834  * %load/dar/vec4 <array-label>;
3835  */
3836 bool of_LOAD_DAR_VEC4(vthread_t thr, vvp_code_t cp)
3837 {
3838       return load_dar<vvp_vector4_t>(thr, cp);
3839 }
3840 
3841 /*
3842  * %load/obj <var-label>
3843  */
3844 bool of_LOAD_OBJ(vthread_t thr, vvp_code_t cp)
3845 {
3846       vvp_net_t*net = cp->net;
3847       vvp_fun_signal_object*fun = dynamic_cast<vvp_fun_signal_object*> (net->fun);
3848       assert(fun);
3849 
3850       vvp_object_t val = fun->get_object();
3851       thr->push_object(val);
3852 
3853       return true;
3854 }
3855 
3856 /*
3857  * %load/obja <index>
3858  *    Loads the object from array, using index <index> as the index
3859  *    value. If flags[4] == 1, the calculation of <index> may have
3860  *    failed, so push nil.
3861  */
3862 bool of_LOAD_OBJA(vthread_t thr, vvp_code_t cp)
3863 {
3864       unsigned idx = cp->bit_idx[0];
3865       unsigned adr = thr->words[idx].w_int;
3866       vvp_object_t word;
3867 
3868 	/* The result is a nil object if the address is undefined. */
3869       if (thr->flags[4] == BIT4_1) {
3870 	    ; // Return nil
3871       } else {
3872 	    cp->array->get_word_obj(adr, word);
3873       }
3874 
3875       thr->push_object(word);
3876       return true;
3877 }
3878 
3879 /*
3880  * %load/real <var-label>
3881  */
3882 bool of_LOAD_REAL(vthread_t thr, vvp_code_t cp)
3883 {
3884       __vpiHandle*tmp = cp->handle;
3885       t_vpi_value val;
3886 
3887       val.format = vpiRealVal;
3888       vpi_get_value(tmp, &val);
3889 
3890       thr->push_real(val.value.real);
3891 
3892       return true;
3893 }
3894 
3895 /*
3896  * %load/str <var-label>
3897  */
3898 bool of_LOAD_STR(vthread_t thr, vvp_code_t cp)
3899 {
3900       vvp_net_t*net = cp->net;
3901 
3902 
3903       vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
3904       assert(fun);
3905 
3906       const string&val = fun->get_string();
3907       thr->push_str(val);
3908 
3909       return true;
3910 }
3911 
3912 bool of_LOAD_STRA(vthread_t thr, vvp_code_t cp)
3913 {
3914       unsigned idx = cp->bit_idx[0];
3915       unsigned adr = thr->words[idx].w_int;
3916       string word;
3917 
3918       if (thr->flags[4] == BIT4_1) {
3919 	    word = "";
3920       } else {
3921 	    word = cp->array->get_word_str(adr);
3922       }
3923 
3924       thr->push_str(word);
3925       return true;
3926 }
3927 
3928 
3929 /*
3930  * %load/vec4 <net>
3931  */
3932 bool of_LOAD_VEC4(vthread_t thr, vvp_code_t cp)
3933 {
3934 	// Push a placeholder onto the stack in order to reserve the
3935 	// stack space. Use a reference for the stack top as a target
3936 	// for the load.
3937       thr->push_vec4(vvp_vector4_t());
3938       vvp_vector4_t&sig_value = thr->peek_vec4();
3939 
3940       vvp_net_t*net = cp->net;
3941 
3942 	// For the %load to work, the functor must actually be a
3943 	// signal functor. Only signals save their vector value.
3944       vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (net->fil);
3945       if (sig == 0) {
3946 	    cerr << thr->get_fileline()
3947 	         << "%load/v error: Net arg not a signal? "
3948 		 << (net->fil ? typeid(*net->fil).name() : typeid(*net->fun).name()) << endl;
3949 	    assert(sig);
3950       }
3951 
3952 	// Extract the value from the signal and directly into the
3953 	// target stack position.
3954       sig->vec4_value(sig_value);
3955 
3956       return true;
3957 }
3958 
3959 /*
3960  * %load/vec4a <arr>, <adrx>
3961  */
3962 bool of_LOAD_VEC4A(vthread_t thr, vvp_code_t cp)
3963 {
3964       int adr_index = cp->bit_idx[0];
3965 
3966       long adr = thr->words[adr_index].w_int;
3967 
3968 	// If flags[4] is set, then the calculation of the address
3969 	// failed, and this load should return X instead of the actual
3970 	// value.
3971       if (thr->flags[4] == BIT4_1) {
3972 	    vvp_vector4_t tmp (cp->array->get_word_size(), BIT4_X);
3973 	    thr->push_vec4(tmp);
3974 	    return true;
3975       }
3976 
3977       vvp_vector4_t tmp (cp->array->get_word(adr));
3978       thr->push_vec4(tmp);
3979       return true;
3980 }
3981 
3982 static void do_verylong_mod(vvp_vector4_t&vala, const vvp_vector4_t&valb,
3983 			    bool left_is_neg, bool right_is_neg)
3984 {
3985       bool out_is_neg = left_is_neg;
3986       const int len=vala.size();
3987       unsigned char *a, *z, *t;
3988       a = new unsigned char[len+1];
3989       z = new unsigned char[len+1];
3990       t = new unsigned char[len+1];
3991 
3992       unsigned char carry;
3993       unsigned char temp;
3994 
3995       int mxa = -1, mxz = -1;
3996       int i;
3997       int current, copylen;
3998 
3999       unsigned lb_carry = left_is_neg? 1 : 0;
4000       unsigned rb_carry = right_is_neg? 1 : 0;
4001       for (int idx = 0 ;  idx < len ;  idx += 1) {
4002 	    unsigned lb = vala.value(idx);
4003 	    unsigned rb = valb.value(idx);
4004 
4005 	    if ((lb | rb) & 2) {
4006 		  delete []t;
4007 		  delete []z;
4008 		  delete []a;
4009 		  vvp_vector4_t tmp(len, BIT4_X);
4010 		  vala = tmp;
4011 		  return;
4012 	    }
4013 
4014 	    if (left_is_neg) {
4015 		  lb = (1-lb) + lb_carry;
4016 		  lb_carry = (lb & ~1)? 1 : 0;
4017 		  lb &= 1;
4018 	    }
4019 	    if (right_is_neg) {
4020 		  rb = (1-rb) + rb_carry;
4021 		  rb_carry = (rb & ~1)? 1 : 0;
4022 		  rb &= 1;
4023 	    }
4024 
4025 	    z[idx]=lb;
4026 	    a[idx]=1-rb;	// for 2s complement add..
4027       }
4028 
4029       z[len]=0;
4030       a[len]=1;
4031 
4032       for(i=len-1;i>=0;i--) {
4033 	    if(! a[i]) {
4034 		  mxa=i;
4035 		  break;
4036 	    }
4037       }
4038 
4039       for(i=len-1;i>=0;i--) {
4040 	    if(z[i]) {
4041 		  mxz=i;
4042 		  break;
4043 	    }
4044       }
4045 
4046       if((mxa>mxz)||(mxa==-1)) {
4047 	    if(mxa==-1) {
4048 		  delete []t;
4049 		  delete []z;
4050 		  delete []a;
4051 		  vvp_vector4_t tmpx (len, BIT4_X);
4052 		  vala = tmpx;
4053 		  return;
4054 	    }
4055 
4056 	    goto tally;
4057       }
4058 
4059       copylen = mxa + 2;
4060       current = mxz - mxa;
4061 
4062       while(current > -1) {
4063 	    carry = 1;
4064 	    for(i=0;i<copylen;i++) {
4065 		  temp = z[i+current] + a[i] + carry;
4066 		  t[i] = (temp&1);
4067 		  carry = (temp>>1);
4068 	    }
4069 
4070 	    if(carry) {
4071 		  for(i=0;i<copylen;i++) {
4072 			z[i+current] = t[i];
4073 		  }
4074 	    }
4075 
4076 	    current--;
4077       }
4078 
4079  tally:
4080 
4081       vvp_vector4_t tmp (len, BIT4_X);
4082       carry = out_is_neg? 1 : 0;
4083       for (int idx = 0 ;  idx < len ;  idx += 1) {
4084 	    unsigned ob = z[idx];
4085 	    if (out_is_neg) {
4086 		  ob = (1-ob) + carry;
4087 		  carry = (ob & ~1)? 1 : 0;
4088 		  ob = ob & 1;
4089 	    }
4090 	    tmp.set_bit(idx, ob?BIT4_1:BIT4_0);
4091       }
4092       vala = tmp;
4093       delete []t;
4094       delete []z;
4095       delete []a;
4096 }
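
/*
 * In short, do_verylong_mod computes vala % valb for vectors too wide
 * for a native integer, using roughly a restoring shift-and-subtract
 * loop on the magnitudes and then re-applying the sign of the left
 * operand, as Verilog requires. For example, with 4-bit operands
 * 4'b0111 (7) and 4'b0011 (3), the result left in vala is 4'b0001.
 * Any X or Z bit in either operand makes the whole result X.
 */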
4097 
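/*
 * %max/wr and %min/wr below rely on the (x != x) idiom, which is true
 * only when x is NaN, so a NaN operand is ignored and the other value
 * is returned when possible.
 */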
4098 bool of_MAX_WR(vthread_t thr, vvp_code_t)
4099 {
4100       double r = thr->pop_real();
4101       double l = thr->pop_real();
4102       if (r != r)
4103 	    thr->push_real(l);
4104       else if (l != l)
4105 	    thr->push_real(r);
4106       else if (r < l)
4107 	    thr->push_real(l);
4108       else
4109 	    thr->push_real(r);
4110       return true;
4111 }
4112 
4113 bool of_MIN_WR(vthread_t thr, vvp_code_t)
4114 {
4115       double r = thr->pop_real();
4116       double l = thr->pop_real();
4117       if (r != r)
4118 	    thr->push_real(l);
4119       else if (l != l)
4120 	    thr->push_real(r);
4121       else if (r < l)
4122 	    thr->push_real(r);
4123       else
4124 	    thr->push_real(l);
4125       return true;
4126 }
4127 
4128 bool of_MOD(vthread_t thr, vvp_code_t)
4129 {
4130       vvp_vector4_t valb = thr->pop_vec4();
4131       vvp_vector4_t&vala = thr->peek_vec4();
4132 
4133       assert(vala.size()==valb.size());
4134       unsigned wid = vala.size();
4135 
4136       if(wid <= 8*sizeof(unsigned long long)) {
4137 	    unsigned long long lv = 0, rv = 0;
4138 
4139 	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
4140 		  unsigned long long lb = vala.value(idx);
4141 		  unsigned long long rb = valb.value(idx);
4142 
4143 		  if ((lb | rb) & 2)
4144 			goto x_out;
4145 
4146 		  lv |= (unsigned long long) lb << idx;
4147 		  rv |= (unsigned long long) rb << idx;
4148 	    }
4149 
4150 	    if (rv == 0)
4151 		  goto x_out;
4152 
4153 	    lv %= rv;
4154 
4155 	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
4156 		  vala.set_bit(idx, (lv&1)?BIT4_1 : BIT4_0);
4157 		  lv >>= 1;
4158 	    }
4159 
4160 	    return true;
4161 
4162       } else {
4163 	    do_verylong_mod(vala, valb, false, false);
4164 	    return true;
4165       }
4166 
4167  x_out:
4168       vala = vvp_vector4_t(wid, BIT4_X);
4169       return true;
4170 }
4171 
4172 /*
4173  * %mod/s
4174  */
4175 bool of_MOD_S(vthread_t thr, vvp_code_t)
4176 {
4177       vvp_vector4_t valb = thr->pop_vec4();
4178       vvp_vector4_t&vala = thr->peek_vec4();
4179 
4180       assert(vala.size()==valb.size());
4181       unsigned wid = vala.size();
4182 
4183 	/* Handle the case that we can fit the bits into a long-long
4184 	   variable. We can use the native % operator to do the work. */
4185       if(wid <= 8*sizeof(long long)) {
4186 	    long long lv = 0, rv = 0;
4187 
4188 	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
4189 		  long long lb = vala.value(idx);
4190 		  long long rb = valb.value(idx);
4191 
4192 		  if ((lb | rb) & 2)
4193 			goto x_out;
4194 
4195 		  lv |= (long long) lb << idx;
4196 		  rv |= (long long) rb << idx;
4197 	    }
4198 
4199 	    if (rv == 0)
4200 		  goto x_out;
4201 
4202 	    if ((lv == LLONG_MIN) && (rv == -1))
4203 		  goto zero_out;
4204 
4205 	      /* Sign extend the signed operands when needed. */
4206 	    if (wid < 8*sizeof(long long)) {
4207 		  if (lv & (1LL << (wid-1)))
4208 			lv |= -1ULL << wid;
4209 		  if (rv & (1LL << (wid-1)))
4210 			rv |= -1ULL << wid;
4211 	    }
4212 
4213 	    lv %= rv;
4214 
4215 	    for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {
4216 		  vala.set_bit(idx, (lv&1)? BIT4_1 : BIT4_0);
4217 		  lv >>= 1;
4218 	    }
4219 
4220 	      // vala is the top of the stack, edited in place, so we
4221 	      // do not need to push the result.
4222 
4223 	    return true;
4224 
4225       } else {
4226 
4227 	    bool left_is_neg  = vala.value(vala.size()-1) == BIT4_1;
4228 	    bool right_is_neg = valb.value(valb.size()-1) == BIT4_1;
4229 	    do_verylong_mod(vala, valb, left_is_neg, right_is_neg);
4230 	    return true;
4231       }
4232 
4233  x_out:
4234       vala = vvp_vector4_t(wid, BIT4_X);
4235       return true;
4236  zero_out:
4237       vala = vvp_vector4_t(wid, BIT4_0);
4238       return true;
4239 }
4240 
4241 /*
4242  * %mod/wr
4243  */
4244 bool of_MOD_WR(vthread_t thr, vvp_code_t)
4245 {
4246       double r = thr->pop_real();
4247       double l = thr->pop_real();
4248       thr->push_real(fmod(l,r));
4249 
4250       return true;
4251 }
4252 
4253 /*
4254  * %pad/s <wid>
4255  */
4256 bool of_PAD_S(vthread_t thr, vvp_code_t cp)
4257 {
4258       unsigned wid = cp->number;
4259 
4260       vvp_vector4_t&val = thr->peek_vec4();
4261       unsigned old_size = val.size();
4262 
4263 	// Sign-extend.
4264       if (old_size < wid)
4265 	    val.resize(wid, val.value(old_size-1));
4266       else
4267 	    val.resize(wid);
4268 
4269       return true;
4270 }
4271 
4272 /*
4273  * %pad/u <wid>
4274  */
4275 bool of_PAD_U(vthread_t thr, vvp_code_t cp)
4276 {
4277       unsigned wid = cp->number;
4278 
4279       vvp_vector4_t&val = thr->peek_vec4();
4280       val.resize(wid, BIT4_0);
4281 
4282       return true;
4283 }
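
/*
 * As an illustration of the two pads: a 4-bit value 4'b1010 padded to
 * 8 bits becomes 8'b11111010 with %pad/s (the sign bit is replicated)
 * and 8'b00001010 with %pad/u. When the requested width is smaller
 * than the value, both simply truncate the high bits.
 */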
4284 
4285 /*
4286  * %part/s <wid>
4287  * %part/u <wid>
4288  * Two values are popped from the stack. First, pop the canonical
4289  * index of the part select, and second is the value to be
4290  * selected. The result is pushed back to the stack.
4291  */
4292 static bool of_PART_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
4293 {
4294       unsigned wid = cp->number;
4295 
4296       vvp_vector4_t base4 = thr->pop_vec4();
4297       vvp_vector4_t&value = thr->peek_vec4();
4298 
4299       vvp_vector4_t res (wid, BIT4_X);
4300 
4301 	// NOTE: This is treating the vector as signed. Is that correct?
4302       int32_t base;
4303       bool value_ok = vector4_to_value(base4, base, signed_flag);
4304       if (! value_ok) {
4305 	    value = res;
4306 	    return true;
4307       }
4308 
4309       if (base >= (int32_t)value.size()) {
4310 	    value = res;
4311 	    return true;
4312       }
4313 
4314       if ((base+(int)wid) <= 0) {
4315 	    value = res;
4316 	    return true;
4317       }
4318 
4319       long vbase = 0;
4320       if (base < 0) {
4321 	    vbase = -base;
4322 	    wid -= vbase;
4323 	    base = 0;
4324       }
4325 
4326       if ((base+wid) > value.size()) {
4327 	    wid = value.size() - base;
4328       }
4329 
4330       res .set_vec(vbase, value.subvalue(base, wid));
4331       value = res;
4332 
4333       return true;
4334 }
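
/*
 * A worked example of the part select: with an 8-bit value 8'b10101100
 * on the stack and a popped base of 6, "%part/u 4" selects canonical
 * bits 6..9. Bits 8 and 9 fall outside the value, so they stay X and
 * the result pushed back is 4'bxx10.
 */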
4335 
4336 bool of_PART_S(vthread_t thr, vvp_code_t cp)
4337 {
4338       return of_PART_base(thr, cp, true);
4339 }
4340 
4341 bool of_PART_U(vthread_t thr, vvp_code_t cp)
4342 {
4343       return of_PART_base(thr, cp, false);
4344 }
4345 
4346 /*
4347  * %parti/s <wid>, <basei>, <base_wid>
4348  * %parti/u <wid>, <basei>, <base_wid>
4349  *
4350  * Pop the value to be selected. The result is pushed back to the stack.
4351  */
4352 static bool of_PARTI_base(vthread_t thr, vvp_code_t cp, bool signed_flag)
4353 {
4354       unsigned wid = cp->number;
4355       uint32_t base = cp->bit_idx[0];
4356       uint32_t bwid = cp->bit_idx[1];
4357 
4358       vvp_vector4_t&value = thr->peek_vec4();
4359 
4360       vvp_vector4_t res (wid, BIT4_X);
4361 
4362 	// NOTE: This is treating the vector as signed. Is that correct?
4363       int32_t use_base = base;
4364       if (signed_flag && bwid < 32 && (base&(1<<(bwid-1)))) {
4365 	    use_base |= -1UL << bwid;
4366       }
4367 
4368       if (use_base >= (int32_t)value.size()) {
4369 	    value = res;
4370 	    return true;
4371       }
4372 
4373       if ((use_base+(int32_t)wid) <= 0) {
4374 	    value = res;
4375 	    return true;
4376       }
4377 
4378       long vbase = 0;
4379       if (use_base < 0) {
4380 	    vbase = -use_base;
4381 	    wid -= vbase;
4382 	    use_base = 0;
4383       }
4384 
4385       if ((use_base+wid) > value.size()) {
4386 	    wid = value.size() - use_base;
4387       }
4388 
4389       res .set_vec(vbase, value.subvalue(use_base, wid));
4390       value = res;
4391 
4392       return true;
4393 }
4394 
4395 bool of_PARTI_S(vthread_t thr, vvp_code_t cp)
4396 {
4397       return of_PARTI_base(thr, cp, true);
4398 }
4399 
4400 bool of_PARTI_U(vthread_t thr, vvp_code_t cp)
4401 {
4402       return of_PARTI_base(thr, cp, false);
4403 }
4404 
4405 /*
4406  * %mov/wu <dst>, <src>
4407  */
4408 bool of_MOV_WU(vthread_t thr, vvp_code_t cp)
4409 {
4410       unsigned dst = cp->bit_idx[0];
4411       unsigned src = cp->bit_idx[1];
4412 
4413       thr->words[dst].w_uint = thr->words[src].w_uint;
4414       return true;
4415 }
4416 
4417 /*
4418  * %mul
4419  */
4420 bool of_MUL(vthread_t thr, vvp_code_t)
4421 {
4422       vvp_vector4_t r = thr->pop_vec4();
4423 	// Rather than pop l, use it directly from the stack. When we
4424 	// assign to 'l', that will edit the top of the stack, which
4425 	// replaces a pop and a push.
4426       vvp_vector4_t&l = thr->peek_vec4();
4427 
4428       l.mul(r);
4429       return true;
4430 }
4431 
4432 /*
4433  * %muli <vala>, <valb>, <wid>
4434  *
4435  * Pop one operand, get the other operand from the arguments, and push
4436  * the result.
4437  */
4438 bool of_MULI(vthread_t thr, vvp_code_t cp)
4439 {
4440       unsigned wid = cp->number;
4441 
4442       vvp_vector4_t&l = thr->peek_vec4();
4443 
4444 	// I expect that most of the bits of an immediate value are
4445 	// going to be zero, so start the result vector with all zero
4446 	// bits. Then we only need to replace the bits that are different.
4447       vvp_vector4_t r (wid, BIT4_0);
4448       get_immediate_rval (cp, r);
4449 
4450       l.mul(r);
4451       return true;
4452 }
4453 
4454 bool of_MUL_WR(vthread_t thr, vvp_code_t)
4455 {
4456       double r = thr->pop_real();
4457       double l = thr->pop_real();
4458       thr->push_real(l * r);
4459 
4460       return true;
4461 }
4462 
4463 bool of_NAND(vthread_t thr, vvp_code_t)
4464 {
4465       vvp_vector4_t valr = thr->pop_vec4();
4466       vvp_vector4_t&vall = thr->peek_vec4();
4467       assert(vall.size() == valr.size());
4468       unsigned wid = vall.size();
4469 
4470       for (unsigned idx = 0 ; idx < wid ; idx += 1) {
4471 	    vvp_bit4_t lb = vall.value(idx);
4472 	    vvp_bit4_t rb = valr.value(idx);
4473 	    vall.set_bit(idx, ~(lb&rb));
4474       }
4475 
4476       return true;
4477 }
4478 
4479 /*
4480  * %new/cobj <vpi_object>
4481  * This creates a new cobject (SystemVerilog class object) and pushes
4482  * it to the stack. The <vpi-object> is a __vpiHandle that is a
4483  * vpiClassDefn object that defines the item to be created.
4484  */
4485 bool of_NEW_COBJ(vthread_t thr, vvp_code_t cp)
4486 {
4487       const class_type*defn = dynamic_cast<const class_type*> (cp->handle);
4488       assert(defn);
4489 
4490       vvp_object_t tmp (new vvp_cobject(defn));
4491       thr->push_object(tmp);
4492       return true;
4493 }
4494 
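/*
 * %new/darray
 * Creates a dynamic array object. Judging from the dispatch below, the
 * <type> text operand selects the element representation: "b8"/"b16"/
 * "b32"/"b64" and the corresponding "sb*" forms map to 2-state atom
 * types, "b<n>"/"sb<n>" to arbitrary-width 2-state vectors, "v<n>"/
 * "sv<n>" to 4-state vectors, "r" to real and "S" to string elements.
 */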
4495 bool of_NEW_DARRAY(vthread_t thr, vvp_code_t cp)
4496 {
4497       const char*text = cp->text;
4498       size_t size = thr->words[cp->bit_idx[0]].w_int;
4499       unsigned word_wid;
4500       size_t n;
4501 
4502       vvp_object_t obj;
4503       if (strcmp(text,"b8") == 0) {
4504 	    obj = new vvp_darray_atom<uint8_t>(size);
4505       } else if (strcmp(text,"b16") == 0) {
4506 	    obj = new vvp_darray_atom<uint16_t>(size);
4507       } else if (strcmp(text,"b32") == 0) {
4508 	    obj = new vvp_darray_atom<uint32_t>(size);
4509       } else if (strcmp(text,"b64") == 0) {
4510 	    obj = new vvp_darray_atom<uint64_t>(size);
4511       } else if (strcmp(text,"sb8") == 0) {
4512 	    obj = new vvp_darray_atom<int8_t>(size);
4513       } else if (strcmp(text,"sb16") == 0) {
4514 	    obj = new vvp_darray_atom<int16_t>(size);
4515       } else if (strcmp(text,"sb32") == 0) {
4516 	    obj = new vvp_darray_atom<int32_t>(size);
4517       } else if (strcmp(text,"sb64") == 0) {
4518 	    obj = new vvp_darray_atom<int64_t>(size);
4519       } else if ((1 == sscanf(text, "b%u%zn", &word_wid, &n)) &&
4520                  (n == strlen(text))) {
4521 	    obj = new vvp_darray_vec2(size, word_wid);
4522       } else if ((1 == sscanf(text, "sb%u%zn", &word_wid, &n)) &&
4523                  (n == strlen(text))) {
4524 	    obj = new vvp_darray_vec2(size, word_wid);
4525       } else if ((1 == sscanf(text, "v%u%zn", &word_wid, &n)) &&
4526                  (n == strlen(text))) {
4527 	    obj = new vvp_darray_vec4(size, word_wid);
4528       } else if ((1 == sscanf(text, "sv%u%zn", &word_wid, &n)) &&
4529                  (n == strlen(text))) {
4530 	    obj = new vvp_darray_vec4(size, word_wid);
4531       } else if (strcmp(text,"r") == 0) {
4532 	    obj = new vvp_darray_real(size);
4533       } else if (strcmp(text,"S") == 0) {
4534 	    obj = new vvp_darray_string(size);
4535       } else {
4536 	    cerr << get_fileline()
4537 	         << "Internal error: Unsupported dynamic array type: "
4538 	         << text << "." << endl;
4539 	    assert(0);
4540       }
4541 
4542       thr->push_object(obj);
4543 
4544       return true;
4545 }
4546 
4547 bool of_NOOP(vthread_t, vvp_code_t)
4548 {
4549       return true;
4550 }
4551 
4552 /*
4553  * %nor/r
4554  */
4555 bool of_NORR(vthread_t thr, vvp_code_t)
4556 {
4557       vvp_vector4_t val = thr->pop_vec4();
4558 
4559       vvp_bit4_t lb = BIT4_1;
4560 
4561       for (unsigned idx = 0 ;  idx < val.size() ;  idx += 1) {
4562 
4563 	    vvp_bit4_t rb = val.value(idx);
4564 	    if (rb == BIT4_1) {
4565 		  lb = BIT4_0;
4566 		  break;
4567 	    }
4568 
4569 	    if (rb != BIT4_0)
4570 		  lb = BIT4_X;
4571       }
4572 
4573       vvp_vector4_t res (1, lb);
4574       thr->push_vec4(res);
4575 
4576       return true;
4577 }
4578 
4579 /*
4580  * Push a null to the object stack.
4581  */
4582 bool of_NULL(vthread_t thr, vvp_code_t)
4583 {
4584       vvp_object_t tmp;
4585       thr->push_object(tmp);
4586       return true;
4587 }
4588 
4589 /*
4590  * %and/r
4591  */
4592 bool of_ANDR(vthread_t thr, vvp_code_t)
4593 {
4594       vvp_vector4_t val = thr->pop_vec4();
4595 
4596       vvp_bit4_t lb = BIT4_1;
4597 
4598       for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4599 	    vvp_bit4_t rb = val.value(idx);
4600 	    if (rb == BIT4_0) {
4601 		  lb = BIT4_0;
4602 		  break;
4603 	    }
4604 
4605 	    if (rb != BIT4_1)
4606 		  lb = BIT4_X;
4607       }
4608 
4609       vvp_vector4_t res (1, lb);
4610       thr->push_vec4(res);
4611 
4612       return true;
4613 }
4614 
4615 /*
4616  * %nand/r
4617  */
4618 bool of_NANDR(vthread_t thr, vvp_code_t)
4619 {
4620       vvp_vector4_t val = thr->pop_vec4();
4621 
4622       vvp_bit4_t lb = BIT4_0;
4623       for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4624 
4625 	    vvp_bit4_t rb = val.value(idx);
4626 	    if (rb == BIT4_0) {
4627 		  lb = BIT4_1;
4628 		  break;
4629 	    }
4630 
4631 	    if (rb != BIT4_1)
4632 		  lb = BIT4_X;
4633       }
4634 
4635       vvp_vector4_t res (1, lb);
4636       thr->push_vec4(res);
4637 
4638       return true;
4639 }
4640 
4641 /*
4642  * %or/r
4643  */
4644 bool of_ORR(vthread_t thr, vvp_code_t)
4645 {
4646       vvp_vector4_t val = thr->pop_vec4();
4647 
4648       vvp_bit4_t lb = BIT4_0;
4649       for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4650 	    vvp_bit4_t rb = val.value(idx);
4651 	    if (rb == BIT4_1) {
4652 		  lb = BIT4_1;
4653 		  break;
4654 	    }
4655 
4656 	    if (rb != BIT4_0)
4657 		  lb = BIT4_X;
4658       }
4659 
4660       vvp_vector4_t res (1, lb);
4661       thr->push_vec4(res);
4662       return true;
4663 }
4664 
4665 /*
4666  * %xor/r
4667  */
4668 bool of_XORR(vthread_t thr, vvp_code_t)
4669 {
4670       vvp_vector4_t val = thr->pop_vec4();
4671 
4672       vvp_bit4_t lb = BIT4_0;
4673       for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4674 
4675 	    vvp_bit4_t rb = val.value(idx);
4676 	    if (rb == BIT4_1)
4677 		  lb = ~lb;
4678 	    else if (rb != BIT4_0) {
4679 		  lb = BIT4_X;
4680 		  break;
4681 	    }
4682       }
4683 
4684       vvp_vector4_t res (1, lb);
4685       thr->push_vec4(res);
4686       return true;
4687 }
4688 
4689 /*
4690  * %xnor/r
4691  */
4692 bool of_XNORR(vthread_t thr, vvp_code_t)
4693 {
4694       vvp_vector4_t val = thr->pop_vec4();
4695 
4696       vvp_bit4_t lb = BIT4_1;
4697       for (unsigned idx = 0 ; idx < val.size() ; idx += 1) {
4698 
4699 	    vvp_bit4_t rb = val.value(idx);
4700 	    if (rb == BIT4_1)
4701 		  lb = ~lb;
4702 	    else if (rb != BIT4_0) {
4703 		  lb = BIT4_X;
4704 		  break;
4705 	    }
4706       }
4707 
4708       vvp_vector4_t res (1, lb);
4709       thr->push_vec4(res);
4710       return true;
4711 }
4712 
4713 /*
4714  * %or
4715  */
4716 bool of_OR(vthread_t thr, vvp_code_t)
4717 {
4718       vvp_vector4_t valb = thr->pop_vec4();
4719       vvp_vector4_t&vala = thr->peek_vec4();
4720       vala |= valb;
4721       return true;
4722 }
4723 
4724 /*
4725  * %nor
4726  */
4727 bool of_NOR(vthread_t thr, vvp_code_t)
4728 {
4729       vvp_vector4_t valr = thr->pop_vec4();
4730       vvp_vector4_t&vall = thr->peek_vec4();
4731       assert(vall.size() == valr.size());
4732       unsigned wid = vall.size();
4733 
4734       for (unsigned idx = 0 ; idx < wid ; idx += 1) {
4735 	    vvp_bit4_t lb = vall.value(idx);
4736 	    vvp_bit4_t rb = valr.value(idx);
4737 	    vall.set_bit(idx, ~(lb|rb));
4738       }
4739 
4740       return true;
4741 }
4742 
4743 /*
4744  * %pop/obj <num>, <skip>
4745  */
4746 bool of_POP_OBJ(vthread_t thr, vvp_code_t cp)
4747 {
4748       unsigned cnt = cp->bit_idx[0];
4749       unsigned skip = cp->bit_idx[1];
4750 
4751       thr->pop_object(cnt, skip);
4752       return true;
4753 }
4754 
4755 /*
4756  * %pop/real <number>
4757  */
4758 bool of_POP_REAL(vthread_t thr, vvp_code_t cp)
4759 {
4760       unsigned cnt = cp->number;
4761       thr->pop_real(cnt);
4762       return true;
4763 }
4764 
4765 /*
4766  *  %pop/str <number>
4767  */
4768 bool of_POP_STR(vthread_t thr, vvp_code_t cp)
4769 {
4770       unsigned cnt = cp->number;
4771       thr->pop_str(cnt);
4772       return true;
4773 }
4774 
4775 /*
4776  *  %pop/vec4 <number>
4777  */
4778 bool of_POP_VEC4(vthread_t thr, vvp_code_t cp)
4779 {
4780       unsigned cnt = cp->number;
4781       thr->pop_vec4(cnt);
4782       return true;
4783 }
4784 
4785 /*
4786  * %pow
4787  * %pow/s
4788  */
4789 static bool of_POW_base(vthread_t thr, bool signed_flag)
4790 {
4791       vvp_vector4_t valb = thr->pop_vec4();
4792       vvp_vector4_t vala = thr->pop_vec4();
4793 
4794       unsigned wid = vala.size();
4795 
4796       vvp_vector2_t xv2 = vvp_vector2_t(vala, true);
4797       vvp_vector2_t yv2 = vvp_vector2_t(valb, true);
4798 
4799 
4800         /* If we have an X or Z in the arguments return X. */
4801       if (xv2.is_NaN() || yv2.is_NaN()) {
4802 	    vvp_vector4_t tmp (wid, BIT4_X);
4803 	    thr->push_vec4(tmp);
4804 	    return true;
4805       }
4806 
4807 	// Is the exponent negative? If so, table 5-6 in IEEE1364-2005
4808 	// defines what value is returned.
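	// Roughly: base 0 gives 'bx, base 1 gives 1, base -1 gives 1 or
	// -1 depending on whether the exponent is even or odd, and any
	// other base gives 0 (the default pad/lsb values set below).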
4809       if (signed_flag && yv2.value(yv2.size()-1)) {
4810 	    int a_val;
4811 	    vvp_bit4_t pad = BIT4_0, lsb = BIT4_0;
4812 	    if (vector2_to_value(xv2, a_val, true)) {
4813 		  if (a_val == 0) {
4814 			pad = BIT4_X; lsb = BIT4_X;
4815 		  }
4816 		  if (a_val == 1) {
4817 			pad = BIT4_0; lsb = BIT4_1;
4818 		  }
4819 		  if (a_val == -1) {
4820 			if (yv2.value(0)) {
4821 			      pad = BIT4_1; lsb = BIT4_1;
4822 			} else {
4823 			      pad = BIT4_0; lsb = BIT4_1;
4824 			}
4825 		  }
4826 	    }
4827 	    vvp_vector4_t tmp (wid, pad);
4828 	    tmp.set_bit(0, lsb);
4829 	    thr->push_vec4(tmp);
4830 	    return true;
4831       }
4832 
4833       vvp_vector2_t result = pow(xv2, yv2);
4834 
4835         /* Copy only what we need of the result. If the result is too
4836 	   small, zero-pad it. */
4837       for (unsigned jdx = 0;  jdx < wid;  jdx += 1) {
4838 	    if (jdx >= result.size())
4839 		  vala.set_bit(jdx, BIT4_0);
4840 	    else
4841 		  vala.set_bit(jdx, result.value(jdx) ? BIT4_1 : BIT4_0);
4842       }
4843       thr->push_vec4(vala);
4844 
4845       return true;
4846 }
4847 
4848 bool of_POW(vthread_t thr, vvp_code_t)
4849 {
4850       return of_POW_base(thr, false);
4851 }
4852 
4853 bool of_POW_S(vthread_t thr, vvp_code_t)
4854 {
4855       return of_POW_base(thr, true);
4856 }
4857 
4858 bool of_POW_WR(vthread_t thr, vvp_code_t)
4859 {
4860       double r = thr->pop_real();
4861       double l = thr->pop_real();
4862       thr->push_real(pow(l,r));
4863 
4864       return true;
4865 }
4866 
4867 /*
4868  * %prop/obj <pid>, <idx>
4869  *
4870  * Load an object value from the cobject and push it onto the object stack.
4871  */
4872 bool of_PROP_OBJ(vthread_t thr, vvp_code_t cp)
4873 {
4874       unsigned pid = cp->number;
4875       unsigned idx = cp->bit_idx[0];
4876 
4877       if (idx != 0) {
4878 	    assert(idx < vthread_s::WORDS_COUNT);
4879 	    idx = thr->words[idx].w_uint;
4880       }
4881 
4882       vvp_object_t&obj = thr->peek_object();
4883       vvp_cobject*cobj = obj.peek<vvp_cobject>();
4884 
4885       vvp_object_t val;
4886       cobj->get_object(pid, val, idx);
4887 
4888       thr->push_object(val);
4889 
4890       return true;
4891 }
4892 
4893 static void get_from_obj(unsigned pid, vvp_cobject*cobj, double&val)
4894 {
4895       val = cobj->get_real(pid);
4896 }
4897 
4898 static void get_from_obj(unsigned pid, vvp_cobject*cobj, string&val)
4899 {
4900       val = cobj->get_string(pid);
4901 }
4902 
4903 static void get_from_obj(unsigned pid, vvp_cobject*cobj, vvp_vector4_t&val)
4904 {
4905       cobj->get_vec4(pid, val);
4906 }
4907 
4908 template <typename ELEM>
4909 static bool prop(vthread_t thr, vvp_code_t cp)
4910 {
4911       unsigned pid = cp->number;
4912 
4913       vvp_object_t&obj = thr->peek_object();
4914       vvp_cobject*cobj = obj.peek<vvp_cobject>();
4915       assert(cobj);
4916 
4917       ELEM val;
4918       get_from_obj(pid, cobj, val);
4919       vthread_push(thr, val);
4920 
4921       return true;
4922 }
4923 
4924 /*
4925  * %prop/r <pid>
4926  *
4927  * Load a real value from the cobject and push it onto the real value
4928  * stack.
4929  */
4930 bool of_PROP_R(vthread_t thr, vvp_code_t cp)
4931 {
4932       return prop<double>(thr, cp);
4933 }
4934 
4935 /*
4936  * %prop/str <pid>
4937  *
4938  * Load a string value from the cobject and push it onto the string value
4939  * stack.
4940  */
4941 bool of_PROP_STR(vthread_t thr, vvp_code_t cp)
4942 {
4943       return prop<string>(thr, cp);
4944 }
4945 
4946 /*
4947  * %prop/v <pid>
4948  *
4949  * Load a vector property <pid> from the cobject on the top of the object
4950  * stack and push it onto the vec4 stack.
4951  */
4952 bool of_PROP_V(vthread_t thr, vvp_code_t cp)
4953 {
4954       return prop<vvp_vector4_t>(thr, cp);
4955 }
4956 
4957 bool of_PUSHI_REAL(vthread_t thr, vvp_code_t cp)
4958 {
4959       double mant = cp->bit_idx[0];
4960       uint32_t imant = cp->bit_idx[0];
4961       int exp = cp->bit_idx[1];
4962 
4963 	// Detect +infinity
4964       if (exp==0x3fff && imant==0) {
4965 	    thr->push_real(INFINITY);
4966 	    return true;
4967       }
4968 	// Detect -infinity
4969       if (exp==0x7fff && imant==0) {
4970 	    thr->push_real(-INFINITY);
4971 	    return true;
4972       }
4973 	// Detect NaN
4974       if (exp==0x3fff) {
4975 	    thr->push_real(nan(""));
4976 	    return true;
4977       }
4978 
4979       double sign = (exp & 0x4000)? -1.0 : 1.0;
4980 
4981       exp &= 0x1fff;
4982 
4983       mant = sign * ldexp(mant, exp - 0x1000);
4984       thr->push_real(mant);
4985       return true;
4986 }
4987 
4988 bool of_PUSHI_STR(vthread_t thr, vvp_code_t cp)
4989 {
4990       const char*text = cp->text;
4991       thr->push_str(string(text));
4992       return true;
4993 }
4994 
4995 /*
4996  * %pushi/vec4 <vala>, <valb>, <wid>
4997  */
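/*
 * The two immediates encode one 4-state bit per position: vala=0/valb=0
 * is 0, vala=1/valb=0 is 1, vala=0/valb=1 is z and vala=1/valb=1 is x.
 * So, for instance, "%pushi/vec4 12, 5, 4" pushes the value 4'b1x0z.
 */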
4998 bool of_PUSHI_VEC4(vthread_t thr, vvp_code_t cp)
4999 {
5000       uint32_t vala = cp->bit_idx[0];
5001       uint32_t valb = cp->bit_idx[1];
5002       unsigned wid  = cp->number;
5003 
5004 	// I expect that most of the bits of an immediate value are
5005 	// going to be zero, so start the result vector with all zero
5006 	// bits. Then we only need to replace the bits that are different.
5007       vvp_vector4_t val (wid, BIT4_0);
5008 
5009 	// Special case: Immediate zero is super easy.
5010       if (vala==0 && valb==0) {
5011 	    thr->push_vec4(val);
5012 	    return true;
5013       }
5014 
5015 	// Special case: If the value is defined (no X or Z) and fits
5016 	// in an unsigned long, then use the setarray method to write
5017 	// the value all in one shot.
5018       if ((valb==0) && (wid <= 8*sizeof(unsigned long))) {
5019 	    unsigned long tmp = vala;
5020 	    val.setarray(0, wid, &tmp);
5021 	    thr->push_vec4(val);
5022 	    return true;
5023       }
5024 
5025 	// The %pushi/vec4 can create values bigger than 32 bits, but
5026 	// only if the high bits are zero. So at most we need to run
5027 	// through the loop below 32 times. Maybe less, if the target
5028 	// width is less. We don't have to do anything special on that
5029 	// because vala/valb bits will shift away so (vala|valb) will
5030 	// turn to zero at or before 32 shifts.
5031 
5032       for (unsigned idx = 0 ; idx < wid && (vala|valb) ; idx += 1) {
5033 	    uint32_t ba = 0;
5034 	      // Convert the vala/valb bits to a ba number that can be
5035 	      // used to select what goes into the value.
5036 	    ba = (valb & 1) << 1;
5037 	    ba |= vala & 1;
5038 
5039 	    switch (ba) {
5040 		case 1:
5041 		  val.set_bit(idx, BIT4_1);
5042 		  break;
5043 		case 2:
5044 		  val.set_bit(idx, BIT4_Z);
5045 		  break;
5046 		case 3:
5047 		  val.set_bit(idx, BIT4_X);
5048 		  break;
5049 		default:
5050 		  break;
5051 	    }
5052 
5053 	    vala >>= 1;
5054 	    valb >>= 1;
5055       }
5056 
5057       thr->push_vec4(val);
5058 
5059       return true;
5060 }
5061 
5062 /*
5063  * %pushv/str
5064  *   Pops a vec4 value, and pushes a string.
5065  */
5066 bool of_PUSHV_STR(vthread_t thr, vvp_code_t)
5067 {
5068       vvp_vector4_t vec = thr->pop_vec4();
5069 
5070       size_t slen = (vec.size() + 7)/8;
5071       vector<char>buf;
5072       buf.reserve(slen);
5073 
5074       for (size_t idx = 0 ; idx < vec.size() ; idx += 8) {
5075 	    char tmp = 0;
5076 	    size_t trans = 8;
5077 	    if (idx+trans > vec.size())
5078 		  trans = vec.size() - idx;
5079 
5080 	    for (size_t bdx = 0 ; bdx < trans ; bdx += 1) {
5081 		  if (vec.value(idx+bdx) == BIT4_1)
5082 			tmp |= 1 << bdx;
5083 	    }
5084 
5085 	    if (tmp != 0)
5086 		  buf.push_back(tmp);
5087       }
5088 
5089       string val;
5090       for (vector<char>::reverse_iterator cur = buf.rbegin()
5091 		 ; cur != buf.rend() ; ++cur) {
5092 	    val.push_back(*cur);
5093       }
5094 
5095       thr->push_str(val);
5096 
5097       return true;
5098 }
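
/*
 * For example, the 16-bit vector 16'h4869 converts to the string "Hi":
 * the low byte (0x69) is 'i', the high byte (0x48) is 'H', and the
 * bytes are emitted most significant byte first. NUL (0x00) bytes are
 * dropped along the way.
 */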
5099 
5100 /*
5101  * %putc/str/vec4 <var>, <mux>
5102  */
5103 bool of_PUTC_STR_VEC4(vthread_t thr, vvp_code_t cp)
5104 {
5105       unsigned muxr = cp->bit_idx[0];
5106       int32_t mux = muxr? thr->words[muxr].w_int : 0;
5107 
5108       vvp_vector4_t val = thr->pop_vec4();
5109       assert(val.size() == 8);
5110 
5111       if (mux < 0)
5112 	    return true;
5113 
5114 	/* Get the existing value of the string. If we find that the
5115 	   index is too big for the string, then give up. */
5116       vvp_net_t*net = cp->net;
5117       vvp_fun_signal_string*fun = dynamic_cast<vvp_fun_signal_string*> (net->fun);
5118       assert(fun);
5119 
5120       string tmp = fun->get_string();
5121       if (tmp.size() <= (size_t)mux)
5122 	    return true;
5123 
5124       char val_str = 0;
5125       for (size_t idx = 0 ; idx < 8 ; idx += 1) {
5126 	    if (val.value(idx)==BIT4_1)
5127 		  val_str |= 1<<idx;
5128       }
5129 
5130 	// It is a quirk of the Verilog standard that putc(..., 'h00)
5131 	// has no effect. Test for that case here.
5132       if (val_str == 0)
5133 	    return true;
5134 
5135       tmp[mux] = val_str;
5136 
5137       vvp_send_string(vvp_net_ptr_t(cp->net, 0), tmp, thr->wt_context);
5138       return true;
5139 }
5140 
5141 template <typename ELEM, class QTYPE>
5142 static bool qinsert(vthread_t thr, vvp_code_t cp, unsigned wid=0)
5143 {
5144       int64_t idx = thr->words[3].w_int;
5145       ELEM value;
5146       vvp_net_t*net = cp->net;
5147       unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
5148       pop_value(thr, value, wid); // Pop the value to store.
5149 
5150       vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
5151       assert(queue);
5152       if (idx < 0) {
5153 	    cerr << thr->get_fileline()
5154 	         << "Warning: cannot insert at a negative "
5155 	         << get_queue_type(value)
5156 	         << " index (" << idx << "). ";
5157 	    print_queue_value(value);
5158 	    cerr << " was not added." << endl;
5159       } else if (thr->flags[4] != BIT4_0) {
5160 	    cerr << thr->get_fileline()
5161 	         << "Warning: cannot insert at an undefined "
5162 	         << get_queue_type(value) << " index. ";
5163 	    print_queue_value(value);
5164 	    cerr << " was not added." << endl;
5165       } else
5166 	    queue->insert(idx, value, max_size);
5167       return true;
5168 }
5169 
5170 /*
5171  * %qinsert/real <var-label>
5172  */
5173 bool of_QINSERT_REAL(vthread_t thr, vvp_code_t cp)
5174 {
5175       return qinsert<double, vvp_queue_real>(thr, cp);
5176 }
5177 
5178 /*
5179  * %qinsert/str <var-label>
5180  */
5181 bool of_QINSERT_STR(vthread_t thr, vvp_code_t cp)
5182 {
5183       return qinsert<string, vvp_queue_string>(thr, cp);
5184 }
5185 
5186 /*
5187  * %qinsert/v <var-label>
5188  */
5189 bool of_QINSERT_V(vthread_t thr, vvp_code_t cp)
5190 {
5191       return qinsert<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
5192 }
5193 
5194 /*
5195  * Helper functions used in the queue pop templates
5196  */
5197 inline void push_value(vthread_t thr, double value, unsigned)
5198 {
5199       thr->push_real(value);
5200 }
5201 
5202 inline void push_value(vthread_t thr, string value, unsigned)
5203 {
5204       thr->push_str(value);
5205 }
5206 
5207 inline void push_value(vthread_t thr, vvp_vector4_t value, unsigned wid)
5208 {
5209       assert(wid == value.size());
5210       thr->push_vec4(value);
5211 }
5212 
5213 template <typename ELEM, class QTYPE>
5214 static bool q_pop(vthread_t thr, vvp_code_t cp,
5215                   void (*get_val_func)(vvp_queue*, ELEM&),
5216                   const char*loc, unsigned wid)
5217 {
5218       vvp_net_t*net = cp->net;
5219 
5220       vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
5221       assert(queue);
5222 
5223       size_t size = queue->get_size();
5224 
5225       ELEM value;
5226       if (size) {
5227 	    get_val_func(queue, value);
5228       } else {
5229 	    dq_default(value, wid);
5230 	    cerr << thr->get_fileline()
5231 	         << "Warning: pop_" << loc << "() on empty "
5232 	         << get_queue_type(value) << "." << endl;
5233       }
5234 
5235       push_value(thr, value, wid);
5236       return true;
5237 }
5238 
5239 template <typename ELEM>
5240 static void get_back_value(vvp_queue*queue, ELEM&value)
5241 {
5242       queue->get_word(queue->get_size()-1, value);
5243       queue->pop_back();
5244 }
5245 
5246 template <typename ELEM, class QTYPE>
5247 static bool qpop_b(vthread_t thr, vvp_code_t cp, unsigned wid=0)
5248 {
5249       return q_pop<ELEM, QTYPE>(thr, cp, get_back_value<ELEM>, "back", wid);
5250 }
5251 
5252 /*
5253  * %qpop/b/real <var-label>
5254  */
5255 bool of_QPOP_B_REAL(vthread_t thr, vvp_code_t cp)
5256 {
5257       return qpop_b<double, vvp_queue_real>(thr, cp);
5258 }
5259 
5260 /*
5261  * %qpop/b/str <var-label>
5262  */
5263 bool of_QPOP_B_STR(vthread_t thr, vvp_code_t cp)
5264 {
5265       return qpop_b<string, vvp_queue_string>(thr, cp);
5266 }
5267 
5268 /*
5269  * %qpop/b/v <var-label>
5270  */
5271 bool of_QPOP_B_V(vthread_t thr, vvp_code_t cp)
5272 {
5273       return qpop_b<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[0]);
5274 }
5275 
5276 template <typename ELEM>
5277 static void get_front_value(vvp_queue*queue, ELEM&value)
5278 {
5279       queue->get_word(0, value);
5280       queue->pop_front();
5281 }
5282 
5283 template <typename ELEM, class QTYPE>
5284 static bool qpop_f(vthread_t thr, vvp_code_t cp, unsigned wid=0)
5285 {
5286       return q_pop<ELEM, QTYPE>(thr, cp, get_front_value<ELEM>, "front", wid);
5287 }
5288 
5289 
5290 /*
5291  * %qpop/f/real <var-label>
5292  */
5293 bool of_QPOP_F_REAL(vthread_t thr, vvp_code_t cp)
5294 {
5295       return qpop_f<double, vvp_queue_real>(thr, cp);
5296 }
5297 
5298 /*
5299  * %qpop/f/str <var-label>
5300  */
5301 bool of_QPOP_F_STR(vthread_t thr, vvp_code_t cp)
5302 {
5303       return qpop_f<string, vvp_queue_string>(thr, cp);
5304 }
5305 
5306 /*
5307  * %qpop/f/v <var-label>
5308  */
5309 bool of_QPOP_F_V(vthread_t thr, vvp_code_t cp)
5310 {
5311       return qpop_f<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[0]);
5312 }
5313 
5314 /*
5315  * These implement the %release/net and %release/reg instructions. The
5316  * %release/net instruction applies to a net kind of functor by
5317  * sending the release/net command to the command port. (See vvp_net.h
5318  * for details.) The %release/reg instruction is the same, but sends
5319  * the release/reg command instead. These are very similar to the
5320  * %deassign instruction.
5321  */
5322 static bool do_release_vec(vvp_code_t cp, bool net_flag)
5323 {
5324       vvp_net_t*net = cp->net;
5325       unsigned base  = cp->bit_idx[0];
5326       unsigned width = cp->bit_idx[1];
5327 
5328       assert(net->fil);
5329 
5330       if (base >= net->fil->filter_size()) return true;
5331       if (base+width > net->fil->filter_size())
5332 	    width = net->fil->filter_size() - base;
5333 
5334       bool full_sig = base == 0 && width == net->fil->filter_size();
5335 
5336 	// XXXX Can't really do this if this is a partial release?
5337       net->fil->force_unlink();
5338 
5339 	/* Do we release all or part of the net? */
5340       vvp_net_ptr_t ptr (net, 0);
5341       if (full_sig) {
5342 	    net->fil->release(ptr, net_flag);
5343       } else {
5344 	    net->fil->release_pv(ptr, base, width, net_flag);
5345       }
5346       net->fun->force_flag(false);
5347 
5348       return true;
5349 }
5350 
5351 bool of_RELEASE_NET(vthread_t, vvp_code_t cp)
5352 {
5353       return do_release_vec(cp, true);
5354 }
5355 
5356 
5357 bool of_RELEASE_REG(vthread_t, vvp_code_t cp)
5358 {
5359       return do_release_vec(cp, false);
5360 }
5361 
5362 /* The type is 1 for registers and 0 for everything else. */
of_RELEASE_WR(vthread_t,vvp_code_t cp)5363 bool of_RELEASE_WR(vthread_t, vvp_code_t cp)
5364 {
5365       vvp_net_t*net = cp->net;
5366       unsigned type  = cp->bit_idx[0];
5367 
5368       assert(net->fil);
5369       net->fil->force_unlink();
5370 
5371 	// Send a command to this signal to unforce itself.
5372       vvp_net_ptr_t ptr (net, 0);
5373       net->fil->release(ptr, type==0);
5374       return true;
5375 }
5376 
of_REPLICATE(vthread_t thr,vvp_code_t cp)5377 bool of_REPLICATE(vthread_t thr, vvp_code_t cp)
5378 {
5379       int rept = cp->number;
5380       vvp_vector4_t val = thr->pop_vec4();
5381       vvp_vector4_t res (val.size() * rept, BIT4_X);
5382 
5383       for (int idx = 0 ; idx < rept ; idx += 1) {
5384 	    res.set_vec(idx * val.size(), val);
5385       }
5386 
5387       thr->push_vec4(res);
5388 
5389       return true;
5390 }
5391 
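/*
 * For example, with rept=3 and a 4-bit operand 4'b1010 on the stack,
 * %replicate pushes the 12-bit result 12'b1010_1010_1010 (the operand
 * copied into the result at offsets 0, 4 and 8).
 */
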
static void poke_val(vthread_t fun_thr, unsigned depth, double val)
{
      fun_thr->parent->poke_real(depth, val);
}

static void poke_val(vthread_t fun_thr, unsigned depth, string val)
{
      fun_thr->parent->poke_str(depth, val);
}

static size_t get_max(vthread_t fun_thr, double&)
{
      return fun_thr->args_real.size();
}

static size_t get_max(vthread_t fun_thr, string&)
{
      return fun_thr->args_str.size();
}

static size_t get_max(vthread_t fun_thr, vvp_vector4_t&)
{
      return fun_thr->args_vec4.size();
}

static unsigned get_depth(vthread_t fun_thr, size_t index, double&)
{
      return fun_thr->args_real[index];
}

static unsigned get_depth(vthread_t fun_thr, size_t index, string&)
{
      return fun_thr->args_str[index];
}

static unsigned get_depth(vthread_t fun_thr, size_t index, vvp_vector4_t&)
{
      return fun_thr->args_vec4[index];
}

static vthread_t get_func(vthread_t thr)
{
      vthread_t fun_thr = thr;

      while (fun_thr->parent_scope->get_type_code() != vpiFunction) {
	    assert(fun_thr->parent);
	    fun_thr = fun_thr->parent;
      }

      return fun_thr;
}

template <typename ELEM>
static bool ret(vthread_t thr, vvp_code_t cp)
{
      size_t index = cp->number;
      ELEM val;
      pop_value(thr, val, 0);

      vthread_t fun_thr = get_func(thr);
      assert(index < get_max(fun_thr, val));

      unsigned depth = get_depth(fun_thr, index, val);
	// Use the depth to put the value into the stack of
	// the parent thread.
      poke_val(fun_thr, depth, val);
      return true;
}

/*
 * %ret/real <index>
 */
bool of_RET_REAL(vthread_t thr, vvp_code_t cp)
{
      return ret<double>(thr, cp);
}

/*
 * %ret/str <index>
 */
bool of_RET_STR(vthread_t thr, vvp_code_t cp)
{
      return ret<string>(thr, cp);
}

/*
 * %ret/vec4 <index>, <offset>, <wid>
 */
bool of_RET_VEC4(vthread_t thr, vvp_code_t cp)
{
      size_t index = cp->number;
      unsigned off_index = cp->bit_idx[0];
      int wid = cp->bit_idx[1];
      vvp_vector4_t&val = thr->peek_vec4();

      vthread_t fun_thr = get_func(thr);
      assert(index < get_max(fun_thr, val));
      unsigned depth = get_depth(fun_thr, index, val);

      int off = off_index? thr->words[off_index].w_int : 0;
      const int sig_value_size = fun_thr->parent->peek_vec4(depth).size();

      unsigned val_size = val.size();

      if (off_index!=0 && thr->flags[4] == BIT4_1) {
	    thr->pop_vec4(1);
	    return true;
      }

      if (off <= -wid) {
	    thr->pop_vec4(1);
	    return true;
      }

      if (off >= sig_value_size) {
	    thr->pop_vec4(1);
	    return true;
      }

	// If the index is below the vector, then only assign the high
	// bits that overlap with the target
      if (off < 0) {
	    int use_off = -off;
	    wid -= use_off;
	    val = val.subvalue(use_off, wid);
	    val_size = wid;
	    off = 0;
      }

	// If the value is partly above the target, then only assign
	// the bits that overlap
      if ((off+wid) > sig_value_size) {
	    wid = sig_value_size - off;
	    val = val.subvalue(0, wid);
	    val.resize(wid);
	    val_size = wid;
      }

      if (off==0 && val_size==(unsigned)sig_value_size) {
	    fun_thr->parent->poke_vec4(depth, val);

      } else {
	    vvp_vector4_t tmp_dst = fun_thr->parent->peek_vec4(depth);
	    assert(wid>=0 && val.size() == (unsigned)wid);
	    tmp_dst.set_vec(off, val);
	    fun_thr->parent->poke_vec4(depth, tmp_dst);
      }

      thr->pop_vec4(1);
      return true;
}

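/*
 * For example, if the return variable at <depth> is 8 bits wide and
 * %ret/vec4 runs with off=6 and wid=4, the overlap clipping above
 * reduces wid to 2, so only the low 2 bits of the value on the vec4
 * stack are written into bits [7:6] of the return variable; the rest
 * falls outside the target and is dropped.
 */
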
static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, double&)
{
      thr->push_real(fun_thr->parent->peek_real(depth));
}

static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, string&)
{
      thr->push_str(fun_thr->parent->peek_str(depth));
}

static void push_from_parent(vthread_t thr, vthread_t fun_thr, unsigned depth, vvp_vector4_t&)
{
      thr->push_vec4(fun_thr->parent->peek_vec4(depth));
}

template <typename ELEM>
static bool retload(vthread_t thr, vvp_code_t cp)
{
      size_t index = cp->number;
      ELEM type;

      vthread_t fun_thr = get_func(thr);
      assert(index < get_max(fun_thr, type));

      unsigned depth = get_depth(fun_thr, index, type);
	// Use the depth to extract the values from the stack
	// of the parent thread.
      push_from_parent(thr, fun_thr, depth, type);
      return true;
}

/*
 * %retload/real <index>
 */
bool of_RETLOAD_REAL(vthread_t thr, vvp_code_t cp)
{
      return retload<double>(thr, cp);
}

/*
 * %retload/str <index>
 */
bool of_RETLOAD_STR(vthread_t thr, vvp_code_t cp)
{
      return retload<string>(thr, cp);
}

/*
 * %retload/vec4 <index>
 */
bool of_RETLOAD_VEC4(vthread_t thr, vvp_code_t cp)
{
      return retload<vvp_vector4_t>(thr, cp);
}

bool of_SCOPY(vthread_t thr, vvp_code_t)
{
      vvp_object_t tmp;
      thr->pop_object(tmp);

      vvp_object_t&dest = thr->peek_object();
      dest.shallow_copy(tmp);

      return true;
}

static void thread_peek(vthread_t thr, double&value)
{
      value = thr->peek_real(0);
}

static void thread_peek(vthread_t thr, string&value)
{
      value = thr->peek_str(0);
}

static void thread_peek(vthread_t thr, vvp_vector4_t&value)
{
      value = thr->peek_vec4(0);
}

template <typename ELEM>
static bool set_dar_obj(vthread_t thr, vvp_code_t cp)
{
      unsigned adr = thr->words[cp->number].w_int;

      ELEM value;
      thread_peek(thr, value);

      vvp_object_t&top = thr->peek_object();
      vvp_darray*darray = top.peek<vvp_darray>();
      assert(darray);

      darray->set_word(adr, value);
      return true;
}

/*
 * %set/dar/obj/real <index>
 */
bool of_SET_DAR_OBJ_REAL(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<double>(thr, cp);
}

/*
 * %set/dar/obj/str <index>
 */
bool of_SET_DAR_OBJ_STR(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<string>(thr, cp);
}

/*
 * %set/dar/obj/vec4 <index>
 */
bool of_SET_DAR_OBJ_VEC4(vthread_t thr, vvp_code_t cp)
{
      return set_dar_obj<vvp_vector4_t>(thr, cp);
}

/*
 * %shiftl <idx>
 *
 * Pop the operand, then push the result.
 */
bool of_SHIFTL(vthread_t thr, vvp_code_t cp)
{
      int use_index = cp->number;
      uint64_t shift = thr->words[use_index].w_uint;

      vvp_vector4_t&val = thr->peek_vec4();
      unsigned wid  = val.size();

      if (thr->flags[4] == BIT4_1) {
	      // The result is 'bx if the shift amount is undefined.
	    val = vvp_vector4_t(wid, BIT4_X);

      } else if (thr->flags[4] == BIT4_X || shift >= wid) {
	      // The shift is so big that the entire value is shifted
	      // out. Write a constant 0 result.
	    val = vvp_vector4_t(wid, BIT4_0);

      } else if (shift > 0) {
	    vvp_vector4_t blk = val.subvalue(0, wid-shift);
	    vvp_vector4_t tmp (shift, BIT4_0);
	    val.set_vec(0, tmp);
	    val.set_vec(shift, blk);
      }

      return true;
}

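/*
 * For example, %shiftl with the selected index register holding 2
 * turns an 8-bit 8'b0000_1111 on the stack into 8'b0011_1100: the low
 * bits are filled with zeros and the top two bits are shifted out.
 */
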
/*
 * %shiftr <idx>
 * This is an unsigned right shift. The <idx> is a number that selects
 * the index register with the amount of the shift. This instruction
 * checks flag bit 4, which will be true if the shift is invalid.
 */
bool of_SHIFTR(vthread_t thr, vvp_code_t cp)
{
      int use_index = cp->number;
      uint64_t shift = thr->words[use_index].w_uint;

      vvp_vector4_t val = thr->pop_vec4();
      unsigned wid  = val.size();

      if (thr->flags[4] == BIT4_1) {
	    val = vvp_vector4_t(wid, BIT4_X);

      } else if (thr->flags[4] == BIT4_X || shift > wid) {
	    val = vvp_vector4_t(wid, BIT4_0);

      } else if (shift > 0) {
	    vvp_vector4_t blk = val.subvalue(shift, wid-shift);
	    vvp_vector4_t tmp (shift, BIT4_0);
	    val.set_vec(0, blk);
	    val.set_vec(wid-shift, tmp);
      }

      thr->push_vec4(val);
      return true;
}

/*
 * %shiftr/s <idx>
 * This is the signed (arithmetic) version of %shiftr: the vacated
 * high bits are filled with the sign bit.
 */
bool of_SHIFTR_S(vthread_t thr, vvp_code_t cp)
{
      int use_index = cp->number;
      uint64_t shift = thr->words[use_index].w_uint;

      vvp_vector4_t val = thr->pop_vec4();
      unsigned wid  = val.size();

      vvp_bit4_t sign_bit = val.value(val.size()-1);

      if (thr->flags[4] == BIT4_1) {
	    val = vvp_vector4_t(wid, BIT4_X);

      } else if (thr->flags[4] == BIT4_X || shift > wid) {
	    val = vvp_vector4_t(wid, sign_bit);

      } else if (shift > 0) {
	    vvp_vector4_t blk = val.subvalue(shift, wid-shift);
	    vvp_vector4_t tmp (shift, sign_bit);
	    val.set_vec(0, blk);
	    val.set_vec(wid-shift, tmp);
      }

      thr->push_vec4(val);
      return true;
}

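/*
 * For example, shifting the 8-bit value 8'b1001_0000 right by 2 gives
 * 8'b0010_0100 with %shiftr (zero fill) and 8'b1110_0100 with
 * %shiftr/s (sign-bit fill).
 */
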
/*
 * %split/vec4 <wid>
 *   Pop 1 value,
 *   Take <wid> bits from the lsb,
 *   Push the remaining msb,
 *   Push the lsb.
 */
bool of_SPLIT_VEC4(vthread_t thr, vvp_code_t cp)
{
      unsigned lsb_wid = cp->number;

      vvp_vector4_t&val = thr->peek_vec4();
      assert(lsb_wid < val.size());

      vvp_vector4_t lsb = val.subvalue(0, lsb_wid);
      val = val.subvalue(lsb_wid, val.size()-lsb_wid);

      thr->push_vec4(lsb);
      return true;
}

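/*
 * For example, %split/vec4 4 applied to a 12-bit value 12'habc leaves
 * the high 8 bits (8'hab) just below the new top of the stack and
 * pushes the low 4 bits (4'hc) on top.
 */
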
/*
 * The following are used to allow the darray templates to print correctly.
 */
inline static string get_darray_type(double&)
{
      return "darray<real>";
}

inline static string get_darray_type(string&)
{
      return "darray<string>";
}

inline static string get_darray_type(vvp_vector4_t value)
{
      ostringstream buf;
      buf << "darray<vector[" << value.size() << "]>";
      string res = buf.str();
      return res;
}

/*
 * The following are used to allow a common template to be written for
 * darray real/string/vec4 operations
 */
inline static void dar_pop_value(vthread_t thr, double&value)
{
      value = thr->pop_real();
}

inline static void dar_pop_value(vthread_t thr, string&value)
{
      value = thr->pop_str();
}

inline static void dar_pop_value(vthread_t thr, vvp_vector4_t&value)
{
      value = thr->pop_vec4();
}

template <typename ELEM>
static bool store_dar(vthread_t thr, vvp_code_t cp)
{
      int64_t adr = thr->words[3].w_int;
      ELEM value;
	// FIXME: Can we get the size of the underlying array element
	//        and then use the normal pop_value?
      dar_pop_value(thr, value);

      vvp_net_t*net = cp->net;
      assert(net);

      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      vvp_darray*darray = obj->get_object().peek<vvp_darray>();

      if (adr < 0)
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to a negative " << get_darray_type(value)
	         << " index (" << adr << ")." << endl;
      else if (thr->flags[4] != BIT4_0)
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to an undefined " << get_darray_type(value)
	         << " index." << endl;
      else if (darray)
	    darray->set_word(adr, value);
      else
	    cerr << thr->get_fileline()
	         << "Warning: cannot write to an undefined " << get_darray_type(value)
	         << "." << endl;

      return true;
}

/*
 * %store/dar/real <var>
 */
bool of_STORE_DAR_R(vthread_t thr, vvp_code_t cp)
{
      return store_dar<double>(thr, cp);
}

/*
 * %store/dar/str <var>
 */
bool of_STORE_DAR_STR(vthread_t thr, vvp_code_t cp)
{
      return store_dar<string>(thr, cp);
}

/*
 * %store/dar/vec4 <var>
 */
bool of_STORE_DAR_VEC4(vthread_t thr, vvp_code_t cp)
{
      return store_dar<vvp_vector4_t>(thr, cp);
}

bool of_STORE_OBJ(vthread_t thr, vvp_code_t cp)
{
	/* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);

      vvp_object_t val;
      thr->pop_object(val);

      vvp_send_object(ptr, val, thr->wt_context);

      return true;
}

/*
 * %store/obja <array-label> <index>
 */
bool of_STORE_OBJA(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;

      vvp_object_t val;
      thr->pop_object(val);

      cp->array->set_word(adr, val);

      return true;
}


/*
 * %store/prop/obj <pid>, <idx>
 *
 * Pop an object value from the object stack, and store the value into
 * the property of the object referenced by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_OBJ(vthread_t thr, vvp_code_t cp)
{
      size_t pid = cp->number;
      unsigned idx = cp->bit_idx[0];

      if (idx != 0) {
	    assert(idx < vthread_s::WORDS_COUNT);
	    idx = thr->words[idx].w_uint;
      }

      vvp_object_t val;
      thr->pop_object(val);

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      cobj->set_object(pid, val, idx);

      return true;
}

static void pop_prop_val(vthread_t thr, double&val, unsigned)
{
      val = thr->pop_real();
}

static void pop_prop_val(vthread_t thr, string&val, unsigned)
{
      val = thr->pop_str();
}

static void pop_prop_val(vthread_t thr, vvp_vector4_t&val, unsigned wid)
{
      val = thr->pop_vec4();
      assert(val.size() >= wid);
      val.resize(wid);
}

static void set_val(vvp_cobject*cobj, size_t pid, double&val)
{
      cobj->set_real(pid, val);
}

static void set_val(vvp_cobject*cobj, size_t pid, string&val)
{
      cobj->set_string(pid, val);
}

static void set_val(vvp_cobject*cobj, size_t pid, vvp_vector4_t&val)
{
      cobj->set_vec4(pid, val);
}

template <typename ELEM>
static bool store_prop(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      size_t pid = cp->number;
      ELEM val;
      pop_prop_val(thr, val, wid); // Pop the value to store.

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj = obj.peek<vvp_cobject>();
      assert(cobj);

      set_val(cobj, pid, val);

      return true;
}

/*
 * %store/prop/r <id>
 *
 * Pop a real value from the real stack, and store the value into the
 * property of the object referenced by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_R(vthread_t thr, vvp_code_t cp)
{
      return store_prop<double>(thr, cp);
}

/*
 * %store/prop/str <id>
 *
 * Pop a string value from the string stack, and store the value into
 * the property of the object referenced by the top of the stack. Do NOT
 * pop the object stack.
 */
bool of_STORE_PROP_STR(vthread_t thr, vvp_code_t cp)
{
      return store_prop<string>(thr, cp);
}

/*
 * %store/prop/v <pid>, <wid>
 *
 * Store a vector value into property <pid> of the cobject on the top
 * of the stack. Do NOT pop the object stack.
 */
bool of_STORE_PROP_V(vthread_t thr, vvp_code_t cp)
{
      return store_prop<vvp_vector4_t>(thr, cp, cp->bit_idx[0]);
}

template <typename ELEM, class QTYPE>
static bool store_qb(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      ELEM value;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      queue->push_back(value, max_size);
      return true;
}

/*
 * %store/qb/r <var-label>, <max-idx>
 */
bool of_STORE_QB_R(vthread_t thr, vvp_code_t cp)
{
      return store_qb<double, vvp_queue_real>(thr, cp);
}

/*
 * %store/qb/str <var-label>, <max-idx>
 */
bool of_STORE_QB_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qb<string, vvp_queue_string>(thr, cp);
}

/*
 * %store/qb/v <var-label>, <max-idx>, <wid>
 */
bool of_STORE_QB_V(vthread_t thr, vvp_code_t cp)
{
      return store_qb<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}

template <typename ELEM, class QTYPE>
static bool store_qdar(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      int64_t idx = thr->words[3].w_int;
      ELEM value;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      if (idx < 0) {
	    cerr << thr->get_fileline()
	         << "Warning: cannot assign to a negative "
	         << get_queue_type(value)
	         << " index (" << idx << "). ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else if (thr->flags[4] != BIT4_0) {
	    cerr << thr->get_fileline()
	         << "Warning: cannot assign to an undefined "
	         << get_queue_type(value) << " index. ";
	    print_queue_value(value);
	    cerr << " was not added." << endl;
      } else
	    queue->set_word_max(idx, value, max_size);
      return true;
}

/*
 * %store/qdar/r <var>, idx
 */
bool of_STORE_QDAR_R(vthread_t thr, vvp_code_t cp)
{
      return store_qdar<double, vvp_queue_real>(thr, cp);
}

/*
 * %store/qdar/str <var>, idx
 */
bool of_STORE_QDAR_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qdar<string, vvp_queue_string>(thr, cp);
}

/*
 * %store/qdar/v <var>, idx
 */
bool of_STORE_QDAR_V(vthread_t thr, vvp_code_t cp)
{
      return store_qdar<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}

template <typename ELEM, class QTYPE>
static bool store_qf(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
      ELEM value;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;
      pop_value(thr, value, wid); // Pop the value to store.

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);
      queue->push_front(value, max_size);
      return true;
}

/*
 * %store/qf/r <var-label>, <max-idx>
 */
bool of_STORE_QF_R(vthread_t thr, vvp_code_t cp)
{
      return store_qf<double, vvp_queue_real>(thr, cp);
}

/*
 * %store/qf/str <var-label>, <max-idx>
 */
bool of_STORE_QF_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qf<string, vvp_queue_string>(thr, cp);
}

/*
 * %store/qf/v <var-label>, <max-idx>, <wid>
 */
bool of_STORE_QF_V(vthread_t thr, vvp_code_t cp)
{
      return store_qf<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}

template <typename ELEM, class QTYPE>
static bool store_qobj(vthread_t thr, vvp_code_t cp, unsigned wid=0)
{
	// FIXME: Can we actually use wid here?
      (void)wid;
      vvp_net_t*net = cp->net;
      unsigned max_size = thr->words[cp->bit_idx[0]].w_int;

      vvp_queue*queue = get_queue_object<QTYPE>(thr, net);
      assert(queue);

      vvp_object_t src;
      thr->pop_object(src);

      queue->copy_elems(src, max_size);
      return true;
}

bool of_STORE_QOBJ_R(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<double, vvp_queue_real>(thr, cp);
}

bool of_STORE_QOBJ_STR(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<string, vvp_queue_string>(thr, cp);
}

bool of_STORE_QOBJ_V(vthread_t thr, vvp_code_t cp)
{
      return store_qobj<vvp_vector4_t, vvp_queue_vec4>(thr, cp, cp->bit_idx[1]);
}

static void vvp_send(vthread_t thr, vvp_net_ptr_t ptr, double&val)
{
      vvp_send_real(ptr, val, thr->wt_context);
}

static void vvp_send(vthread_t thr, vvp_net_ptr_t ptr, string&val)
{
      vvp_send_string(ptr, val, thr->wt_context);
}

template <typename ELEM>
static bool store(vthread_t thr, vvp_code_t cp)
{
      ELEM val;
      pop_value(thr, val, 0);
	/* set the value into port 0 of the destination. */
      vvp_net_ptr_t ptr (cp->net, 0);
      vvp_send(thr, ptr, val);
      return true;
}

bool of_STORE_REAL(vthread_t thr, vvp_code_t cp)
{
      return store<double>(thr, cp);
}

template <typename ELEM>
static bool storea(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;
      ELEM val;
      pop_value(thr, val, 0);

      if (thr->flags[4] != BIT4_1)
	    cp->array->set_word(adr, val);

      return true;
}

/*
 * %store/reala <var-label> <index>
 */
bool of_STORE_REALA(vthread_t thr, vvp_code_t cp)
{
      return storea<double>(thr, cp);
}

bool of_STORE_STR(vthread_t thr, vvp_code_t cp)
{
      return store<string>(thr, cp);
}

/*
 * %store/stra <array-label> <index>
 */
bool of_STORE_STRA(vthread_t thr, vvp_code_t cp)
{
      return storea<string>(thr, cp);
}

/*
 * %store/vec4 <var-label>, <offset>, <wid>
 *
 * <offset> is the index register that contains the base offset into
 * the destination. If zero, an offset of 0 is used instead of index
 * register zero. The offset value is SIGNED, and can be negative.
 *
 * <wid> is the actual width, an unsigned number.
 *
 * This function tests flag bit 4. If that flag is set, and <offset>
 * is an actual index register (not zero), then this assumes that the
 * calculation of the <offset> contents failed, and the store is
 * aborted.
 *
 * NOTE: This instruction may lose the <wid> argument because it is
 * not consistent with the %store/vec4/<etc> instructions, which have
 * no <wid>.
 */
bool of_STORE_VEC4(vthread_t thr, vvp_code_t cp)
{
      vvp_net_ptr_t ptr(cp->net, 0);
      vvp_signal_value*sig = dynamic_cast<vvp_signal_value*> (cp->net->fil);
      unsigned off_index = cp->bit_idx[0];
      int wid = cp->bit_idx[1];

      int off = off_index? thr->words[off_index].w_int : 0;
      const int sig_value_size = sig->value_size();

      vvp_vector4_t&val = thr->peek_vec4();
      unsigned val_size = val.size();

      if ((int)val_size < wid) {
	    cerr << thr->get_fileline()
	         << "XXXX Internal error: val.size()=" << val_size
	         << ", expecting >= " << wid << endl;
      }
      assert((int)val_size >= wid);
      if ((int)val_size > wid) {
	    val.resize(wid);
	    val_size = wid;
      }

	// If there is a problem loading the index register, flags-4
	// will be set to 1, and we know here to skip the actual assignment.
      if (off_index!=0 && thr->flags[4] == BIT4_1) {
	    thr->pop_vec4(1);
	    return true;
      }

      if (off <= -wid) {
	    thr->pop_vec4(1);
	    return true;
      }
      if (off >= sig_value_size) {
	    thr->pop_vec4(1);
	    return true;
      }

	// If the index is below the vector, then only assign the high
	// bits that overlap with the target.
      if (off < 0) {
	    int use_off = -off;
	    wid -= use_off;
	    val = val.subvalue(use_off, wid);
	    val_size = wid;
	    off = 0;
      }

	// If the value is partly above the target, then only assign
	// the bits that overlap.
      if ((off+wid) > sig_value_size) {
	    wid = sig_value_size - off;
	    val = val.subvalue(0, wid);
	    val.resize(wid);
	    val_size = wid;
      }


      if (off==0 && val_size==(unsigned)sig_value_size)
	    vvp_send_vec4(ptr, val, thr->wt_context);
      else
	    vvp_send_vec4_pv(ptr, val, off, wid, sig_value_size, thr->wt_context);

      thr->pop_vec4(1);
      return true;
}

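/*
 * For example, storing an 8-bit value to a 16-bit signal with off=12
 * and wid=8 clips the value to its low 4 bits and delivers them with
 * vvp_send_vec4_pv(), so only bits [15:12] of the target are driven.
 */
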
/*
 * %store/vec4a <var-label>, <addr>, <offset>
 */
bool of_STORE_VEC4A(vthread_t thr, vvp_code_t cp)
{
      unsigned adr_index = cp->bit_idx[0];
      unsigned off_index = cp->bit_idx[1];

      vvp_vector4_t&value = thr->peek_vec4();

      long adr = adr_index? thr->words[adr_index].w_int : 0;
      long off = off_index? thr->words[off_index].w_int : 0;

	// Suppress action if flags-4 is true.
      if (thr->flags[4] == BIT4_1) {
	    thr->pop_vec4(1);
	    return true;
      }

      cp->array->set_word(adr, off, value);

      thr->pop_vec4(1);
      return true;
}

/*
 * %sub
 *   pop r;
 *   pop l;
 *   push l-r;
 */
bool of_SUB(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t r = thr->pop_vec4();
      vvp_vector4_t&l = thr->peek_vec4();

      l.sub(r);
      return true;
}

/*
 * %subi <vala>, <valb>, <wid>
 *
 * Pop one operand, get the other operand from the arguments, and push
 * the result.
 */
bool of_SUBI(vthread_t thr, vvp_code_t cp)
{
      unsigned wid = cp->number;

      vvp_vector4_t&l = thr->peek_vec4();

	// I expect that most of the bits of an immediate value are
	// going to be zero, so start the result vector with all zero
	// bits. Then we only need to replace the bits that are different.
      vvp_vector4_t r (wid, BIT4_0);
      get_immediate_rval (cp, r);

      l.sub(r);

      return true;
}

bool of_SUB_WR(vthread_t thr, vvp_code_t)
{
      double r = thr->pop_real();
      double l = thr->pop_real();
      thr->push_real(l - r);
      return true;
}

/*
 * %substr <first>, <last>
 * Pop a string, take the substring (SystemVerilog style), and return
 * the result to the stack. This opcode actually works by editing the
 * string in place.
 */
bool of_SUBSTR(vthread_t thr, vvp_code_t cp)
{
      int32_t first = thr->words[cp->bit_idx[0]].w_int;
      int32_t last = thr->words[cp->bit_idx[1]].w_int;
      string&val = thr->peek_str(0);

      if (first < 0 || last < first || last >= (int32_t)val.size()) {
	    val = string("");
	    return true;
      }

      val = val.substr(first, last-first+1);
      return true;
}

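/*
 * For example, with the index registers holding first=1 and last=3,
 * %substr turns the string "example" on the stack into "xam" (the
 * inclusive character range [1:3]). Out-of-range bounds yield the
 * empty string.
 */
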
/*
 * %substr/vec4 <index>, <wid>
 */
bool of_SUBSTR_VEC4(vthread_t thr, vvp_code_t cp)
{
      unsigned sel_idx = cp->bit_idx[0];
      unsigned wid = cp->bit_idx[1];

      int32_t sel = thr->words[sel_idx].w_int;
      string&val = thr->peek_str(0);

      assert(wid%8 == 0);

      if (sel < 0 || sel >= (int32_t)val.size()) {
	    vvp_vector4_t res (wid, BIT4_0);
	    thr->push_vec4(res);
	    return true;
      }

      vvp_vector4_t res (wid, BIT4_0);

      assert(wid==8);
      unsigned char tmp = val[sel];
      for (int idx = 0 ; idx < 8 ; idx += 1) {
	    if (tmp & (1<<idx))
		  res.set_bit(idx, BIT4_1);
      }

      thr->push_vec4(res);
      return true;
}

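/*
 * For example, with the string "AB" on the string stack and the index
 * register holding 1, %substr/vec4 pushes the 8-bit vector 8'h42 (the
 * character code of 'B'). A negative or out-of-range index pushes all
 * zero bits instead.
 */
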
bool of_FILE_LINE(vthread_t thr, vvp_code_t cp)
{
      vpiHandle handle = cp->handle;

	/* When it is available, keep the file/line information in the
	   thread for error/warning messages. */
      thr->set_fileline(vpi_get_str(vpiFile, handle),
                        vpi_get(vpiLineNo, handle));

      if (show_file_line)
	    cerr << thr->get_fileline()
	         << vpi_get_str(_vpiDescription, handle) << endl;

      return true;
}

/*
 * %test_nul <var-label>;
 * Test if the object at the specified variable is nil. If so, write
 * "1" into flags[4], otherwise write "0" into flags[4].
 */
bool of_TEST_NUL(vthread_t thr, vvp_code_t cp)
{
      vvp_net_t*net = cp->net;

      assert(net);
      vvp_fun_signal_object*obj = dynamic_cast<vvp_fun_signal_object*> (net->fun);
      assert(obj);

      if (obj->get_object().test_nil())
	    thr->flags[4] = BIT4_1;
      else
	    thr->flags[4] = BIT4_0;

      return true;
}

bool of_TEST_NUL_A(vthread_t thr, vvp_code_t cp)
{
      unsigned idx = cp->bit_idx[0];
      unsigned adr = thr->words[idx].w_int;
      vvp_object_t word;

	/* If the address is undefined, return true. */
      if (thr->flags[4] == BIT4_1) {
	    return true;
      }

      cp->array->get_word_obj(adr, word);
      if (word.test_nil())
	    thr->flags[4] = BIT4_1;
      else
	    thr->flags[4] = BIT4_0;

      return true;
}

bool of_TEST_NUL_OBJ(vthread_t thr, vvp_code_t)
{
      if (thr->peek_object().test_nil())
	    thr->flags[4] = BIT4_1;
      else
	    thr->flags[4] = BIT4_0;
      return true;
}

/*
 * %test_nul/prop <pid>, <idx>
 */
bool of_TEST_NUL_PROP(vthread_t thr, vvp_code_t cp)
{
      unsigned pid = cp->number;
      unsigned idx = cp->bit_idx[0];

      if (idx != 0) {
	    assert(idx < vthread_s::WORDS_COUNT);
	    idx = thr->words[idx].w_uint;
      }

      vvp_object_t&obj = thr->peek_object();
      vvp_cobject*cobj  = obj.peek<vvp_cobject>();

      vvp_object_t val;
      cobj->get_object(pid, val, idx);

      if (val.test_nil())
	    thr->flags[4] = BIT4_1;
      else
	    thr->flags[4] = BIT4_0;

      return true;
}

bool of_VPI_CALL(vthread_t thr, vvp_code_t cp)
{
      vpip_execute_vpi_call(thr, cp->handle);

      if (schedule_stopped()) {
	    if (! schedule_finished())
		  schedule_vthread(thr, 0, false);

	    return false;
      }

      return schedule_finished()? false : true;
}

/* %wait <label>;
 * Implement the wait by locating the vvp_net_t for the event, and
 * adding this thread to the threads list for the event. The <label>
 * argument is a reference to the functor to wait for. This must be
 * an event object of some sort.
 */
bool of_WAIT(vthread_t thr, vvp_code_t cp)
{
      assert(! thr->i_am_in_function);
      assert(! thr->waiting_for_event);
      thr->waiting_for_event = 1;

	/* Add this thread to the list in the event. */
      waitable_hooks_s*ep = dynamic_cast<waitable_hooks_s*> (cp->net->fun);
      assert(ep);
      thr->wait_next = ep->add_waiting_thread(thr);

	/* Return false to suspend this thread. */
      return false;
}

/*
 * Implement the %wait/fork (SystemVerilog) instruction by suspending
 * the current thread until all the detached children have finished.
 */
bool of_WAIT_FORK(vthread_t thr, vvp_code_t)
{
	/* If a %wait/fork is being executed then the parent thread
	 * cannot be waiting in a join or already waiting. */
      assert(! thr->i_am_in_function);
      assert(! thr->i_am_joining);
      assert(! thr->i_am_waiting);

	/* There should be no active children when waiting. */
      assert(thr->children.empty());

	/* If there are no detached children then there is nothing to
	 * wait for. */
      if (thr->detached_children.empty()) return true;

	/* Flag that this process is waiting for the detached children
	 * to finish and suspend it. */
      thr->i_am_waiting = 1;
      return false;
}

/*
 * %xnor
 */
bool of_XNOR(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
      unsigned wid = vall.size();

      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {

	    vvp_bit4_t lb = vall.value(idx);
	    vvp_bit4_t rb = valr.value(idx);
	    vall.set_bit(idx, ~(lb ^ rb));
      }

      return true;
}

/*
 * %xor
 */
bool of_XOR(vthread_t thr, vvp_code_t)
{
      vvp_vector4_t valr = thr->pop_vec4();
      vvp_vector4_t&vall = thr->peek_vec4();
      assert(vall.size() == valr.size());
      unsigned wid = vall.size();

      for (unsigned idx = 0 ;  idx < wid ;  idx += 1) {

	    vvp_bit4_t lb = vall.value(idx);
	    vvp_bit4_t rb = valr.value(idx);
	    vall.set_bit(idx, lb ^ rb);
      }

      return true;
}


bool of_ZOMBIE(vthread_t thr, vvp_code_t)
{
      thr->pc = codespace_null();
      if ((thr->parent == 0) && (thr->children.empty())) {
	    if (thr->delay_delete)
		  schedule_del_thr(thr);
	    else
		  vthread_delete(thr);
      }
      return false;
}

/*
 * This is a phantom opcode used to call user defined functions. It
 * is used in code generated by the .ufunc statement. It contains a
 * pointer to the executable code of the function and a pointer to
 * a ufunc_core object that has all the port information about the
 * function.
 */
static bool do_exec_ufunc(vthread_t thr, vvp_code_t cp, vthread_t child)
{
      __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

      assert(child_scope->get_type_code() == vpiFunction);
      assert(thr->children.empty());


        /* We can take a number of shortcuts because we know that a
           continuous assignment can only occur in a static scope. */
      assert(thr->wt_context == 0);
      assert(thr->rd_context == 0);

        /* If an automatic function, allocate a context for this call. */
      vvp_context_t child_context = 0;
      if (child_scope->is_automatic()) {
            child_context = vthread_alloc_context(child_scope);
            thr->wt_context = child_context;
            thr->rd_context = child_context;
      }

      child->wt_context = child_context;
      child->rd_context = child_context;

	/* Copy all the inputs to the ufunc object to the port
	   variables of the function. This copies all the values
	   atomically. */
      cp->ufunc_core_ptr->assign_bits_to_ports(child_context);
      child->delay_delete = 1;

      child->parent = thr;
      thr->children.insert(child);
	// This should be the only child
      assert(thr->children.size()==1);

      child->is_scheduled = 1;
      child->i_am_in_function = 1;
      vthread_run(child);
      running_thread = thr;

      if (child->i_have_ended) {
	    do_join(thr, child);
            return true;
      } else {
	    thr->i_am_joining = 1;
	    return false;
      }
}

bool of_EXEC_UFUNC_REAL(vthread_t thr, vvp_code_t cp)
{
      __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

	/* Create a temporary thread and run it immediately. */
      vthread_t child = vthread_new(cp->cptr, child_scope);
      thr->push_real(0.0);
      child->args_real.push_back(0);

      return do_exec_ufunc(thr, cp, child);
}

bool of_EXEC_UFUNC_VEC4(vthread_t thr, vvp_code_t cp)
{
      __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

      vpiScopeFunction*scope_func = dynamic_cast<vpiScopeFunction*>(child_scope);
      assert(scope_func);

	/* Create a temporary thread and run it immediately. */
      vthread_t child = vthread_new(cp->cptr, child_scope);
      thr->push_vec4(vvp_vector4_t(scope_func->get_func_width(), scope_func->get_func_init_val()));
      child->args_vec4.push_back(0);

      return do_exec_ufunc(thr, cp, child);
}

/*
 * This is a phantom opcode used to harvest the result of calling a user
 * defined function. It is used in code generated by the .ufunc statement.
 */
bool of_REAP_UFUNC(vthread_t thr, vvp_code_t cp)
{
      __vpiScope*child_scope = cp->ufunc_core_ptr->func_scope();
      assert(child_scope);

	/* Copy the output from the result variable to the output
	   ports of the .ufunc device. */
      cp->ufunc_core_ptr->finish_thread();

        /* If an automatic function, free the context for this call. */
      if (child_scope->is_automatic()) {
            vthread_free_context(thr->rd_context, child_scope);
            thr->wt_context = 0;
            thr->rd_context = 0;
      }

      return true;
}