1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Wrappers for generic Unix system calls                       ---*/
5 /*---                                            syswrap-generic.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2000-2017 Julian Seward
13       jseward@acm.org
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 */
32 
33 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_dragonfly)
34 
35 #include "pub_core_basics.h"
36 #include "pub_core_vki.h"
37 #include "pub_core_vkiscnums.h"
38 #include "pub_core_threadstate.h"
39 #include "pub_core_debuginfo.h"     // VG_(di_notify_*)
40 #include "pub_core_aspacemgr.h"
41 #include "pub_core_transtab.h"      // VG_(discard_translations)
42 #include "pub_core_xarray.h"
43 #include "pub_core_clientstate.h"   // VG_(brk_base), VG_(brk_limit)
44 #include "pub_core_debuglog.h"
45 #include "pub_core_errormgr.h"
46 #include "pub_core_gdbserver.h"     // VG_(gdbserver)
47 #include "pub_core_libcbase.h"
48 #include "pub_core_libcassert.h"
49 #include "pub_core_libcfile.h"
50 #include "pub_core_libcprint.h"
51 #include "pub_core_libcproc.h"
52 #include "pub_core_libcsignal.h"
53 #include "pub_core_machine.h"       // VG_(get_SP)
54 #include "pub_core_mallocfree.h"
55 #include "pub_core_options.h"
56 #include "pub_core_scheduler.h"
57 #include "pub_core_signals.h"
58 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
59 #include "pub_core_syscall.h"
60 #include "pub_core_syswrap.h"
61 #include "pub_core_tooliface.h"
62 #include "pub_core_ume.h"
63 #include "pub_core_stacks.h"
64 
65 #include "priv_types_n_macros.h"
66 #include "priv_syswrap-generic.h"
67 
68 #include "config.h"
69 
70 
ML_(guess_and_register_stack)71 void ML_(guess_and_register_stack) (Addr sp, ThreadState* tst)
72 {
73    Bool debug = False;
74    NSegment const* seg;
75 
76    /* We don't really know where the client stack is, because its
77       allocated by the client.  The best we can do is look at the
78       memory mappings and try to derive some useful information.  We
79       assume that sp starts near its highest possible value, and can
80       only go down to the start of the mmaped segment. */
81    seg = VG_(am_find_nsegment)(sp);
82    if (seg
83        && VG_(am_is_valid_for_client)(sp, 1, VKI_PROT_READ | VKI_PROT_WRITE)) {
84       tst->client_stack_highest_byte = (Addr)VG_PGROUNDUP(sp)-1;
85       tst->client_stack_szB = tst->client_stack_highest_byte - seg->start + 1;
86 
87       tst->os_state.stk_id
88          = VG_(register_stack)(seg->start, tst->client_stack_highest_byte);
89 
90       if (debug)
91 	 VG_(printf)("tid %u: guessed client stack range [%#lx-%#lx]"
92                      " as stk_id %lu\n",
93 		     tst->tid, seg->start, tst->client_stack_highest_byte,
94                      tst->os_state.stk_id);
95    } else {
96       VG_(message)(Vg_UserMsg,
97                    "!? New thread %u starts with SP(%#lx) unmapped\n",
98 		   tst->tid, sp);
99       tst->client_stack_highest_byte = 0;
100       tst->client_stack_szB  = 0;
101    }
102 }
103 
104 /* Returns True iff address range is something the client can
105    plausibly mess with: all of it is either already belongs to the
106    client or is free or a reservation. */
107 
ML_(valid_client_addr)108 Bool ML_(valid_client_addr)(Addr start, SizeT size, ThreadId tid,
109                                    const HChar *syscallname)
110 {
111    Bool ret;
112 
113    if (size == 0)
114       return True;
115 
116    ret = VG_(am_is_valid_for_client_or_free_or_resvn)
117             (start,size,VKI_PROT_NONE);
118 
119    if (0)
120       VG_(printf)("%s: test=%#lx-%#lx ret=%d\n",
121 		  syscallname, start, start+size-1, (Int)ret);
122 
123    if (!ret && syscallname != NULL) {
124       VG_(message)(Vg_UserMsg, "Warning: client syscall %s tried "
125                                "to modify addresses %#lx-%#lx\n",
126                                syscallname, start, start+size-1);
127       if (VG_(clo_verbosity) > 1) {
128          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
129       }
130    }
131 
132    return ret;
133 }
134 
135 
ML_(client_signal_OK)136 Bool ML_(client_signal_OK)(Int sigNo)
137 {
138    /* signal 0 is OK for kill */
139    Bool ret = sigNo >= 0 && sigNo <= VG_SIGVGRTUSERMAX;
140 
141    //VG_(printf)("client_signal_OK(%d) -> %d\n", sigNo, ret);
142 
143    return ret;
144 }
145 
146 
147 /* Handy small function to help stop wrappers from segfaulting when
148    presented with bogus client addresses.  Is not used for generating
149    user-visible errors. */
150 
ML_(safe_to_deref)151 Bool ML_(safe_to_deref) ( const void *start, SizeT size )
152 {
153    return VG_(am_is_valid_for_client)( (Addr)start, size, VKI_PROT_READ );
154 }
155 
156 
157 /* ---------------------------------------------------------------------
158    Doing mmap, mremap
159    ------------------------------------------------------------------ */
160 
161 /* AFAICT from kernel sources (mm/mprotect.c) and general experimentation,
162    munmap, mprotect (and mremap??) work at the page level.  So addresses
163    and lengths must be adjusted for this. */
164 
165 /* Mash around start and length so that the area exactly covers
166    an integral number of pages.  If we don't do that, memcheck's
167    idea of addressible memory diverges from that of the
168    kernel's, which causes the leak detector to crash. */
169 static
page_align_addr_and_len(Addr * a,SizeT * len)170 void page_align_addr_and_len( Addr* a, SizeT* len)
171 {
172    Addr ra;
173 
174    ra = VG_PGROUNDDN(*a);
175    *len = VG_PGROUNDUP(*a + *len) - ra;
176    *a = ra;
177 }
178 
/* Tell the address space manager about a new client mapping at 'a',
   and discard any translations for the range if aspacem says that is
   needed.  'a' must be page aligned; 'len' is rounded up to a whole
   number of pages here. */
static void notify_core_of_mmap(Addr a, SizeT len, UInt prot,
                                UInt flags, Int fd, Off64T offset)
{
   SizeT mapped_len;

   /* 'a' is the return value from a real kernel mmap, so it must
      already be page aligned ... */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   /* ... whereas len is whatever the syscall supplied, so round it. */
   mapped_len = VG_PGROUNDUP(len);

   if (VG_(am_notify_client_mmap)( a, mapped_len, prot, flags, fd, offset ))
      VG_(discard_translations)( a, (ULong)mapped_len,
                                 "notify_core_of_mmap" );
}
195 
/* Tell the tool (via the new_mem_mmap tracking event) about a new
   client mapping at 'a' with protection 'prot'.  'a' must be page
   aligned; 'len' is rounded up to a whole number of pages here.
   'di_handle' is passed through to the tool unchanged. */
static void notify_tool_of_mmap(Addr a, SizeT len, UInt prot, ULong di_handle)
{
   Bool may_read  = toBool(prot & VKI_PROT_READ);
   Bool may_write = toBool(prot & VKI_PROT_WRITE);
   Bool may_exec  = toBool(prot & VKI_PROT_EXEC);

   /* 'a' comes from a real kernel mmap, hence is page aligned;
      len is whatever the syscall supplied, hence the rounding. */
   vg_assert(VG_IS_PAGE_ALIGNED(a));
   len = VG_PGROUNDUP(len);

   VG_TRACK( new_mem_mmap, a, len, may_read, may_write, may_exec,
             di_handle );
}
211 
212 
213 /* When a client mmap has been successfully done, this function must
214    be called.  It notifies both aspacem and the tool of the new
215    mapping.
216 
217    JRS 2008-Aug-14: But notice this is *very* obscure.  The only place
218    it is called from is POST(sys_io_setup).  In particular,
219    ML_(generic_PRE_sys_mmap), in m_syswrap, is the "normal case" handler for
220    client mmap.  But it doesn't call this function; instead it does the
221    relevant notifications itself.  Here, we just pass di_handle=0 to
222    notify_tool_of_mmap as we have no better information.  But really this
223    function should be done away with; problem is I don't understand what
224    POST(sys_io_setup) does or how it works.
225 
226    [However, this function is used lots for Darwin, because
227     ML_(generic_PRE_sys_mmap) cannot be used for Darwin.]
228  */
229 void
ML_(notify_core_and_tool_of_mmap)230 ML_(notify_core_and_tool_of_mmap) ( Addr a, SizeT len, UInt prot,
231                                     UInt flags, Int fd, Off64T offset )
232 {
233    // XXX: unlike the other notify_core_and_tool* functions, this one doesn't
234    // do anything with debug info (ie. it doesn't call VG_(di_notify_mmap)).
235    // Should it?  --njn
236    notify_core_of_mmap(a, len, prot, flags, fd, offset);
237    notify_tool_of_mmap(a, len, prot, 0/*di_handle*/);
238 }
239 
240 void
ML_(notify_core_and_tool_of_munmap)241 ML_(notify_core_and_tool_of_munmap) ( Addr a, SizeT len )
242 {
243    Bool d;
244 
245    page_align_addr_and_len(&a, &len);
246    d = VG_(am_notify_munmap)(a, len);
247    VG_TRACK( die_mem_munmap, a, len );
248    VG_(di_notify_munmap)( a, len );
249    if (d)
250       VG_(discard_translations)( a, (ULong)len,
251                                  "ML_(notify_core_and_tool_of_munmap)" );
252 }
253 
254 void
ML_(notify_core_and_tool_of_mprotect)255 ML_(notify_core_and_tool_of_mprotect) ( Addr a, SizeT len, Int prot )
256 {
257    Bool rr = toBool(prot & VKI_PROT_READ);
258    Bool ww = toBool(prot & VKI_PROT_WRITE);
259    Bool xx = toBool(prot & VKI_PROT_EXEC);
260    Bool d;
261 
262    page_align_addr_and_len(&a, &len);
263    d = VG_(am_notify_mprotect)(a, len, prot);
264    VG_TRACK( change_mem_mprotect, a, len, rr, ww, xx );
265    VG_(di_notify_mprotect)( a, len, prot );
266    if (d)
267       VG_(discard_translations)( a, (ULong)len,
268                                  "ML_(notify_core_and_tool_of_mprotect)" );
269 }
270 
271 
272 
273 #if HAVE_MREMAP
274 /* Expand (or shrink) an existing mapping, potentially moving it at
275    the same time (controlled by the MREMAP_MAYMOVE flag).  Nightmare.
276 */
277 static
do_mremap(Addr old_addr,SizeT old_len,Addr new_addr,SizeT new_len,UWord flags,ThreadId tid)278 SysRes do_mremap( Addr old_addr, SizeT old_len,
279                   Addr new_addr, SizeT new_len,
280                   UWord flags, ThreadId tid )
281 {
282 #  define MIN_SIZET(_aa,_bb) (_aa) < (_bb) ? (_aa) : (_bb)
283 
284    Bool      ok, d;
285    NSegment const* old_seg;
286    Addr      advised;
287    Bool      f_fixed   = toBool(flags & VKI_MREMAP_FIXED);
288    Bool      f_maymove = toBool(flags & VKI_MREMAP_MAYMOVE);
289 
290    if (0)
291       VG_(printf)("do_remap (old %#lx %lu) (new %#lx %lu) %s %s\n",
292                   old_addr,old_len,new_addr,new_len,
293                   flags & VKI_MREMAP_MAYMOVE ? "MAYMOVE" : "",
294                   flags & VKI_MREMAP_FIXED ? "FIXED" : "");
295    if (0)
296       VG_(am_show_nsegments)(0, "do_remap: before");
297 
298    if (flags & ~(VKI_MREMAP_FIXED | VKI_MREMAP_MAYMOVE))
299       goto eINVAL;
300 
301    if (!VG_IS_PAGE_ALIGNED(old_addr))
302       goto eINVAL;
303 
304    old_len = VG_PGROUNDUP(old_len);
305    new_len = VG_PGROUNDUP(new_len);
306 
307    if (new_len == 0)
308       goto eINVAL;
309 
310    /* kernel doesn't reject this, but we do. */
311    if (old_len == 0)
312       goto eINVAL;
313 
314    /* reject wraparounds */
315    if (old_addr + old_len < old_addr)
316       goto eINVAL;
317    if (f_fixed == True && new_addr + new_len < new_len)
318       goto eINVAL;
319 
320    /* kernel rejects all fixed, no-move requests (which are
321       meaningless). */
322    if (f_fixed == True && f_maymove == False)
323       goto eINVAL;
324 
325    /* Stay away from non-client areas. */
326    if (!ML_(valid_client_addr)(old_addr, old_len, tid, "mremap(old_addr)"))
327       goto eINVAL;
328 
329    /* In all remaining cases, if the old range does not fall within a
330       single segment, fail. */
331    old_seg = VG_(am_find_nsegment)( old_addr );
332    if (old_addr < old_seg->start || old_addr+old_len-1 > old_seg->end)
333       goto eINVAL;
334    if (old_seg->kind != SkAnonC && old_seg->kind != SkFileC
335        && old_seg->kind != SkShmC)
336       goto eINVAL;
337 
338    vg_assert(old_len > 0);
339    vg_assert(new_len > 0);
340    vg_assert(VG_IS_PAGE_ALIGNED(old_len));
341    vg_assert(VG_IS_PAGE_ALIGNED(new_len));
342    vg_assert(VG_IS_PAGE_ALIGNED(old_addr));
343 
344    /* There are 3 remaining cases:
345 
346       * maymove == False
347 
348         new space has to be at old address, so:
349             - shrink    -> unmap end
350             - same size -> do nothing
351             - grow      -> if can grow in-place, do so, else fail
352 
353       * maymove == True, fixed == False
354 
355         new space can be anywhere, so:
356             - shrink    -> unmap end
357             - same size -> do nothing
358             - grow      -> if can grow in-place, do so, else
359                            move to anywhere large enough, else fail
360 
361       * maymove == True, fixed == True
362 
363         new space must be at new address, so:
364 
365             - if new address is not page aligned, fail
366             - if new address range overlaps old one, fail
367             - if new address range cannot be allocated, fail
368             - else move to new address range with new size
369             - else fail
370    */
371 
372    if (f_maymove == False) {
373       /* new space has to be at old address */
374       if (new_len < old_len)
375          goto shrink_in_place;
376       if (new_len > old_len)
377          goto grow_in_place_or_fail;
378       goto same_in_place;
379    }
380 
381    if (f_maymove == True && f_fixed == False) {
382       /* new space can be anywhere */
383       if (new_len < old_len)
384          goto shrink_in_place;
385       if (new_len > old_len)
386          goto grow_in_place_or_move_anywhere_or_fail;
387       goto same_in_place;
388    }
389 
390    if (f_maymove == True && f_fixed == True) {
391       /* new space can only be at the new address */
392       if (!VG_IS_PAGE_ALIGNED(new_addr))
393          goto eINVAL;
394       if (new_addr+new_len-1 < old_addr || new_addr > old_addr+old_len-1) {
395          /* no overlap */
396       } else {
397          goto eINVAL;
398       }
399       if (new_addr == 0)
400          goto eINVAL;
401          /* VG_(am_get_advisory_client_simple) interprets zero to mean
402             non-fixed, which is not what we want */
403       advised = VG_(am_get_advisory_client_simple)(new_addr, new_len, &ok);
404       if (!ok || advised != new_addr)
405          goto eNOMEM;
406       ok = VG_(am_relocate_nooverlap_client)
407               ( &d, old_addr, old_len, new_addr, new_len );
408       if (ok) {
409          VG_TRACK( copy_mem_remap, old_addr, new_addr,
410                                    MIN_SIZET(old_len,new_len) );
411          if (new_len > old_len)
412             VG_TRACK( new_mem_mmap, new_addr+old_len, new_len-old_len,
413                       old_seg->hasR, old_seg->hasW, old_seg->hasX,
414                       0/*di_handle*/ );
415          VG_TRACK(die_mem_munmap, old_addr, old_len);
416          if (d) {
417             VG_(discard_translations)( old_addr, old_len, "do_remap(1)" );
418             VG_(discard_translations)( new_addr, new_len, "do_remap(2)" );
419          }
420          return VG_(mk_SysRes_Success)( new_addr );
421       }
422       goto eNOMEM;
423    }
424 
425    /* end of the 3 cases */
426    /*NOTREACHED*/ vg_assert(0);
427 
428   grow_in_place_or_move_anywhere_or_fail:
429    {
430    /* try growing it in-place */
431    Addr   needA = old_addr + old_len;
432    SSizeT needL = new_len - old_len;
433 
434    vg_assert(needL > 0);
435    vg_assert(needA > 0);
436 
437    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
438    if (ok) {
439       /* Fixes bug #129866. */
440       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
441    }
442    if (ok && advised == needA) {
443       const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
444       if (new_seg) {
445          VG_TRACK( new_mem_mmap, needA, needL,
446                                  new_seg->hasR,
447                                  new_seg->hasW, new_seg->hasX,
448                                  0/*di_handle*/ );
449          return VG_(mk_SysRes_Success)( old_addr );
450       }
451    }
452 
453    /* that failed.  Look elsewhere. */
454    advised = VG_(am_get_advisory_client_simple)( 0, new_len, &ok );
455    if (ok) {
456       Bool oldR = old_seg->hasR;
457       Bool oldW = old_seg->hasW;
458       Bool oldX = old_seg->hasX;
459       /* assert new area does not overlap old */
460       vg_assert(advised+new_len-1 < old_addr
461                 || advised > old_addr+old_len-1);
462       ok = VG_(am_relocate_nooverlap_client)
463               ( &d, old_addr, old_len, advised, new_len );
464       if (ok) {
465          VG_TRACK( copy_mem_remap, old_addr, advised,
466                                    MIN_SIZET(old_len,new_len) );
467          if (new_len > old_len)
468             VG_TRACK( new_mem_mmap, advised+old_len, new_len-old_len,
469                       oldR, oldW, oldX, 0/*di_handle*/ );
470          VG_TRACK(die_mem_munmap, old_addr, old_len);
471          if (d) {
472             VG_(discard_translations)( old_addr, old_len, "do_remap(4)" );
473             VG_(discard_translations)( advised, new_len, "do_remap(5)" );
474          }
475          return VG_(mk_SysRes_Success)( advised );
476       }
477    }
478    goto eNOMEM;
479    }
480    /*NOTREACHED*/ vg_assert(0);
481 
482   grow_in_place_or_fail:
483    {
484    Addr  needA = old_addr + old_len;
485    SizeT needL = new_len - old_len;
486 
487    vg_assert(needA > 0);
488 
489    advised = VG_(am_get_advisory_client_simple)( needA, needL, &ok );
490    if (ok) {
491       /* Fixes bug #129866. */
492       ok = VG_(am_covered_by_single_free_segment) ( needA, needL );
493    }
494    if (!ok || advised != needA)
495       goto eNOMEM;
496    const NSegment *new_seg = VG_(am_extend_map_client)( old_addr, needL );
497    if (!new_seg)
498       goto eNOMEM;
499    VG_TRACK( new_mem_mmap, needA, needL,
500                            new_seg->hasR, new_seg->hasW, new_seg->hasX,
501                            0/*di_handle*/ );
502 
503    return VG_(mk_SysRes_Success)( old_addr );
504    }
505    /*NOTREACHED*/ vg_assert(0);
506 
507   shrink_in_place:
508    {
509    SysRes sres = VG_(am_munmap_client)( &d, old_addr+new_len, old_len-new_len );
510    if (sr_isError(sres))
511       return sres;
512    VG_TRACK( die_mem_munmap, old_addr+new_len, old_len-new_len );
513    if (d)
514       VG_(discard_translations)( old_addr+new_len, old_len-new_len,
515                                  "do_remap(7)" );
516    return VG_(mk_SysRes_Success)( old_addr );
517    }
518    /*NOTREACHED*/ vg_assert(0);
519 
520   same_in_place:
521    return VG_(mk_SysRes_Success)( old_addr );
522    /*NOTREACHED*/ vg_assert(0);
523 
524   eINVAL:
525    return VG_(mk_SysRes_Error)( VKI_EINVAL );
526   eNOMEM:
527    return VG_(mk_SysRes_Error)( VKI_ENOMEM );
528 
529 #  undef MIN_SIZET
530 }
531 #endif /* HAVE_MREMAP */
532 
533 
534 /* ---------------------------------------------------------------------
535    File-descriptor tracking
536    ------------------------------------------------------------------ */
537 
538 /* One of these is allocated for each open file descriptor.  */
539 typedef struct OpenFd
540 {
541    Int fd;                        /* The file descriptor */
542    HChar *pathname;               /* NULL if not a regular file or unknown */
543    ExeContext *where;             /* NULL if inherited from parent */
544    struct OpenFd *next, *prev;
545 } OpenFd;
546 
547 /* List of allocated file descriptors. */
548 static OpenFd *allocated_fds = NULL;
549 
550 /* Count of open file descriptors. */
551 static Int fd_count = 0;
552 
553 
554 /* Note the fact that a file descriptor was just closed. */
ML_(record_fd_close)555 void ML_(record_fd_close)(Int fd)
556 {
557    OpenFd *i = allocated_fds;
558 
559    if (fd >= VG_(fd_hard_limit))
560       return;			/* Valgrind internal */
561 
562    while(i) {
563       if(i->fd == fd) {
564          if(i->prev)
565             i->prev->next = i->next;
566          else
567             allocated_fds = i->next;
568          if(i->next)
569             i->next->prev = i->prev;
570          if(i->pathname)
571             VG_(free) (i->pathname);
572          VG_(free) (i);
573          fd_count--;
574          break;
575       }
576       i = i->next;
577    }
578 }
579 
580 /* Note the fact that a file descriptor was just opened.  If the
581    tid is -1, this indicates an inherited fd.  If the pathname is NULL,
582    this either indicates a non-standard file (i.e. a pipe or socket or
583    some such thing) or that we don't know the filename.  If the fd is
584    already open, then we're probably doing a dup2() to an existing fd,
585    so just overwrite the existing one. */
ML_(record_fd_open_with_given_name)586 void ML_(record_fd_open_with_given_name)(ThreadId tid, Int fd,
587                                          const HChar *pathname)
588 {
589    OpenFd *i;
590 
591    if (fd >= VG_(fd_hard_limit))
592       return;			/* Valgrind internal */
593 
594    /* Check to see if this fd is already open. */
595    i = allocated_fds;
596    while (i) {
597       if (i->fd == fd) {
598          if (i->pathname) VG_(free)(i->pathname);
599          break;
600       }
601       i = i->next;
602    }
603 
604    /* Not already one: allocate an OpenFd */
605    if (i == NULL) {
606       i = VG_(malloc)("syswrap.rfdowgn.1", sizeof(OpenFd));
607 
608       i->prev = NULL;
609       i->next = allocated_fds;
610       if(allocated_fds) allocated_fds->prev = i;
611       allocated_fds = i;
612       fd_count++;
613    }
614 
615    i->fd = fd;
616    i->pathname = VG_(strdup)("syswrap.rfdowgn.2", pathname);
617    i->where = (tid == -1) ? NULL : VG_(record_ExeContext)(tid, 0/*first_ip_delta*/);
618 }
619 
620 // Record opening of an fd, and find its name.
ML_(record_fd_open_named)621 void ML_(record_fd_open_named)(ThreadId tid, Int fd)
622 {
623    const HChar* buf;
624    const HChar* name;
625    if (VG_(resolve_filename)(fd, &buf))
626       name = buf;
627    else
628       name = NULL;
629 
630    ML_(record_fd_open_with_given_name)(tid, fd, name);
631 }
632 
633 // Record opening of a nameless fd.
ML_(record_fd_open_nameless)634 void ML_(record_fd_open_nameless)(ThreadId tid, Int fd)
635 {
636    ML_(record_fd_open_with_given_name)(tid, fd, NULL);
637 }
638 
639 // Return if a given file descriptor is already recorded.
ML_(fd_recorded)640 Bool ML_(fd_recorded)(Int fd)
641 {
642    OpenFd *i = allocated_fds;
643    while (i) {
644       if (i->fd == fd)
645          return True;
646       i = i->next;
647    }
648    return False;
649 }
650 
651 /* Returned string must not be modified nor free'd. */
ML_(find_fd_recorded_by_fd)652 const HChar *ML_(find_fd_recorded_by_fd)(Int fd)
653 {
654    OpenFd *i = allocated_fds;
655 
656    while (i) {
657       if (i->fd == fd)
658          return i->pathname;
659       i = i->next;
660    }
661 
662    return NULL;
663 }
664 
665 static
unix_to_name(struct vki_sockaddr_un * sa,UInt len,HChar * name)666 HChar *unix_to_name(struct vki_sockaddr_un *sa, UInt len, HChar *name)
667 {
668    if (sa == NULL || len == 0 || sa->sun_path[0] == '\0') {
669       VG_(sprintf)(name, "<unknown>");
670    } else {
671       VG_(sprintf)(name, "%s", sa->sun_path);
672    }
673 
674    return name;
675 }
676 
677 static
inet_to_name(struct vki_sockaddr_in * sa,UInt len,HChar * name)678 HChar *inet_to_name(struct vki_sockaddr_in *sa, UInt len, HChar *name)
679 {
680    if (sa == NULL || len == 0) {
681       VG_(sprintf)(name, "<unknown>");
682    } else if (sa->sin_port == 0) {
683       VG_(sprintf)(name, "<unbound>");
684    } else {
685       UInt addr = VG_(ntohl)(sa->sin_addr.s_addr);
686       VG_(sprintf)(name, "%u.%u.%u.%u:%u",
687                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
688                    (addr>>8) & 0xFF, addr & 0xFF,
689                    VG_(ntohs)(sa->sin_port));
690    }
691 
692    return name;
693 }
694 
695 static
inet6_format(HChar * s,const UChar ip[16])696 void inet6_format(HChar *s, const UChar ip[16])
697 {
698    static const unsigned char V4mappedprefix[12] = {0,0,0,0,0,0,0,0,0,0,0xff,0xff};
699 
700    if (!VG_(memcmp)(ip, V4mappedprefix, 12)) {
701       const struct vki_in_addr *sin_addr =
702           (const struct vki_in_addr *)(ip + 12);
703       UInt addr = VG_(ntohl)(sin_addr->s_addr);
704 
705       VG_(sprintf)(s, "::ffff:%u.%u.%u.%u",
706                    (addr>>24) & 0xFF, (addr>>16) & 0xFF,
707                    (addr>>8) & 0xFF, addr & 0xFF);
708    } else {
709       Bool compressing = False;
710       Bool compressed = False;
711       Int len = 0;
712       Int i;
713 
714       for (i = 0; i < 16; i += 2) {
715          UInt word = ((UInt)ip[i] << 8) | (UInt)ip[i+1];
716          if (word == 0 && !compressed) {
717             compressing = True;
718          } else {
719             if (compressing) {
720                compressing = False;
721                compressed = True;
722                s[len++] = ':';
723             }
724             if (i > 0) {
725                s[len++] = ':';
726             }
727             len += VG_(sprintf)(s + len, "%x", word);
728          }
729       }
730 
731       if (compressing) {
732          s[len++] = ':';
733          s[len++] = ':';
734       }
735 
736       s[len++] = 0;
737    }
738 
739    return;
740 }
741 
742 static
inet6_to_name(struct vki_sockaddr_in6 * sa,UInt len,HChar * name)743 HChar *inet6_to_name(struct vki_sockaddr_in6 *sa, UInt len, HChar *name)
744 {
745    if (sa == NULL || len == 0) {
746       VG_(sprintf)(name, "<unknown>");
747    } else if (sa->sin6_port == 0) {
748       VG_(sprintf)(name, "<unbound>");
749    } else {
750       HChar addr[100];    // large enough
751       inet6_format(addr, (void *)&(sa->sin6_addr));
752       VG_(sprintf)(name, "[%s]:%u", addr, VG_(ntohs)(sa->sin6_port));
753    }
754 
755    return name;
756 }
757 
758 /*
759  * Try get some details about a socket.
760  */
761 static void
getsockdetails(Int fd)762 getsockdetails(Int fd)
763 {
764    union u {
765       struct vki_sockaddr a;
766       struct vki_sockaddr_in in;
767       struct vki_sockaddr_in6 in6;
768       struct vki_sockaddr_un un;
769    } laddr;
770    Int llen;
771 
772    llen = sizeof(laddr);
773    VG_(memset)(&laddr, 0, llen);
774 
775    if(VG_(getsockname)(fd, (struct vki_sockaddr *)&(laddr.a), &llen) != -1) {
776       switch(laddr.a.sa_family) {
777       case VKI_AF_INET: {
778          HChar lname[32];   // large enough
779          HChar pname[32];   // large enough
780          struct vki_sockaddr_in paddr;
781          Int plen = sizeof(struct vki_sockaddr_in);
782 
783          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
784             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> %s\n", fd,
785                          inet_to_name(&(laddr.in), llen, lname),
786                          inet_to_name(&paddr, plen, pname));
787          } else {
788             VG_(message)(Vg_UserMsg, "Open AF_INET socket %d: %s <-> unbound\n",
789                          fd, inet_to_name(&(laddr.in), llen, lname));
790          }
791          return;
792          }
793       case VKI_AF_INET6: {
794          HChar lname[128];  // large enough
795          HChar pname[128];  // large enough
796          struct vki_sockaddr_in6 paddr;
797          Int plen = sizeof(struct vki_sockaddr_in6);
798 
799          if (VG_(getpeername)(fd, (struct vki_sockaddr *)&paddr, &plen) != -1) {
800             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> %s\n", fd,
801                          inet6_to_name(&(laddr.in6), llen, lname),
802                          inet6_to_name(&paddr, plen, pname));
803          } else {
804             VG_(message)(Vg_UserMsg, "Open AF_INET6 socket %d: %s <-> unbound\n",
805                          fd, inet6_to_name(&(laddr.in6), llen, lname));
806          }
807          return;
808          }
809       case VKI_AF_UNIX: {
810          static char lname[256];
811          VG_(message)(Vg_UserMsg, "Open AF_UNIX socket %d: %s\n", fd,
812                       unix_to_name(&(laddr.un), llen, lname));
813          return;
814          }
815       default:
816          VG_(message)(Vg_UserMsg, "Open pf-%d socket %d:\n",
817                       laddr.a.sa_family, fd);
818          return;
819       }
820    }
821 
822    VG_(message)(Vg_UserMsg, "Open socket %d:\n", fd);
823 }
824 
825 
826 /* Dump out a summary, and a more detailed list, of open file descriptors. */
VG_(show_open_fds)827 void VG_(show_open_fds) (const HChar* when)
828 {
829    OpenFd *i = allocated_fds;
830 
831    VG_(message)(Vg_UserMsg, "FILE DESCRIPTORS: %d open %s.\n", fd_count, when);
832 
833    while (i) {
834       if (i->pathname) {
835          VG_(message)(Vg_UserMsg, "Open file descriptor %d: %s\n", i->fd,
836                       i->pathname);
837       } else {
838          Int val;
839          Int len = sizeof(val);
840 
841          if (VG_(getsockopt)(i->fd, VKI_SOL_SOCKET, VKI_SO_TYPE, &val, &len)
842              == -1) {
843             VG_(message)(Vg_UserMsg, "Open file descriptor %d:\n", i->fd);
844          } else {
845             getsockdetails(i->fd);
846          }
847       }
848 
849       if(i->where) {
850          VG_(pp_ExeContext)(i->where);
851          VG_(message)(Vg_UserMsg, "\n");
852       } else {
853          VG_(message)(Vg_UserMsg, "   <inherited from parent>\n");
854          VG_(message)(Vg_UserMsg, "\n");
855       }
856 
857       i = i->next;
858    }
859 
860    VG_(message)(Vg_UserMsg, "\n");
861 }
862 
863 /* If /proc/self/fd doesn't exist (e.g. you've got a Linux kernel that doesn't
864    have /proc support compiled in, or a non-Linux kernel), then we need to
865    find out what file descriptors we inherited from our parent process the
866    hard way - by checking each fd in turn. */
867 static
init_preopened_fds_without_proc_self_fd(void)868 void init_preopened_fds_without_proc_self_fd(void)
869 {
870    struct vki_rlimit lim;
871    UInt count;
872    Int i;
873 
874    if (VG_(getrlimit) (VKI_RLIMIT_NOFILE, &lim) == -1) {
875       /* Hmm.  getrlimit() failed.  Now we're screwed, so just choose
876          an arbitrarily high number.  1024 happens to be the limit in
877          the 2.4 Linux kernels. */
878       count = 1024;
879    } else {
880       count = lim.rlim_cur;
881    }
882 
883    for (i = 0; i < count; i++)
884       if (VG_(fcntl)(i, VKI_F_GETFL, 0) != -1)
885          ML_(record_fd_open_named)(-1, i);
886 }
887 
/* Initialize the list of open file descriptors with the file descriptors
   we inherited from our parent process. */
890 
VG_(init_preopened_fds)891 void VG_(init_preopened_fds)(void)
892 {
893 // DDD: should probably use HAVE_PROC here or similar, instead.
894 #if defined(VGO_linux)
895    Int ret;
896    struct vki_dirent64 d;
897    SysRes f;
898 
899    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
900    if (sr_isError(f)) {
901       init_preopened_fds_without_proc_self_fd();
902       return;
903    }
904 
905    while ((ret = VG_(getdents64)(sr_Res(f), &d, sizeof(d))) != 0) {
906       if (ret == -1)
907          goto out;
908 
909       if (VG_(strcmp)(d.d_name, ".") && VG_(strcmp)(d.d_name, "..")) {
910          HChar* s;
911          Int fno = VG_(strtoll10)(d.d_name, &s);
912          if (*s == '\0') {
913             if (fno != sr_Res(f))
914                if (VG_(clo_track_fds))
915                   ML_(record_fd_open_named)(-1, fno);
916          } else {
917             VG_(message)(Vg_DebugMsg,
918                "Warning: invalid file name in /proc/self/fd: %s\n",
919                d.d_name);
920          }
921       }
922 
923       VG_(lseek)(sr_Res(f), d.d_off, VKI_SEEK_SET);
924    }
925 
926   out:
927    VG_(close)(sr_Res(f));
928 
929 #elif defined(VGO_darwin) || defined(VGO_dragonfly)
930    init_preopened_fds_without_proc_self_fd();
931 
932 #elif defined(VGO_solaris)
933    Int ret;
934    Char buf[VKI_MAXGETDENTS_SIZE];
935    SysRes f;
936 
937    f = VG_(open)("/proc/self/fd", VKI_O_RDONLY, 0);
938    if (sr_isError(f)) {
939       init_preopened_fds_without_proc_self_fd();
940       return;
941    }
942 
943    while ((ret = VG_(getdents64)(sr_Res(f), (struct vki_dirent64 *) buf,
944                                  sizeof(buf))) > 0) {
945       Int i = 0;
946       while (i < ret) {
947          /* Proceed one entry. */
948          struct vki_dirent64 *d = (struct vki_dirent64 *) (buf + i);
949          if (VG_(strcmp)(d->d_name, ".") && VG_(strcmp)(d->d_name, "..")) {
950             HChar *s;
951             Int fno = VG_(strtoll10)(d->d_name, &s);
952             if (*s == '\0') {
953                if (fno != sr_Res(f))
954                   if (VG_(clo_track_fds))
955                      ML_(record_fd_open_named)(-1, fno);
956             } else {
957                VG_(message)(Vg_DebugMsg,
958                      "Warning: invalid file name in /proc/self/fd: %s\n",
959                      d->d_name);
960             }
961          }
962 
963          /* Move on the next entry. */
964          i += d->d_reclen;
965       }
966    }
967 
968    VG_(close)(sr_Res(f));
969 
970 #else
971 #  error Unknown OS
972 #endif
973 }
974 
975 static
pre_mem_read_sendmsg(ThreadId tid,Bool read,const HChar * msg,Addr base,SizeT size)976 void pre_mem_read_sendmsg ( ThreadId tid, Bool read,
977                             const HChar *msg, Addr base, SizeT size )
978 {
979    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
980    VG_(sprintf)(outmsg, "sendmsg%s", msg);
981    PRE_MEM_READ( outmsg, base, size );
982 }
983 
984 static
pre_mem_write_recvmsg(ThreadId tid,Bool read,const HChar * msg,Addr base,SizeT size)985 void pre_mem_write_recvmsg ( ThreadId tid, Bool read,
986                              const HChar *msg, Addr base, SizeT size )
987 {
988    HChar outmsg[VG_(strlen)(msg) + 10]; // large enough
989    VG_(sprintf)(outmsg, "recvmsg%s", msg);
990    if ( read )
991       PRE_MEM_READ( outmsg, base, size );
992    else
993       PRE_MEM_WRITE( outmsg, base, size );
994 }
995 
996 static
post_mem_write_recvmsg(ThreadId tid,Bool read,const HChar * fieldName,Addr base,SizeT size)997 void post_mem_write_recvmsg ( ThreadId tid, Bool read,
998                               const HChar *fieldName, Addr base, SizeT size )
999 {
1000    if ( !read )
1001       POST_MEM_WRITE( base, size );
1002 }
1003 
/* Walk a struct vki_msghdr and call FOREACH_FUNC once for each memory
   region it describes.

   tid          - thread id, forwarded unchanged to FOREACH_FUNC.
   name         - text embedded in the label given to FOREACH_FUNC,
                  e.g. "(NAME)", "(NAME.msg_name)", "(NAME.msg_iov[3])".
   msg          - the header to walk; nothing happens if it is NULL.
   length       - byte budget for the iovec buffers: each buffer is
                  clipped to what remains, and the iovec walk stops once
                  the budget reaches zero.
   foreach_func - called as (tid, flag, label, address, size).  The flag
                  is True for the header fields msg_name..msg_controllen
                  and for the iovec array itself, and False for
                  msg_flags and for the buffers the header points at.
                  (The callbacks in this file name that flag 'read'.)
   rekv         - when True, msg_flags is visited too. */
static
void msghdr_foreachfield (
        ThreadId tid,
        const HChar *name,
        struct vki_msghdr *msg,
        UInt length,
        void (*foreach_func)( ThreadId, Bool, const HChar *, Addr, SizeT ),
        Bool rekv /* "recv" apparently shadows some header decl on OSX108 */
     )
{
   HChar fieldName[VG_(strlen)(name) + 32]; // large enough.
   /* Start address and size of the run of header fields accumulated so
      far by FIELDPAIR. */
   Addr a;
   SizeT s;

   if ( !msg )
      return;

   VG_(sprintf) ( fieldName, "(%s)", name );

   /* FIELDPAIR helps the compiler do one call to foreach_func
      for consecutive (no holes) fields. */
   /* If f2 starts exactly where f1 ends, f2 is appended to the current
      run; otherwise the current run is flushed to foreach_func and a new
      run is started at f2. */
#define FIELDPAIR(f1,f2) \
   if (offsetof(struct vki_msghdr, f1) + sizeof(msg->f1)                \
       == offsetof(struct vki_msghdr, f2))                              \
      s += sizeof(msg->f2);                                             \
   else {                                                               \
      foreach_func (tid, True, fieldName, a, s);                        \
      a = (Addr)&msg->f2;                                               \
      s = sizeof(msg->f2);                                              \
   }

   a = (Addr)&msg->msg_name;
   s = sizeof(msg->msg_name);
   FIELDPAIR(msg_name,    msg_namelen);
   FIELDPAIR(msg_namelen, msg_iov);
   FIELDPAIR(msg_iov,     msg_iovlen);
   FIELDPAIR(msg_iovlen,  msg_control);
   FIELDPAIR(msg_control, msg_controllen);
   /* Flush the final run, which ends with msg_controllen. */
   foreach_func ( tid, True, fieldName, a, s);
#undef FIELDPAIR

   /* msg_flags is completely ignored for send_mesg, recv_mesg doesn't read
      the field, but does write to it. */
   if ( rekv )
      foreach_func ( tid, False, fieldName, (Addr)&msg->msg_flags, sizeof( msg->msg_flags ) );

   /* The pointer fields are only followed when the field itself can be
      dereferenced safely and holds a non-NULL pointer. */
   /* NOTE(review): msg_namelen is read here without its own
      safe_to_deref check -- confirm that is intended. */
   if ( ML_(safe_to_deref)(&msg->msg_name, sizeof (void *))
        && msg->msg_name ) {
      VG_(sprintf) ( fieldName, "(%s.msg_name)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_name, msg->msg_namelen );
   }

   if ( ML_(safe_to_deref)(&msg->msg_iov, sizeof (void *))
        && msg->msg_iov ) {
      struct vki_iovec *iov = msg->msg_iov;
      UInt i;

      if (ML_(safe_to_deref)(&msg->msg_iovlen, sizeof (UInt))) {
         /* First the iovec array itself ... */
         VG_(sprintf) ( fieldName, "(%s.msg_iov)", name );
         foreach_func ( tid, True, fieldName, (Addr)iov,
                        msg->msg_iovlen * sizeof( struct vki_iovec ) );

         /* ... then each buffer it points at, until either the array or
            the LENGTH budget is exhausted. */
         for ( i = 0; i < msg->msg_iovlen && length > 0; ++i, ++iov ) {
            if (ML_(safe_to_deref)(&iov->iov_len, sizeof (UInt))) {
               /* Clip this buffer to what is left of the budget. */
               UInt iov_len = iov->iov_len <= length ? iov->iov_len : length;
               VG_(sprintf) ( fieldName, "(%s.msg_iov[%u])", name, i );
               foreach_func ( tid, False, fieldName,
                              (Addr)iov->iov_base, iov_len );
               length = length - iov_len;
            }
         }
      }
   }

   /* NOTE(review): msg_controllen is likewise read without its own
      safe_to_deref check -- confirm. */
   if ( ML_(safe_to_deref) (&msg->msg_control, sizeof (void *))
        && msg->msg_control ) {
      VG_(sprintf) ( fieldName, "(%s.msg_control)", name );
      foreach_func ( tid, False, fieldName,
                     (Addr)msg->msg_control, msg->msg_controllen );
   }

}
1087 
/* Walk the control messages attached to MSG.  For each SCM_RIGHTS
   message at SOL_SOCKET level, every file descriptor in its payload is
   recorded as opened by TID -- but only when fd tracking
   (VG_(clo_track_fds)) is switched on. */
static void check_cmsg_for_fds(ThreadId tid, struct vki_msghdr *msg)
{
   struct vki_cmsghdr *hdr;

   for (hdr = VKI_CMSG_FIRSTHDR(msg); hdr; hdr = VKI_CMSG_NXTHDR(msg, hdr)) {
      Int *passed;
      Int npassed;
      Int k;

      if (hdr->cmsg_level != VKI_SOL_SOCKET
          || hdr->cmsg_type != VKI_SCM_RIGHTS)
         continue;

      /* The payload is an array of ints; its element count is the
         message length minus the (aligned) header, in int units. */
      passed  = (Int *) VKI_CMSG_DATA(hdr);
      npassed = (hdr->cmsg_len - VKI_CMSG_ALIGN(sizeof(struct vki_cmsghdr)))
                      / sizeof(int);

      for (k = 0; k < npassed; k++) {
         // XXX: must we check the range on these fds with
         //      ML_(fd_allowed)()?
         if (VG_(clo_track_fds))
            ML_(record_fd_open_named)(tid, passed[k]);
      }
   }
}
1110 
1111 /* GrP kernel ignores sa_len (at least on Darwin); this checks the rest */
1112 static
pre_mem_read_sockaddr(ThreadId tid,const HChar * description,struct vki_sockaddr * sa,UInt salen)1113 void pre_mem_read_sockaddr ( ThreadId tid,
1114                              const HChar *description,
1115                              struct vki_sockaddr *sa, UInt salen )
1116 {
1117    HChar outmsg[VG_(strlen)( description ) + 30]; // large enough
1118    struct vki_sockaddr_un*  saun = (struct vki_sockaddr_un *)sa;
1119    struct vki_sockaddr_in*  sin  = (struct vki_sockaddr_in *)sa;
1120    struct vki_sockaddr_in6* sin6 = (struct vki_sockaddr_in6 *)sa;
1121 #  ifdef VKI_AF_BLUETOOTH
1122    struct vki_sockaddr_rc*  rc   = (struct vki_sockaddr_rc *)sa;
1123 #  endif
1124 #  ifdef VKI_AF_NETLINK
1125    struct vki_sockaddr_nl*  nl   = (struct vki_sockaddr_nl *)sa;
1126 #  endif
1127 
1128    /* NULL/zero-length sockaddrs are legal */
1129    if ( sa == NULL || salen == 0 ) return;
1130 
1131    VG_(sprintf) ( outmsg, description, "sa_family" );
1132    PRE_MEM_READ( outmsg, (Addr) &sa->sa_family, sizeof(vki_sa_family_t));
1133 #if defined(VGO_dragonfly)
1134    VG_(sprintf) ( outmsg, description, ".sa_len" );
1135    PRE_MEM_READ( outmsg, (Addr) &sa->sa_len, sizeof(char));
1136 #endif
1137 
1138    /* Don't do any extra checking if we cannot determine the sa_family. */
1139    if (! ML_(safe_to_deref) (&sa->sa_family, sizeof(vki_sa_family_t)))
1140       return;
1141 
1142    switch (sa->sa_family) {
1143 
1144       case VKI_AF_UNIX:
1145          if (ML_(safe_to_deref) (&saun->sun_path, sizeof (Addr))) {
1146             VG_(sprintf) ( outmsg, description, "sun_path" );
1147             PRE_MEM_RASCIIZ( outmsg, (Addr) saun->sun_path );
1148             // GrP fixme max of sun_len-2? what about nul char?
1149          }
1150          break;
1151 
1152       case VKI_AF_INET:
1153          VG_(sprintf) ( outmsg, description, "sin_port" );
1154          PRE_MEM_READ( outmsg, (Addr) &sin->sin_port, sizeof (sin->sin_port) );
1155          VG_(sprintf) ( outmsg, description, "sin_addr" );
1156          PRE_MEM_READ( outmsg, (Addr) &sin->sin_addr, sizeof (sin->sin_addr) );
1157          break;
1158 
1159       case VKI_AF_INET6:
1160          VG_(sprintf) ( outmsg, description, "sin6_port" );
1161          PRE_MEM_READ( outmsg,
1162             (Addr) &sin6->sin6_port, sizeof (sin6->sin6_port) );
1163          VG_(sprintf) ( outmsg, description, "sin6_flowinfo" );
1164          PRE_MEM_READ( outmsg,
1165             (Addr) &sin6->sin6_flowinfo, sizeof (sin6->sin6_flowinfo) );
1166          VG_(sprintf) ( outmsg, description, "sin6_addr" );
1167          PRE_MEM_READ( outmsg,
1168             (Addr) &sin6->sin6_addr, sizeof (sin6->sin6_addr) );
1169          VG_(sprintf) ( outmsg, description, "sin6_scope_id" );
1170          PRE_MEM_READ( outmsg,
1171             (Addr) &sin6->sin6_scope_id, sizeof (sin6->sin6_scope_id) );
1172          break;
1173 
1174 #     ifdef VKI_AF_BLUETOOTH
1175       case VKI_AF_BLUETOOTH:
1176          VG_(sprintf) ( outmsg, description, "rc_bdaddr" );
1177          PRE_MEM_READ( outmsg, (Addr) &rc->rc_bdaddr, sizeof (rc->rc_bdaddr) );
1178          VG_(sprintf) ( outmsg, description, "rc_channel" );
1179          PRE_MEM_READ( outmsg, (Addr) &rc->rc_channel, sizeof (rc->rc_channel) );
1180          break;
1181 #     endif
1182 
1183 #     ifdef VKI_AF_NETLINK
1184       case VKI_AF_NETLINK:
1185          VG_(sprintf)(outmsg, description, "nl_pid");
1186          PRE_MEM_READ(outmsg, (Addr)&nl->nl_pid, sizeof(nl->nl_pid));
1187          VG_(sprintf)(outmsg, description, "nl_groups");
1188          PRE_MEM_READ(outmsg, (Addr)&nl->nl_groups, sizeof(nl->nl_groups));
1189          break;
1190 #     endif
1191 
1192 #     ifdef VKI_AF_UNSPEC
1193       case VKI_AF_UNSPEC:
1194          break;
1195 #     endif
1196 
1197       default:
1198          /* No specific information about this address family.
1199             Let's just check the full data following the family.
1200             Note that this can give false positive if this (unknown)
1201             struct sockaddr_???? has padding bytes between its elements. */
1202          VG_(sprintf) ( outmsg, description, "sa_data" );
1203          PRE_MEM_READ( outmsg, (Addr)&sa->sa_family + sizeof(sa->sa_family),
1204                        salen -  sizeof(sa->sa_family));
1205          break;
1206    }
1207 }
1208 
1209 /* Dereference a pointer to a UInt. */
deref_UInt(ThreadId tid,Addr a,const HChar * s)1210 static UInt deref_UInt ( ThreadId tid, Addr a, const HChar* s )
1211 {
1212    UInt* a_p = (UInt*)a;
1213    PRE_MEM_READ( s, (Addr)a_p, sizeof(UInt) );
1214    if (a_p == NULL || ! ML_(safe_to_deref) (a_p, sizeof(UInt)))
1215       return 0;
1216    else
1217       return *a_p;
1218 }
1219 
ML_(buf_and_len_pre_check)1220 void ML_(buf_and_len_pre_check) ( ThreadId tid, Addr buf_p, Addr buflen_p,
1221                                   const HChar* buf_s, const HChar* buflen_s )
1222 {
1223    if (VG_(tdict).track_pre_mem_write) {
1224       UInt buflen_in = deref_UInt( tid, buflen_p, buflen_s);
1225       if (buflen_in > 0) {
1226          VG_(tdict).track_pre_mem_write(
1227             Vg_CoreSysCall, tid, buf_s, buf_p, buflen_in );
1228       }
1229    }
1230 }
1231 
ML_(buf_and_len_post_check)1232 void ML_(buf_and_len_post_check) ( ThreadId tid, SysRes res,
1233                                    Addr buf_p, Addr buflen_p, const HChar* s )
1234 {
1235    if (!sr_isError(res) && VG_(tdict).track_post_mem_write) {
1236       UInt buflen_out = deref_UInt( tid, buflen_p, s);
1237       if (buflen_out > 0 && buf_p != (Addr)NULL) {
1238          VG_(tdict).track_post_mem_write( Vg_CoreSysCall, tid, buf_p, buflen_out );
1239       }
1240    }
1241 }
1242 
1243 /* ---------------------------------------------------------------------
1244    Data seg end, for brk()
1245    ------------------------------------------------------------------ */
1246 
1247 /*   +--------+------------+
1248      | anon   |    resvn   |
1249      +--------+------------+
1250 
1251      ^     ^  ^
1252      |     |  boundary is page aligned
1253      |     VG_(brk_limit) -- no alignment constraint
1254      VG_(brk_base) -- page aligned -- does not move
1255 
1256      Both the anon part and the reservation part are always at least
1257      one page.
1258 */
1259 
/* Set the new data segment end to NEWBRK.  If this succeeds, return
   NEWBRK, else return the current data segment end.

   newbrk - requested new end of the data segment.
   tid    - thread making the request; only used in the diagnostics
            printed when the segment cannot be grown.

   Three cases are handled: a request below VG_(brk_base) is refused;
   a request below the current limit shrinks the segment (lazily, see
   below); anything else grows it, extending the anon segment into the
   adjacent reservation when the request no longer fits. */

static Addr do_brk ( Addr newbrk, ThreadId tid )
{
   NSegment const* aseg;
   Addr newbrkP;
   SizeT delta;
   /* Compile-time switch for the trace output below. */
   Bool debug = False;

   if (debug)
      VG_(printf)("\ndo_brk: brk_base=%#lx brk_limit=%#lx newbrk=%#lx\n",
		  VG_(brk_base), VG_(brk_limit), newbrk);

   if (0) VG_(am_show_nsegments)(0, "in_brk");

   if (newbrk < VG_(brk_base))
      /* Clearly impossible. */
      goto bad;

   if (newbrk < VG_(brk_limit)) {
      /* shrinking the data segment.  Be lazy and don't munmap the
         excess area. */
      NSegment const * seg = VG_(am_find_nsegment)(newbrk);
      vg_assert(seg);

      /* Translations made from the area being given up must not
         survive it. */
      if (seg->hasT)
         VG_(discard_translations)( newbrk, VG_(brk_limit) - newbrk,
                                    "do_brk(shrink)" );
      /* Since we're being lazy and not unmapping pages, we have to
         zero out the area, so that if the area later comes back into
         circulation, it will be filled with zeroes, as if it really
         had been unmapped and later remapped.  Be a bit paranoid and
         try hard to ensure we're not going to segfault by doing the
         write - check both ends of the range are in the same segment
         and that segment is writable. */
      NSegment const * seg2;

      seg2 = VG_(am_find_nsegment)( VG_(brk_limit) - 1 );
      vg_assert(seg2);

      if (seg == seg2 && seg->hasW)
         VG_(memset)( (void*)newbrk, 0, VG_(brk_limit) - newbrk );

      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* otherwise we're expanding the brk segment. */
   /* Find the segment holding the last byte currently in use, or the
      segment at the base if the data segment is still empty. */
   if (VG_(brk_limit) > VG_(brk_base))
      aseg = VG_(am_find_nsegment)( VG_(brk_limit)-1 );
   else
      aseg = VG_(am_find_nsegment)( VG_(brk_limit) );

   /* These should be assured by setup_client_dataseg in m_main. */
   vg_assert(aseg);
   vg_assert(aseg->kind == SkAnonC);

   if (newbrk <= aseg->end + 1) {
      /* still fits within the anon segment. */
      VG_(brk_limit) = newbrk;
      return newbrk;
   }

   /* The anon segment must grow: by whole pages, from its current end
      up to the page boundary at or above NEWBRK. */
   newbrkP = VG_PGROUNDUP(newbrk);
   delta = newbrkP - (aseg->end + 1);
   vg_assert(delta > 0);
   vg_assert(VG_IS_PAGE_ALIGNED(delta));

   /* Set by the extend call below; selects which of the two failure
      messages is printed. */
   Bool overflow = False;
   if (! VG_(am_extend_into_adjacent_reservation_client)( aseg->start, delta,
                                                          &overflow)) {
      if (overflow) {
         /* This complaint is only ever printed once per run. */
         static Bool alreadyComplained = False;
         if (!alreadyComplained) {
            alreadyComplained = True;
            if (VG_(clo_verbosity) > 0) {
               VG_(umsg)("brk segment overflow in thread #%u: "
                         "can't grow to %#lx\n",
                         tid, newbrkP);
               VG_(umsg)("(see section Limitations in user manual)\n");
               VG_(umsg)("NOTE: further instances of this message "
                         "will not be shown\n");
            }
         }
      } else {
         if (VG_(clo_verbosity) > 0) {
            VG_(umsg)("Cannot map memory to grow brk segment in thread #%u "
                      "to %#lx\n", tid, newbrkP);
            VG_(umsg)("(see section Limitations in user manual)\n");
         }
      }
      goto bad;
   }

   VG_(brk_limit) = newbrk;
   return newbrk;

  bad:
   /* Failure: the limit is left untouched and reported back. */
   return VG_(brk_limit);
}
1361 
1362 
1363 /* ---------------------------------------------------------------------
1364    Vet file descriptors for sanity
1365    ------------------------------------------------------------------ */
1366 /*
1367 > - what does the "Bool soft" parameter mean?
1368 
1369 (Tom Hughes, 3 Oct 05):
1370 
1371 Whether or not to consider a file descriptor invalid if it is above
1372 the current soft limit.
1373 
1374 Basically if we are testing whether a newly created file descriptor is
1375 valid (in a post handler) then we set soft to true, and if we are
1376 testing whether a file descriptor that is about to be used (in a pre
1377 handler) is valid [viz, an already-existing fd] then we set it to false.
1378 
1379 The point is that if the (virtual) soft limit is lowered then any
1380 existing descriptors can still be read/written/closed etc (so long as
1381 they are below the valgrind reserved descriptors) but no new
1382 descriptors can be created above the new soft limit.
1383 
1384 (jrs 4 Oct 05: in which case, I've renamed it "isNewFd")
1385 */
1386 
1387 /* Return true if we're allowed to use or create this fd */
ML_(fd_allowed)1388 Bool ML_(fd_allowed)(Int fd, const HChar *syscallname, ThreadId tid,
1389                      Bool isNewFd)
1390 {
1391    Bool allowed = True;
1392 
1393    /* hard limits always apply */
1394    if (fd < 0 || fd >= VG_(fd_hard_limit))
1395       allowed = False;
1396 
1397    /* hijacking the output fds is never allowed */
1398    if (fd == VG_(log_output_sink).fd || fd == VG_(xml_output_sink).fd)
1399       allowed = False;
1400 
1401    /* if creating a new fd (rather than using an existing one), the
1402       soft limit must also be observed */
1403    if (isNewFd && fd >= VG_(fd_soft_limit))
1404       allowed = False;
1405 
1406    /* this looks like it ought to be included, but causes problems: */
1407    /*
1408    if (fd == 2 && VG_(debugLog_getLevel)() > 0)
1409       allowed = False;
1410    */
1411    /* The difficulty is as follows: consider a program P which expects
1412       to be able to mess with (redirect) its own stderr (fd 2).
1413       Usually to deal with P we would issue command line flags to send
1414       logging somewhere other than stderr, so as not to disrupt P.
1415       The problem is that -d unilaterally hijacks stderr with no
1416       consultation with P.  And so, if this check is enabled, P will
1417       work OK normally but fail if -d is issued.
1418 
1419       Basically -d is a hack and you take your chances when using it.
1420       It's very useful for low level debugging -- particularly at
1421       startup -- and having its presence change the behaviour of the
1422       client is exactly what we don't want.  */
1423 
1424    /* croak? */
1425    if ((!allowed) && VG_(showing_core_errors)() ) {
1426       VG_(message)(Vg_UserMsg,
1427          "Warning: invalid file descriptor %d in syscall %s()\n",
1428          fd, syscallname);
1429       if (fd == VG_(log_output_sink).fd && VG_(log_output_sink).fd >= 0)
1430 	 VG_(message)(Vg_UserMsg,
1431             "   Use --log-fd=<number> to select an alternative log fd.\n");
1432       if (fd == VG_(xml_output_sink).fd && VG_(xml_output_sink).fd >= 0)
1433 	 VG_(message)(Vg_UserMsg,
1434             "   Use --xml-fd=<number> to select an alternative XML "
1435             "output fd.\n");
1436       // DDD: consider always printing this stack trace, it's useful.
1437       // Also consider also making this a proper core error, ie.
1438       // suppressible and all that.
1439       if (VG_(clo_verbosity) > 1) {
1440          VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1441       }
1442    }
1443 
1444    return allowed;
1445 }
1446 
1447 
1448 /* ---------------------------------------------------------------------
1449    Deal with a bunch of socket-related syscalls
1450    ------------------------------------------------------------------ */
1451 
1452 /* ------ */
1453 
1454 void
ML_(generic_PRE_sys_socketpair)1455 ML_(generic_PRE_sys_socketpair) ( ThreadId tid,
1456                                   UWord arg0, UWord arg1,
1457                                   UWord arg2, UWord arg3 )
1458 {
1459    /* int socketpair(int d, int type, int protocol, int sv[2]); */
1460    PRE_MEM_WRITE( "socketcall.socketpair(sv)",
1461                   arg3, 2*sizeof(int) );
1462 }
1463 
1464 SysRes
ML_(generic_POST_sys_socketpair)1465 ML_(generic_POST_sys_socketpair) ( ThreadId tid,
1466                                    SysRes res,
1467                                    UWord arg0, UWord arg1,
1468                                    UWord arg2, UWord arg3 )
1469 {
1470    SysRes r = res;
1471    Int fd1 = ((Int*)arg3)[0];
1472    Int fd2 = ((Int*)arg3)[1];
1473    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1474    POST_MEM_WRITE( arg3, 2*sizeof(int) );
1475    if (!ML_(fd_allowed)(fd1, "socketcall.socketpair", tid, True) ||
1476        !ML_(fd_allowed)(fd2, "socketcall.socketpair", tid, True)) {
1477       VG_(close)(fd1);
1478       VG_(close)(fd2);
1479       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1480    } else {
1481       POST_MEM_WRITE( arg3, 2*sizeof(int) );
1482       if (VG_(clo_track_fds)) {
1483          ML_(record_fd_open_nameless)(tid, fd1);
1484          ML_(record_fd_open_nameless)(tid, fd2);
1485       }
1486    }
1487    return r;
1488 }
1489 
1490 /* ------ */
1491 
1492 SysRes
ML_(generic_POST_sys_socket)1493 ML_(generic_POST_sys_socket) ( ThreadId tid, SysRes res )
1494 {
1495    SysRes r = res;
1496    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1497    if (!ML_(fd_allowed)(sr_Res(res), "socket", tid, True)) {
1498       VG_(close)(sr_Res(res));
1499       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1500    } else {
1501       if (VG_(clo_track_fds))
1502          ML_(record_fd_open_nameless)(tid, sr_Res(res));
1503    }
1504    return r;
1505 }
1506 
1507 /* ------ */
1508 
1509 void
ML_(generic_PRE_sys_bind)1510 ML_(generic_PRE_sys_bind) ( ThreadId tid,
1511                             UWord arg0, UWord arg1, UWord arg2 )
1512 {
1513    /* int bind(int sockfd, struct sockaddr *my_addr,
1514                int addrlen); */
1515    pre_mem_read_sockaddr(
1516       tid, "socketcall.bind(my_addr.%s)",
1517       (struct vki_sockaddr *) arg1, arg2
1518    );
1519 }
1520 
1521 /* ------ */
1522 
1523 void
ML_(generic_PRE_sys_accept)1524 ML_(generic_PRE_sys_accept) ( ThreadId tid,
1525                               UWord arg0, UWord arg1, UWord arg2 )
1526 {
1527    /* int accept(int s, struct sockaddr *addr, int *addrlen); */
1528    Addr addr_p     = arg1;
1529    Addr addrlen_p  = arg2;
1530    if (addr_p != (Addr)NULL)
1531       ML_(buf_and_len_pre_check) ( tid, addr_p, addrlen_p,
1532                                    "socketcall.accept(addr)",
1533                                    "socketcall.accept(addrlen_in)" );
1534 }
1535 
1536 SysRes
ML_(generic_POST_sys_accept)1537 ML_(generic_POST_sys_accept) ( ThreadId tid,
1538                                SysRes res,
1539                                UWord arg0, UWord arg1, UWord arg2 )
1540 {
1541    SysRes r = res;
1542    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1543    if (!ML_(fd_allowed)(sr_Res(res), "accept", tid, True)) {
1544       VG_(close)(sr_Res(res));
1545       r = VG_(mk_SysRes_Error)( VKI_EMFILE );
1546    } else {
1547       Addr addr_p     = arg1;
1548       Addr addrlen_p  = arg2;
1549       if (addr_p != (Addr)NULL)
1550          ML_(buf_and_len_post_check) ( tid, res, addr_p, addrlen_p,
1551                                        "socketcall.accept(addrlen_out)" );
1552       if (VG_(clo_track_fds))
1553           ML_(record_fd_open_nameless)(tid, sr_Res(res));
1554    }
1555    return r;
1556 }
1557 
1558 /* ------ */
1559 
1560 void
ML_(generic_PRE_sys_sendto)1561 ML_(generic_PRE_sys_sendto) ( ThreadId tid,
1562                               UWord arg0, UWord arg1, UWord arg2,
1563                               UWord arg3, UWord arg4, UWord arg5 )
1564 {
1565    /* int sendto(int s, const void *msg, int len,
1566                  unsigned int flags,
1567                  const struct sockaddr *to, int tolen); */
1568    PRE_MEM_READ( "socketcall.sendto(msg)",
1569                  arg1, /* msg */
1570                  arg2  /* len */ );
1571    pre_mem_read_sockaddr(
1572       tid, "socketcall.sendto(to.%s)",
1573       (struct vki_sockaddr *) arg4, arg5
1574    );
1575 }
1576 
1577 /* ------ */
1578 
1579 void
ML_(generic_PRE_sys_send)1580 ML_(generic_PRE_sys_send) ( ThreadId tid,
1581                             UWord arg0, UWord arg1, UWord arg2 )
1582 {
1583    /* int send(int s, const void *msg, size_t len, int flags); */
1584    PRE_MEM_READ( "socketcall.send(msg)",
1585                   arg1, /* msg */
1586                   arg2  /* len */ );
1587 
1588 }
1589 
1590 /* ------ */
1591 
1592 void
ML_(generic_PRE_sys_recvfrom)1593 ML_(generic_PRE_sys_recvfrom) ( ThreadId tid,
1594                                 UWord arg0, UWord arg1, UWord arg2,
1595                                 UWord arg3, UWord arg4, UWord arg5 )
1596 {
1597    /* int recvfrom(int s, void *buf, int len, unsigned int flags,
1598                    struct sockaddr *from, int *fromlen); */
1599    Addr buf_p      = arg1;
1600    Int  len        = arg2;
1601    Addr from_p     = arg4;
1602    Addr fromlen_p  = arg5;
1603    PRE_MEM_WRITE( "socketcall.recvfrom(buf)", buf_p, len );
1604    if (from_p != (Addr)NULL)
1605       ML_(buf_and_len_pre_check) ( tid, from_p, fromlen_p,
1606                                    "socketcall.recvfrom(from)",
1607                                    "socketcall.recvfrom(fromlen_in)" );
1608 }
1609 
1610 void
ML_(generic_POST_sys_recvfrom)1611 ML_(generic_POST_sys_recvfrom) ( ThreadId tid,
1612                                  SysRes res,
1613                                  UWord arg0, UWord arg1, UWord arg2,
1614                                  UWord arg3, UWord arg4, UWord arg5 )
1615 {
1616    Addr buf_p      = arg1;
1617    Int  len        = arg2;
1618    Addr from_p     = arg4;
1619    Addr fromlen_p  = arg5;
1620 
1621    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1622    if (from_p != (Addr)NULL)
1623       ML_(buf_and_len_post_check) ( tid, res, from_p, fromlen_p,
1624                                     "socketcall.recvfrom(fromlen_out)" );
1625    POST_MEM_WRITE( buf_p, len );
1626 }
1627 
1628 /* ------ */
1629 
1630 void
ML_(generic_PRE_sys_recv)1631 ML_(generic_PRE_sys_recv) ( ThreadId tid,
1632                             UWord arg0, UWord arg1, UWord arg2 )
1633 {
1634    /* int recv(int s, void *buf, int len, unsigned int flags); */
1635    /* man 2 recv says:
1636       The  recv call is normally used only on a connected socket
1637       (see connect(2)) and is identical to recvfrom with a  NULL
1638       from parameter.
1639    */
1640    PRE_MEM_WRITE( "socketcall.recv(buf)",
1641                   arg1, /* buf */
1642                   arg2  /* len */ );
1643 }
1644 
1645 void
ML_(generic_POST_sys_recv)1646 ML_(generic_POST_sys_recv) ( ThreadId tid,
1647                              UWord res,
1648                              UWord arg0, UWord arg1, UWord arg2 )
1649 {
1650    if (res >= 0 && arg1 != 0) {
1651       POST_MEM_WRITE( arg1, /* buf */
1652                       arg2  /* len */ );
1653    }
1654 }
1655 
1656 /* ------ */
1657 
1658 void
ML_(generic_PRE_sys_connect)1659 ML_(generic_PRE_sys_connect) ( ThreadId tid,
1660                                UWord arg0, UWord arg1, UWord arg2 )
1661 {
1662    /* int connect(int sockfd,
1663                   struct sockaddr *serv_addr, int addrlen ); */
1664    pre_mem_read_sockaddr( tid,
1665                           "socketcall.connect(serv_addr.%s)",
1666                           (struct vki_sockaddr *) arg1, arg2);
1667 }
1668 
1669 /* ------ */
1670 
1671 void
ML_(generic_PRE_sys_setsockopt)1672 ML_(generic_PRE_sys_setsockopt) ( ThreadId tid,
1673                                   UWord arg0, UWord arg1, UWord arg2,
1674                                   UWord arg3, UWord arg4 )
1675 {
1676    /* int setsockopt(int s, int level, int optname,
1677                      const void *optval, int optlen); */
1678    PRE_MEM_READ( "socketcall.setsockopt(optval)",
1679                  arg3, /* optval */
1680                  arg4  /* optlen */ );
1681 }
1682 
1683 /* ------ */
1684 
1685 void
ML_(generic_PRE_sys_getsockname)1686 ML_(generic_PRE_sys_getsockname) ( ThreadId tid,
1687                                    UWord arg0, UWord arg1, UWord arg2 )
1688 {
1689    /* int getsockname(int s, struct sockaddr* name, int* namelen) */
1690    Addr name_p     = arg1;
1691    Addr namelen_p  = arg2;
1692    /* Nb: name_p cannot be NULL */
1693    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1694                                 "socketcall.getsockname(name)",
1695                                 "socketcall.getsockname(namelen_in)" );
1696 }
1697 
1698 void
ML_(generic_POST_sys_getsockname)1699 ML_(generic_POST_sys_getsockname) ( ThreadId tid,
1700                                     SysRes res,
1701                                     UWord arg0, UWord arg1, UWord arg2 )
1702 {
1703    Addr name_p     = arg1;
1704    Addr namelen_p  = arg2;
1705    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1706    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1707                                  "socketcall.getsockname(namelen_out)" );
1708 }
1709 
1710 /* ------ */
1711 
1712 void
ML_(generic_PRE_sys_getpeername)1713 ML_(generic_PRE_sys_getpeername) ( ThreadId tid,
1714                                    UWord arg0, UWord arg1, UWord arg2 )
1715 {
1716    /* int getpeername(int s, struct sockaddr* name, int* namelen) */
1717    Addr name_p     = arg1;
1718    Addr namelen_p  = arg2;
1719    /* Nb: name_p cannot be NULL */
1720    ML_(buf_and_len_pre_check) ( tid, name_p, namelen_p,
1721                                 "socketcall.getpeername(name)",
1722                                 "socketcall.getpeername(namelen_in)" );
1723 }
1724 
1725 void
ML_(generic_POST_sys_getpeername)1726 ML_(generic_POST_sys_getpeername) ( ThreadId tid,
1727                                     SysRes res,
1728                                     UWord arg0, UWord arg1, UWord arg2 )
1729 {
1730    Addr name_p     = arg1;
1731    Addr namelen_p  = arg2;
1732    vg_assert(!sr_isError(res)); /* guaranteed by caller */
1733    ML_(buf_and_len_post_check) ( tid, res, name_p, namelen_p,
1734                                  "socketcall.getpeername(namelen_out)" );
1735 }
1736 
1737 /* ------ */
1738 
1739 void
ML_(generic_PRE_sys_sendmsg)1740 ML_(generic_PRE_sys_sendmsg) ( ThreadId tid, const HChar *name,
1741                                struct vki_msghdr *msg )
1742 {
1743    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_read_sendmsg, False );
1744 }
1745 
1746 /* ------ */
1747 
1748 void
ML_(generic_PRE_sys_recvmsg)1749 ML_(generic_PRE_sys_recvmsg) ( ThreadId tid, const HChar *name,
1750                                struct vki_msghdr *msg )
1751 {
1752    msghdr_foreachfield ( tid, name, msg, ~0, pre_mem_write_recvmsg, True );
1753 }
1754 
1755 void
ML_(generic_POST_sys_recvmsg)1756 ML_(generic_POST_sys_recvmsg) ( ThreadId tid, const HChar *name,
1757                                 struct vki_msghdr *msg, UInt length )
1758 {
1759    msghdr_foreachfield( tid, name, msg, length, post_mem_write_recvmsg, True );
1760    check_cmsg_for_fds( tid, msg );
1761 }
1762 
1763 
1764 /* ---------------------------------------------------------------------
1765    Deal with a bunch of IPC related syscalls
1766    ------------------------------------------------------------------ */
1767 
1768 /* ------ */
1769 
1770 void
ML_(generic_PRE_sys_semop)1771 ML_(generic_PRE_sys_semop) ( ThreadId tid,
1772                              UWord arg0, UWord arg1, UWord arg2 )
1773 {
1774    /* int semop(int semid, struct sembuf *sops, unsigned nsops); */
1775    PRE_MEM_READ( "semop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1776 }
1777 
1778 /* ------ */
1779 
1780 void
ML_(generic_PRE_sys_semtimedop)1781 ML_(generic_PRE_sys_semtimedop) ( ThreadId tid,
1782                                   UWord arg0, UWord arg1,
1783                                   UWord arg2, UWord arg3 )
1784 {
1785    /* int semtimedop(int semid, struct sembuf *sops, unsigned nsops,
1786                      struct timespec *timeout); */
1787    PRE_MEM_READ( "semtimedop(sops)", arg1, arg2 * sizeof(struct vki_sembuf) );
1788    if (arg3 != 0)
1789       PRE_MEM_READ( "semtimedop(timeout)", arg3, sizeof(struct vki_timespec) );
1790 }
1791 
1792 /* ------ */
1793 
1794 static
get_sem_count(Int semid)1795 UInt get_sem_count( Int semid )
1796 {
1797    union vki_semun arg;
1798    SysRes res;
1799 
1800 #  if defined(__NR_semctl)
1801 #  if defined(VGO_darwin)
1802    /* Darwin has no specific 64 bit semid_ds, but has __NR_semctl. */
1803    struct vki_semid_ds buf;
1804    arg.buf = &buf;
1805 #  else
1806    struct vki_semid64_ds buf;
1807    arg.buf64 = &buf;
1808 #  endif
1809    res = VG_(do_syscall4)(__NR_semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1810    if (sr_isError(res))
1811       return 0;
1812 
1813    return buf.sem_nsems;
1814 
1815 #  elif defined(__NR___semctl)
1816    struct vki_semid_ds buf;
1817    arg.buf = &buf;
1818    res = VG_(do_syscall4)(__NR___semctl, semid, 0, VKI_IPC_STAT, *(UWord *)&arg);
1819    if (sr_isError(res))
1820       return 0;
1821 
1822    return buf.sem_nsems;
1823 
1824 #  elif defined(__NR_semsys) /* Solaris */
1825    struct vki_semid_ds buf;
1826    arg.buf = &buf;
1827    res = VG_(do_syscall5)(__NR_semsys, VKI_SEMCTL, semid, 0, VKI_IPC_STAT,
1828                           *(UWord *)&arg);
1829    if (sr_isError(res))
1830       return 0;
1831 
1832    return buf.sem_nsems;
1833 
1834 #  else
1835    struct vki_semid_ds buf;
1836    arg.buf = &buf;
1837    res = VG_(do_syscall5)(__NR_ipc, 3 /* IPCOP_semctl */, semid, 0,
1838                           VKI_IPC_STAT, (UWord)&arg);
1839    if (sr_isError(res))
1840       return 0;
1841 
1842    return buf.sem_nsems;
1843 #  endif
1844 }
1845 
1846 void
ML_(generic_PRE_sys_semctl)1847 ML_(generic_PRE_sys_semctl) ( ThreadId tid,
1848                               UWord arg0, UWord arg1,
1849                               UWord arg2, UWord arg3 )
1850 {
1851    /* int semctl(int semid, int semnum, int cmd, ...); */
1852    union vki_semun arg = *(union vki_semun *)&arg3;
1853    UInt nsems;
1854    switch (arg2 /* cmd */) {
1855 #if defined(VKI_IPC_INFO)
1856    case VKI_IPC_INFO:
1857    case VKI_SEM_INFO:
1858 #if defined(VKI_IPC_64)
1859    case VKI_IPC_INFO|VKI_IPC_64:
1860    case VKI_SEM_INFO|VKI_IPC_64:
1861 #endif
1862 #if !defined(VGO_dragonfly)
1863       PRE_MEM_WRITE( "semctl(IPC_INFO, arg.buf)",
1864                      (Addr)arg.buf, sizeof(struct vki_seminfo) );
1865 #endif
1866       break;
1867 
1868    case VKI_IPC_STAT:
1869 #if defined(VKI_SEM_STAT)
1870    case VKI_SEM_STAT:
1871 #endif
1872       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1873                      (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1874       break;
1875 
1876 #if defined(VKI_IPC_64)
1877    case VKI_IPC_STAT|VKI_IPC_64:
1878 #if defined(VKI_SEM_STAT)
1879    case VKI_SEM_STAT|VKI_IPC_64:
1880 #endif
1881 #endif
1882 #if defined(VKI_IPC_STAT64)
1883    case VKI_IPC_STAT64:
1884 #endif
1885 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1886       PRE_MEM_WRITE( "semctl(IPC_STAT, arg.buf)",
1887                      (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1888       break;
1889 #endif
1890 
1891    case VKI_IPC_SET:
1892       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1893                     (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1894       break;
1895 
1896 #if defined(VKI_IPC_64)
1897    case VKI_IPC_SET|VKI_IPC_64:
1898 #endif
1899 #if defined(VKI_IPC_SET64)
1900    case VKI_IPC_SET64:
1901 #endif
1902 #if defined(VKI_IPC64) || defined(VKI_IPC_SET64)
1903       PRE_MEM_READ( "semctl(IPC_SET, arg.buf)",
1904                     (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1905       break;
1906 #endif
1907 
1908    case VKI_GETALL:
1909 #if defined(VKI_IPC_64)
1910    case VKI_GETALL|VKI_IPC_64:
1911 #endif
1912       nsems = get_sem_count( arg0 );
1913       PRE_MEM_WRITE( "semctl(IPC_GETALL, arg.array)",
1914                      (Addr)arg.array, sizeof(unsigned short) * nsems );
1915       break;
1916 
1917    case VKI_SETALL:
1918 #if defined(VKI_IPC_64)
1919    case VKI_SETALL|VKI_IPC_64:
1920 #endif
1921       nsems = get_sem_count( arg0 );
1922       PRE_MEM_READ( "semctl(IPC_SETALL, arg.array)",
1923                     (Addr)arg.array, sizeof(unsigned short) * nsems );
1924       break;
1925    }
1926 }
1927 
1928 void
ML_(generic_POST_sys_semctl)1929 ML_(generic_POST_sys_semctl) ( ThreadId tid,
1930                                UWord res,
1931                                UWord arg0, UWord arg1,
1932                                UWord arg2, UWord arg3 )
1933 {
1934    union vki_semun arg = *(union vki_semun *)&arg3;
1935    UInt nsems;
1936    switch (arg2 /* cmd */) {
1937 #if defined(VKI_IPC_INFO)
1938    case VKI_IPC_INFO:
1939    case VKI_SEM_INFO:
1940 #if defined(VKI_IPC_64)
1941    case VKI_IPC_INFO|VKI_IPC_64:
1942    case VKI_SEM_INFO|VKI_IPC_64:
1943 #endif
1944 #if !defined(VGO_dragonfly)
1945       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_seminfo) );
1946 #endif
1947       break;
1948 #endif
1949 
1950    case VKI_IPC_STAT:
1951 #if defined(VKI_SEM_STAT)
1952    case VKI_SEM_STAT:
1953 #endif
1954       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid_ds) );
1955       break;
1956 
1957 #if defined(VKI_IPC_64)
1958    case VKI_IPC_STAT|VKI_IPC_64:
1959    case VKI_SEM_STAT|VKI_IPC_64:
1960 #endif
1961 #if defined(VKI_IPC_STAT64)
1962    case VKI_IPC_STAT64:
1963 #endif
1964 #if defined(VKI_IPC_64) || defined(VKI_IPC_STAT64)
1965       POST_MEM_WRITE( (Addr)arg.buf, sizeof(struct vki_semid64_ds) );
1966       break;
1967 #endif
1968 
1969    case VKI_GETALL:
1970 #if defined(VKI_IPC_64)
1971    case VKI_GETALL|VKI_IPC_64:
1972 #endif
1973       nsems = get_sem_count( arg0 );
1974       POST_MEM_WRITE( (Addr)arg.array, sizeof(unsigned short) * nsems );
1975       break;
1976    }
1977 }
1978 #endif
1979 
1980 /* ------ */
1981 
1982 /* ------ */
1983 
1984 static
get_shm_size(Int shmid)1985 SizeT get_shm_size ( Int shmid )
1986 {
1987 #if defined(__NR_shmctl)
1988 #  ifdef VKI_IPC_64
1989    struct vki_shmid64_ds buf;
1990 #    if defined(VGP_amd64_linux) || defined(VGP_arm64_linux)
1991      /* See bug 222545 comment 7 */
1992      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1993                                      VKI_IPC_STAT, (UWord)&buf);
1994 #    else
1995      SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid,
1996                                      VKI_IPC_STAT|VKI_IPC_64, (UWord)&buf);
1997 #    endif
1998 #  else /* !def VKI_IPC_64 */
1999    struct vki_shmid_ds buf;
2000    SysRes __res = VG_(do_syscall3)(__NR_shmctl, shmid, VKI_IPC_STAT, (UWord)&buf);
2001 #  endif /* def VKI_IPC_64 */
2002 #elif defined(__NR_shmsys) /* Solaris */
2003    struct vki_shmid_ds buf;
2004    SysRes __res = VG_(do_syscall4)(__NR_shmsys, VKI_SHMCTL, shmid, VKI_IPC_STAT,
2005                          (UWord)&buf);
2006 #else
2007    struct vki_shmid_ds buf;
2008    SysRes __res = VG_(do_syscall5)(__NR_ipc, 24 /* IPCOP_shmctl */, shmid,
2009                                  VKI_IPC_STAT, 0, (UWord)&buf);
2010 #endif
2011    if (sr_isError(__res))
2012       return 0;
2013 
2014    return (SizeT) buf.shm_segsz;
2015 }
2016 
2017 UWord
ML_(generic_PRE_sys_shmat)2018 ML_(generic_PRE_sys_shmat) ( ThreadId tid,
2019                              UWord arg0, UWord arg1, UWord arg2 )
2020 {
2021    /* void *shmat(int shmid, const void *shmaddr, int shmflg); */
2022    SizeT  segmentSize = get_shm_size ( arg0 );
2023    UWord tmp;
2024    Bool  ok;
2025    if (arg1 == 0) {
2026       /* arm-linux only: work around the fact that
2027          VG_(am_get_advisory_client_simple) produces something that is
2028          VKI_PAGE_SIZE aligned, whereas what we want is something
2029          VKI_SHMLBA aligned, and VKI_SHMLBA >= VKI_PAGE_SIZE.  Hence
2030          increase the request size by VKI_SHMLBA - VKI_PAGE_SIZE and
2031          then round the result up to the next VKI_SHMLBA boundary.
2032          See bug 222545 comment 15.  So far, arm-linux is the only
2033          platform where this is known to be necessary. */
2034       vg_assert(VKI_SHMLBA >= VKI_PAGE_SIZE);
2035       if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2036          segmentSize += VKI_SHMLBA - VKI_PAGE_SIZE;
2037       }
2038       tmp = VG_(am_get_advisory_client_simple)(0, segmentSize, &ok);
2039       if (ok) {
2040          if (VKI_SHMLBA > VKI_PAGE_SIZE) {
2041             arg1 = VG_ROUNDUP(tmp, VKI_SHMLBA);
2042          } else {
2043             arg1 = tmp;
2044          }
2045       }
2046    }
2047    else if (!ML_(valid_client_addr)(arg1, segmentSize, tid, "shmat"))
2048       arg1 = 0;
2049    return arg1;
2050 }
2051 
2052 void
ML_(generic_POST_sys_shmat)2053 ML_(generic_POST_sys_shmat) ( ThreadId tid,
2054                               UWord res,
2055                               UWord arg0, UWord arg1, UWord arg2 )
2056 {
2057    SizeT segmentSize = VG_PGROUNDUP(get_shm_size(arg0));
2058    if ( segmentSize > 0 ) {
2059       UInt prot = VKI_PROT_READ|VKI_PROT_WRITE;
2060       Bool d;
2061 
2062       if (arg2 & VKI_SHM_RDONLY)
2063          prot &= ~VKI_PROT_WRITE;
2064       /* It isn't exactly correct to pass 0 for the fd and offset
2065          here.  The kernel seems to think the corresponding section
2066          does have dev/ino numbers:
2067 
2068          04e52000-04ec8000 rw-s 00000000 00:06 1966090  /SYSV00000000 (deleted)
2069 
2070          However there is no obvious way to find them.  In order to
2071          cope with the discrepancy, aspacem's sync checker omits the
2072          dev/ino correspondence check in cases where V does not know
2073          the dev/ino. */
2074       d = VG_(am_notify_client_shmat)( res, segmentSize, prot );
2075 
2076       /* we don't distinguish whether it's read-only or
2077        * read-write -- it doesn't matter really. */
2078       VG_TRACK( new_mem_mmap, res, segmentSize, True, True, False,
2079                               0/*di_handle*/ );
2080       if (d)
2081          VG_(discard_translations)( (Addr)res,
2082                                     (ULong)VG_PGROUNDUP(segmentSize),
2083                                     "ML_(generic_POST_sys_shmat)" );
2084    }
2085 }
2086 
2087 /* ------ */
2088 
2089 Bool
ML_(generic_PRE_sys_shmdt)2090 ML_(generic_PRE_sys_shmdt) ( ThreadId tid, UWord arg0 )
2091 {
2092    /* int shmdt(const void *shmaddr); */
2093    return ML_(valid_client_addr)(arg0, 1, tid, "shmdt");
2094 }
2095 
2096 void
ML_(generic_POST_sys_shmdt)2097 ML_(generic_POST_sys_shmdt) ( ThreadId tid, UWord res, UWord arg0 )
2098 {
2099    NSegment const* s = VG_(am_find_nsegment)(arg0);
2100 
2101    if (s != NULL) {
2102       Addr  s_start = s->start;
2103       SizeT s_len   = s->end+1 - s->start;
2104       Bool  d;
2105 
2106       vg_assert(s->kind == SkShmC);
2107       vg_assert(s->start == arg0);
2108 
2109       d = VG_(am_notify_munmap)(s_start, s_len);
2110       s = NULL; /* s is now invalid */
2111       VG_TRACK( die_mem_munmap, s_start, s_len );
2112       if (d)
2113          VG_(discard_translations)( s_start,
2114                                     (ULong)s_len,
2115                                     "ML_(generic_POST_sys_shmdt)" );
2116    }
2117 }
2118 /* ------ */
2119 
2120 void
ML_(generic_PRE_sys_shmctl)2121 ML_(generic_PRE_sys_shmctl) ( ThreadId tid,
2122                               UWord arg0, UWord arg1, UWord arg2 )
2123 {
2124    /* int shmctl(int shmid, int cmd, struct shmid_ds *buf); */
2125    switch (arg1 /* cmd */) {
2126 #if defined(VKI_IPC_INFO)
2127    case VKI_IPC_INFO:
2128 #   if defined(VGO_dragonfly)
2129       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2130                      arg2, sizeof(struct vki_shmid_ds) );
2131 #   else
2132       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2133                      arg2, sizeof(struct vki_shminfo) );
2134 #   endif
2135       break;
2136 #if defined(VKI_IPC_64)
2137    case VKI_IPC_INFO|VKI_IPC_64:
2138       PRE_MEM_WRITE( "shmctl(IPC_INFO, buf)",
2139                      arg2, sizeof(struct vki_shminfo64) );
2140       break;
2141 #endif
2142 #endif
2143 
2144 #if defined(VKI_SHM_INFO)
2145    case VKI_SHM_INFO:
2146 #if defined(VKI_IPC_64)
2147    case VKI_SHM_INFO|VKI_IPC_64:
2148 #endif
2149       PRE_MEM_WRITE( "shmctl(SHM_INFO, buf)",
2150                      arg2, sizeof(struct vki_shm_info) );
2151       break;
2152 #endif
2153 
2154    case VKI_IPC_STAT:
2155 #if defined(VKI_SHM_STAT)
2156    case VKI_SHM_STAT:
2157 #endif
2158       PRE_MEM_WRITE( "shmctl(IPC_STAT, buf)",
2159                      arg2, sizeof(struct vki_shmid_ds) );
2160       break;
2161 
2162 #if defined(VKI_IPC_64)
2163    case VKI_IPC_STAT|VKI_IPC_64:
2164    case VKI_SHM_STAT|VKI_IPC_64:
2165       PRE_MEM_WRITE( "shmctl(IPC_STAT, arg.buf)",
2166                      arg2, sizeof(struct vki_shmid64_ds) );
2167       break;
2168 #endif
2169 
2170    case VKI_IPC_SET:
2171       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2172                     arg2, sizeof(struct vki_shmid_ds) );
2173       break;
2174 
2175 #if defined(VKI_IPC_64)
2176    case VKI_IPC_SET|VKI_IPC_64:
2177       PRE_MEM_READ( "shmctl(IPC_SET, arg.buf)",
2178                     arg2, sizeof(struct vki_shmid64_ds) );
2179       break;
2180 #endif
2181    }
2182 }
2183 
2184 void
ML_(generic_POST_sys_shmctl)2185 ML_(generic_POST_sys_shmctl) ( ThreadId tid,
2186                                UWord res,
2187                                UWord arg0, UWord arg1, UWord arg2 )
2188 {
2189    switch (arg1 /* cmd */) {
2190 #if defined(VKI_IPC_INFO)
2191    case VKI_IPC_INFO:
2192 #   if defined(VGO_dragonfly)
2193       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2194 #   else
2195       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo) );
2196 #   endif
2197       break;
2198 #if defined(VKI_IPC_64)
2199    case VKI_IPC_INFO|VKI_IPC_64:
2200       POST_MEM_WRITE( arg2, sizeof(struct vki_shminfo64) );
2201       break;
2202 #endif
2203 #endif
2204 
2205 #if defined(VKI_SHM_INFO)
2206    case VKI_SHM_INFO:
2207    case VKI_SHM_INFO|VKI_IPC_64:
2208       POST_MEM_WRITE( arg2, sizeof(struct vki_shm_info) );
2209       break;
2210 #endif
2211 
2212    case VKI_IPC_STAT:
2213 #if defined(VKI_SHM_STAT)
2214    case VKI_SHM_STAT:
2215 #endif
2216       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid_ds) );
2217       break;
2218 
2219 #if defined(VKI_IPC_64)
2220    case VKI_IPC_STAT|VKI_IPC_64:
2221    case VKI_SHM_STAT|VKI_IPC_64:
2222       POST_MEM_WRITE( arg2, sizeof(struct vki_shmid64_ds) );
2223       break;
2224 #endif
2225 
2226 
2227    }
2228 }
2229 
2230 /* ---------------------------------------------------------------------
2231    Generic handler for mmap
2232    ------------------------------------------------------------------ */
2233 
2234 /*
 * Although mmap is specified by POSIX and the arguments are generally
 * consistent across platforms, the precise details of the low level
 * argument passing conventions differ. For example:
2238  *
2239  * - On x86-linux there is mmap (aka old_mmap) which takes the
2240  *   arguments in a memory block and the offset in bytes; and
2241  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2242  *   way and the offset in pages.
2243  *
2244  * - On ppc32-linux there is mmap (aka sys_mmap) which takes the
2245  *   arguments in the normal way and the offset in bytes; and
2246  *   mmap2 (aka sys_mmap2) which takes the arguments in the normal
2247  *   way and the offset in pages.
2248  *
2249  * - On amd64-linux everything is simple and there is just the one
2250  *   call, mmap (aka sys_mmap)  which takes the arguments in the
2251  *   normal way and the offset in bytes.
2252  *
2253  * - On s390x-linux there is mmap (aka old_mmap) which takes the
2254  *   arguments in a memory block and the offset in bytes. mmap2
2255  *   is also available (but not exported via unistd.h) with
2256  *   arguments in a memory block and the offset in pages.
2257  *
2258  * To cope with all this we provide a generic handler function here
2259  * and then each platform implements one or more system call handlers
2260  * which call this generic routine after extracting and normalising
2261  * the arguments.
2262  */
2263 
/* Generic mmap handler.  The arguments are the already-normalised mmap
   arguments: arg1 = requested address, arg2 = length, arg3 = prot,
   arg4 = flags, arg5 = fd, arg6 = offset.

   The mapping is performed here (via VG_(am_do_mmap_NO_NOTIFY)) at an
   address chosen by aspacem, and the result is returned as a SysRes:
   - EINVAL if the length is zero, if a MAP_FIXED address is not page
     aligned, if the offset is not page aligned, or if aspacem refuses
     the request;
   - ENOMEM if MAP_32BIT (without MAP_FIXED) was requested and either
     the advised range does not lie below 0x80000000 or the first mmap
     attempt failed;
   - otherwise whatever the last mmap attempt returned.
   On success aspacem, the debuginfo reader and the tool are notified
   of the new mapping.  Must not be used on Darwin (panics). */
SysRes
ML_(generic_PRE_sys_mmap) ( ThreadId tid,
                            UWord arg1, UWord arg2, UWord arg3,
                            UWord arg4, UWord arg5, Off64T arg6 )
{
   Addr       advised;
   SysRes     sres;
   MapRequest mreq;
   Bool       mreq_ok;

#  if defined(VGO_darwin)
   // Nb: we can't use this on Darwin, it has races:
   // * needs to RETRY if advisory succeeds but map fails
   //   (could have been some other thread in a nonblocking call)
   // * needs to not use fixed-position mmap() on Darwin
   //   (mmap will cheerfully smash whatever's already there, which might
   //   be a new mapping from some other thread in a nonblocking call)
   VG_(core_panic)("can't use ML_(generic_PRE_sys_mmap) on Darwin");
#  endif

   if (arg2 == 0) {
      /* SuSV3 says: If len is zero, mmap() shall fail and no mapping
         shall be established. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (arg4 & VKI_MAP_FIXED && !VG_IS_PAGE_ALIGNED(arg1)) {
      /* zap misaligned addresses, but only for MAP_FIXED requests. */
      /* SuSV3 says misaligned addresses only cause the MAP_FIXED case
         to fail, and that is the only case rejected here; a misaligned
         address without MAP_FIXED is passed on to aspacem below. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   if (!VG_IS_PAGE_ALIGNED(arg6)) {
      /* zap any misaligned offsets. */
      /* SuSV3 says: The off argument is constrained to be aligned and
         sized according to the value returned by sysconf() when
         passed _SC_PAGESIZE or _SC_PAGE_SIZE. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

   /* Figure out what kind of allocation constraints there are
      (fixed/hint/any), and ask aspacem what we should do. */
   mreq.start = arg1;
   mreq.len   = arg2;
   if (arg4 & VKI_MAP_FIXED) {
      mreq.rkind = MFixed;
   } else
#if defined(VKI_MAP_ALIGN) /* Solaris specific */
   if (arg4 & VKI_MAP_ALIGN) {
      mreq.rkind = MAlign;
      if (mreq.start == 0) {
         mreq.start = VKI_PAGE_SIZE;
      }
      /* VKI_MAP_FIXED and VKI_MAP_ALIGN don't like each other. */
      arg4 &= ~VKI_MAP_ALIGN;
   } else
#endif
   if (arg1 != 0) {
      mreq.rkind = MHint;
   } else {
      mreq.rkind = MAny;
   }

   /* Enquire ... */
   advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
   if (!mreq_ok) {
      /* Our request was bounced, so we'd better fail. */
      return VG_(mk_SysRes_Error)( VKI_EINVAL );
   }

#  if defined(VKI_MAP_32BIT)
   /* MAP_32BIT is royally unportable, so if the client asks for it, try our
      best to make it work (but without complexifying aspacemgr).
      If the user requested MAP_32BIT, the mmap-ed space must be in the
      first 2GB of the address space. So, return ENOMEM if aspacemgr
      advisory is above the first 2GB. If MAP_FIXED is also requested,
      MAP_32BIT has to be ignored.
      Assumption about aspacemgr behaviour: aspacemgr scans the address space
      from low addresses to find a free segment. No special effort is done
      to keep the first 2GB 'free' for this MAP_32BIT. So, this will often
      fail once the program has already allocated significant memory. */
   if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)) {
      if (advised + arg2 >= 0x80000000)
         return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* Otherwise we're OK (so far).  Install aspacem's choice of
      address, and let the mmap go through.  */
   sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                    arg4 | VKI_MAP_FIXED,
                                    arg5, arg6);

#  if defined(VKI_MAP_32BIT)
   /* No recovery trial if the advisory was not accepted. */
   if ((arg4 & VKI_MAP_32BIT) && !(arg4 & VKI_MAP_FIXED)
       && sr_isError(sres)) {
      return VG_(mk_SysRes_Error)( VKI_ENOMEM );
   }
#  endif

   /* A refinement: it may be that the kernel refused aspacem's choice
      of address.  If we were originally asked for a hinted mapping,
      there is still a last chance: try again at any address.
      Hence: */
   if (mreq.rkind == MHint && sr_isError(sres)) {
      mreq.start = 0;
      mreq.len   = arg2;
      mreq.rkind = MAny;
      advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
      if (!mreq_ok) {
         /* Our request was bounced, so we'd better fail. */
         return VG_(mk_SysRes_Error)( VKI_EINVAL );
      }
      /* and try again with the kernel */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4 | VKI_MAP_FIXED,
                                       arg5, arg6);
   }

   /* Yet another refinement : sometimes valgrind chooses an address
      which is not acceptable by the kernel. This at least happens
      when mmap-ing huge pages, using the flag MAP_HUGETLB.
      valgrind aspacem does not know about huge pages, and modifying
      it to handle huge pages is not straightforward (e.g. need
      to understand special file system mount options).
      So, let's just redo an mmap, without giving any constraint to
      the kernel. If that succeeds, check with aspacem that the returned
      address is acceptable.
      This will give a similar effect as if the user would have
      hinted that address.
      The aspacem state will be correctly updated afterwards.
      We however cannot do this last refinement when the user asked
      for a fixed mapping, as the user asked a specific address. */
   if (sr_isError(sres) && !(arg4 & VKI_MAP_FIXED)) {
      advised = 0;
      /* try mmap with NULL address and without VKI_MAP_FIXED
         to let the kernel decide. */
      sres = VG_(am_do_mmap_NO_NOTIFY)(advised, arg2, arg3,
                                       arg4,
                                       arg5, arg6);
      if (!sr_isError(sres)) {
         /* The kernel is supposed to know what it is doing, but let's
            do a last sanity check anyway, as if the chosen address had
            been initially hinted by the client. The whole point of this
            last try was to allow mmap of huge pages to succeed without
            making aspacem understand them, on the other hand the kernel
            does not know about valgrind reservations, so this mapping
            can end up in free space and reservations. */
         mreq.start = (Addr)sr_Res(sres);
         mreq.len   = arg2;
         mreq.rkind = MHint;
         advised = VG_(am_get_advisory)( &mreq, True/*client*/, &mreq_ok );
         vg_assert(mreq_ok && advised == mreq.start);
      }
   }

   if (!sr_isError(sres)) {
      ULong di_handle;
      /* Notify aspacem. */
      notify_core_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         arg4, /* the original flags value */
         arg5, /* fd */
         arg6  /* offset */
      );
      /* Load symbols? */
      di_handle = VG_(di_notify_mmap)( (Addr)sr_Res(sres),
                                       False/*allow_SkFileV*/, (Int)arg5 );
      /* Notify the tool. */
      notify_tool_of_mmap(
         (Addr)sr_Res(sres), /* addr kernel actually assigned */
         arg2, /* length */
         arg3, /* prot */
         di_handle /* so the tool can refer to the read debuginfo later,
                      if it wants. */
      );
   }

   /* Stay sane: a successful MAP_FIXED request must have landed at
      exactly the address the client asked for. */
   if (!sr_isError(sres) && (arg4 & VKI_MAP_FIXED))
      vg_assert(sr_Res(sres) == arg1);

   return sres;
}
2452 
2453 
2454 /* ---------------------------------------------------------------------
2455    The Main Entertainment ... syscall wrappers
2456    ------------------------------------------------------------------ */
2457 
2458 /* Note: the PRE() and POST() wrappers are for the actual functions
2459    implementing the system calls in the OS kernel.  These mostly have
2460    names like sys_write();  a few have names like old_mmap().  See the
2461    comment for ML_(syscall_table)[] for important info about the __NR_foo
2462    constants and their relationship to the sys_foo() functions.
2463 
2464    Some notes about names used for syscalls and args:
2465    - For the --trace-syscalls=yes output, we use the sys_foo() name to avoid
2466      ambiguity.
2467 
2468    - For error messages, we generally use a somewhat generic name
2469      for the syscall (eg. "write" rather than "sys_write").  This should be
2470      good enough for the average user to understand what is happening,
2471      without confusing them with names like "sys_write".
2472 
2473    - Also, for error messages the arg names are mostly taken from the man
2474      pages (even though many of those man pages are really for glibc
2475      functions of the same name), rather than from the OS kernel source,
2476      for the same reason -- a user presented with a "bogus foo(bar)" arg
2477      will most likely look at the "foo" man page to see which is the "bar"
2478      arg.
2479 
2480    Note that we use our own vki_* types.  The one exception is in
2481    PRE_REG_READn calls, where pointer types haven't been changed, because
2482    they don't need to be -- eg. for "foo*" to be used, the type foo need not
2483    be visible.
2484 
2485    XXX: some of these are arch-specific, and should be factored out.
2486 */
2487 
/* Shorthands used to define the wrappers below: PRE(sys_foo) and
   POST(sys_foo) expand to DEFN_PRE_TEMPLATE / DEFN_POST_TEMPLATE with
   'generic' as the first argument. */
#define PRE(name)      DEFN_PRE_TEMPLATE(generic, name)
#define POST(name)     DEFN_POST_TEMPLATE(generic, name)
2490 
2491 #if !defined(VGO_dragonfly) /* On dragonfly, exit(2) is all-threads shutdown */
PRE(sys_exit)2492 PRE(sys_exit)
2493 {
2494    ThreadState* tst;
2495    /* simple; just make this thread exit */
2496    PRINT("exit( %ld )", SARG1);
2497    PRE_REG_READ1(void, "exit", int, status);
2498    tst = VG_(get_ThreadState)(tid);
2499    /* Set the thread's status to be exiting, then claim that the
2500       syscall succeeded. */
2501    tst->exitreason = VgSrc_ExitThread;
2502    tst->os_state.exitcode = ARG1;
2503    SET_STATUS_Success(0);
2504 }
2505 #endif
2506 
/* Wrapper for syscall numbers that the kernel does not implement:
   traces the syscall (identified via VG_SYSNUM_STRING(SYSNO)), declares
   that it takes no register arguments, and sets its status to failure
   with VKI_ENOSYS. */
PRE(sys_ni_syscall)
{
   PRINT("unimplemented (by the kernel) syscall: %s! (ni_syscall)\n",
      VG_SYSNUM_STRING(SYSNO));
   PRE_REG_READ0(long, "ni_syscall");
   SET_STATUS_Failure( VKI_ENOSYS );
}
2514 
/* Wrapper for iopl: traces the call and declares its single register
   argument, 'level'.  No memory checks are made here. */
PRE(sys_iopl)
{
   PRINT("sys_iopl ( %" FMT_REGWORD "u )", ARG1);
   PRE_REG_READ1(long, "iopl", unsigned long, level);
}
2520 
/* Wrapper for fsync: traces the call and declares its single register
   argument, 'fd'.  No memory checks are made here. */
PRE(sys_fsync)
{
   /* Sets the SfMayBlock flag -- presumably this tells the core that
      the syscall may block. */
   *flags |= SfMayBlock;
   PRINT("sys_fsync ( %" FMT_REGWORD "u )", ARG1);
   PRE_REG_READ1(long, "fsync", unsigned int, fd);
}
2527 
/* Wrapper for fdatasync: traces the call and declares its single
   register argument, 'fd'.  No memory checks are made here. */
PRE(sys_fdatasync)
{
   /* Sets the SfMayBlock flag -- presumably this tells the core that
      the syscall may block. */
   *flags |= SfMayBlock;
   PRINT("sys_fdatasync ( %" FMT_REGWORD "u )", ARG1);
   PRE_REG_READ1(long, "fdatasync", unsigned int, fd);
}
2534 
/* Wrapper for msync: traces the call, declares its three register
   arguments (start, length, flags) and reports the 'length' bytes at
   'start' via PRE_MEM_READ. */
PRE(sys_msync)
{
   /* Sets the SfMayBlock flag -- presumably this tells the core that
      the syscall may block. */
   *flags |= SfMayBlock;
   PRINT("sys_msync ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %#"
                      FMT_REGWORD "x )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "msync",
                 unsigned long, start, vki_size_t, length, int, flags);
   /* ARG1 is the start address, ARG2 the length of the range. */
   PRE_MEM_READ( "msync(start)", ARG1, ARG2 );
}
2544 
// Nb: getpmsg() and putpmsg() are special additional syscalls used in early
// versions of LiS (Linux Streams).  They are not part of the kernel.
// Therefore, we have to provide this type ourselves, rather than getting it
// from the kernel sources.
/* Buffer descriptor used for the ctrl and data arguments of the
   getpmsg/putpmsg wrappers below. */
struct vki_pmsg_strbuf {
   int     maxlen;         /* no. of bytes in buffer */
   int     len;            /* no. of bytes returned */
   vki_caddr_t buf;        /* pointer to data */
};
PRE(sys_getpmsg)2554 PRE(sys_getpmsg)
2555 {
2556    /* LiS getpmsg from http://www.gcom.com/home/linux/lis/ */
2557    struct vki_pmsg_strbuf *ctrl;
2558    struct vki_pmsg_strbuf *data;
2559    *flags |= SfMayBlock;
2560    PRINT("sys_getpmsg ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x, %#"
2561                         FMT_REGWORD "x, %#" FMT_REGWORD "x )", SARG1,
2562                         ARG2, ARG3, ARG4, ARG5);
2563    PRE_REG_READ5(int, "getpmsg",
2564                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2565                  int *, bandp, int *, flagsp);
2566    ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2567    data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2568    if (ctrl && ctrl->maxlen > 0)
2569       PRE_MEM_WRITE( "getpmsg(ctrl)", (Addr)ctrl->buf, ctrl->maxlen);
2570    if (data && data->maxlen > 0)
2571       PRE_MEM_WRITE( "getpmsg(data)", (Addr)data->buf, data->maxlen);
2572    if (ARG4)
2573       PRE_MEM_WRITE( "getpmsg(bandp)", (Addr)ARG4, sizeof(int));
2574    if (ARG5)
2575       PRE_MEM_WRITE( "getpmsg(flagsp)", (Addr)ARG5, sizeof(int));
2576 }
POST(sys_getpmsg)2577 POST(sys_getpmsg)
2578 {
2579    struct vki_pmsg_strbuf *ctrl;
2580    struct vki_pmsg_strbuf *data;
2581    vg_assert(SUCCESS);
2582    ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2583    data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2584    if (RES == 0 && ctrl && ctrl->len > 0) {
2585       POST_MEM_WRITE( (Addr)ctrl->buf, ctrl->len);
2586    }
2587    if (RES == 0 && data && data->len > 0) {
2588       POST_MEM_WRITE( (Addr)data->buf, data->len);
2589    }
2590 }
2591 
PRE(sys_putpmsg)2592 PRE(sys_putpmsg)
2593 {
2594    /* LiS putpmsg from http://www.gcom.com/home/linux/lis/ */
2595    struct vki_pmsg_strbuf *ctrl;
2596    struct vki_pmsg_strbuf *data;
2597    *flags |= SfMayBlock;
2598    PRINT("sys_putpmsg ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD
2599                         "x, %ld, %ld )", SARG1, ARG2, ARG3, SARG4, SARG5);
2600    PRE_REG_READ5(int, "putpmsg",
2601                  int, fd, struct strbuf *, ctrl, struct strbuf *, data,
2602                  int, band, int, flags);
2603    ctrl = (struct vki_pmsg_strbuf *)(Addr)ARG2;
2604    data = (struct vki_pmsg_strbuf *)(Addr)ARG3;
2605    if (ctrl && ctrl->len > 0)
2606       PRE_MEM_READ( "putpmsg(ctrl)", (Addr)ctrl->buf, ctrl->len);
2607    if (data && data->len > 0)
2608       PRE_MEM_READ( "putpmsg(data)", (Addr)data->buf, data->len);
2609 }
2610 
PRE(sys_getitimer)2611 PRE(sys_getitimer)
2612 {
2613    struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2614    PRINT("sys_getitimer ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
2615    PRE_REG_READ2(long, "getitimer", int, which, struct itimerval *, value);
2616 
2617    PRE_timeval_WRITE( "getitimer(&value->it_interval)", &(value->it_interval));
2618    PRE_timeval_WRITE( "getitimer(&value->it_value)",    &(value->it_value));
2619 }
2620 
POST(sys_getitimer)2621 POST(sys_getitimer)
2622 {
2623    if (ARG2 != (Addr)NULL) {
2624       struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2625       POST_timeval_WRITE( &(value->it_interval) );
2626       POST_timeval_WRITE( &(value->it_value) );
2627    }
2628 }
2629 
PRE(sys_setitimer)2630 PRE(sys_setitimer)
2631 {
2632    PRINT("sys_setitimer ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
2633                           SARG1, ARG2, ARG3);
2634    PRE_REG_READ3(long, "setitimer",
2635                  int, which,
2636                  struct itimerval *, value, struct itimerval *, ovalue);
2637    if (ARG2 != (Addr)NULL) {
2638       struct vki_itimerval *value = (struct vki_itimerval*)(Addr)ARG2;
2639       PRE_timeval_READ( "setitimer(&value->it_interval)",
2640                          &(value->it_interval));
2641       PRE_timeval_READ( "setitimer(&value->it_value)",
2642                          &(value->it_value));
2643    }
2644    if (ARG3 != (Addr)NULL) {
2645       struct vki_itimerval *ovalue = (struct vki_itimerval*)(Addr)ARG3;
2646       PRE_timeval_WRITE( "setitimer(&ovalue->it_interval)",
2647                          &(ovalue->it_interval));
2648       PRE_timeval_WRITE( "setitimer(&ovalue->it_value)",
2649                          &(ovalue->it_value));
2650    }
2651 }
2652 
POST(sys_setitimer)2653 POST(sys_setitimer)
2654 {
2655    if (ARG3 != (Addr)NULL) {
2656       struct vki_itimerval *ovalue = (struct vki_itimerval*)(Addr)ARG3;
2657       POST_timeval_WRITE( &(ovalue->it_interval) );
2658       POST_timeval_WRITE( &(ovalue->it_value) );
2659    }
2660 }
2661 
PRE(sys_chroot)2662 PRE(sys_chroot)
2663 {
2664    PRINT("sys_chroot ( %#" FMT_REGWORD "x )", ARG1);
2665    PRE_REG_READ1(long, "chroot", const char *, path);
2666    PRE_MEM_RASCIIZ( "chroot(path)", ARG1 );
2667 }
2668 
PRE(sys_madvise)2669 PRE(sys_madvise)
2670 {
2671    *flags |= SfMayBlock;
2672    PRINT("sys_madvise ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %ld )",
2673                         ARG1, ARG2, SARG3);
2674    PRE_REG_READ3(long, "madvise",
2675                  unsigned long, start, vki_size_t, length, int, advice);
2676 }
2677 
PRE(sys_mcontrol)2678 PRE(sys_mcontrol)
2679 {
2680    *flags |= SfMayBlock;
2681    PRINT("sys_mcontrol ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %ld, %ld )",
2682                         ARG1, ARG2, SARG3, SARG4);
2683    PRE_REG_READ4(long, "mcontrol",
2684                  unsigned long, start, vki_size_t, length, int, advice, vki_off_t, val);
2685 }
2686 
2687 #if HAVE_MREMAP
PRE(sys_mremap)2688 PRE(sys_mremap)
2689 {
2690    // Nb: this is different to the glibc version described in the man pages,
2691    // which lacks the fifth 'new_address' argument.
2692    if (ARG4 & VKI_MREMAP_FIXED) {
2693       PRINT("sys_mremap ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
2694             FMT_REGWORD "u, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
2695             ARG1, ARG2, ARG3, ARG4, ARG5);
2696       PRE_REG_READ5(unsigned long, "mremap",
2697                     unsigned long, old_addr, unsigned long, old_size,
2698                     unsigned long, new_size, unsigned long, flags,
2699                     unsigned long, new_addr);
2700    } else {
2701       PRINT("sys_mremap ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
2702             FMT_REGWORD "u, 0x%" FMT_REGWORD "x )",
2703             ARG1, ARG2, ARG3, ARG4);
2704       PRE_REG_READ4(unsigned long, "mremap",
2705                     unsigned long, old_addr, unsigned long, old_size,
2706                     unsigned long, new_size, unsigned long, flags);
2707    }
2708    SET_STATUS_from_SysRes(
2709       do_mremap((Addr)ARG1, ARG2, (Addr)ARG5, ARG3, ARG4, tid)
2710    );
2711 }
2712 #endif /* HAVE_MREMAP */
2713 
PRE(sys_nice)2714 PRE(sys_nice)
2715 {
2716    PRINT("sys_nice ( %ld )", SARG1);
2717    PRE_REG_READ1(long, "nice", int, inc);
2718 }
2719 
PRE(sys_mlock)2720 PRE(sys_mlock)
2721 {
2722    *flags |= SfMayBlock;
2723    PRINT("sys_mlock ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ARG1, ARG2);
2724    PRE_REG_READ2(long, "mlock", unsigned long, addr, vki_size_t, len);
2725 }
2726 
PRE(sys_munlock)2727 PRE(sys_munlock)
2728 {
2729    *flags |= SfMayBlock;
2730    PRINT("sys_munlock ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u )", ARG1, ARG2);
2731    PRE_REG_READ2(long, "munlock", unsigned long, addr, vki_size_t, len);
2732 }
2733 
PRE(sys_mlockall)2734 PRE(sys_mlockall)
2735 {
2736    *flags |= SfMayBlock;
2737    PRINT("sys_mlockall ( %" FMT_REGWORD "x )", ARG1);
2738    PRE_REG_READ1(long, "mlockall", int, flags);
2739 }
2740 
PRE(sys_setpriority)2741 PRE(sys_setpriority)
2742 {
2743    PRINT("sys_setpriority ( %ld, %ld, %ld )", SARG1, SARG2, SARG3);
2744    PRE_REG_READ3(long, "setpriority", int, which, int, who, int, prio);
2745 }
2746 
PRE(sys_getpriority)2747 PRE(sys_getpriority)
2748 {
2749    PRINT("sys_getpriority ( %ld, %ld )", SARG1, SARG2);
2750    PRE_REG_READ2(long, "getpriority", int, which, int, who);
2751 }
2752 
2753 #if defined(VGO_linux)
PRE(sys_pwrite64)2754 PRE(sys_pwrite64)
2755 {
2756    *flags |= SfMayBlock;
2757 #if VG_WORDSIZE == 4
2758    PRINT("sys_pwrite64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
2759          FMT_REGWORD "u, %lld )", ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2760    PRE_REG_READ5(ssize_t, "pwrite64",
2761                  unsigned int, fd, const char *, buf, vki_size_t, count,
2762                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2763 #elif VG_WORDSIZE == 8
2764    PRINT("sys_pwrite64 ( %lu, %#lx, %lu, %ld )",
2765          ARG1, ARG2, ARG3, SARG4);
2766    PRE_REG_READ4(ssize_t, "pwrite64",
2767                  unsigned int, fd, const char *, buf, vki_size_t, count,
2768                  Word, offset);
2769 #else
2770 #  error Unexpected word size
2771 #endif
2772    PRE_MEM_READ( "pwrite64(buf)", ARG2, ARG3 );
2773 }
2774 #endif
2775 
PRE(sys_sync)2776 PRE(sys_sync)
2777 {
2778    *flags |= SfMayBlock;
2779    PRINT("sys_sync ( )");
2780    PRE_REG_READ0(long, "sync");
2781 }
2782 
PRE(sys_fstatfs)2783 PRE(sys_fstatfs)
2784 {
2785    FUSE_COMPATIBLE_MAY_BLOCK();
2786    PRINT("sys_fstatfs ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
2787    PRE_REG_READ2(long, "fstatfs",
2788                  unsigned int, fd, struct statfs *, buf);
2789    PRE_MEM_WRITE( "fstatfs(buf)", ARG2, sizeof(struct vki_statfs) );
2790 }
2791 
POST(sys_fstatfs)2792 POST(sys_fstatfs)
2793 {
2794    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
2795 }
2796 
PRE(sys_fstatfs64)2797 PRE(sys_fstatfs64)
2798 {
2799    FUSE_COMPATIBLE_MAY_BLOCK();
2800    PRINT("sys_fstatfs64 ( %" FMT_REGWORD "u, %" FMT_REGWORD "u, %#"
2801          FMT_REGWORD "x )", ARG1, ARG2, ARG3);
2802    PRE_REG_READ3(long, "fstatfs64",
2803                  unsigned int, fd, vki_size_t, size, struct statfs64 *, buf);
2804    PRE_MEM_WRITE( "fstatfs64(buf)", ARG3, ARG2 );
2805 }
POST(sys_fstatfs64)2806 POST(sys_fstatfs64)
2807 {
2808    POST_MEM_WRITE( ARG3, ARG2 );
2809 }
2810 
PRE(sys_getsid)2811 PRE(sys_getsid)
2812 {
2813    PRINT("sys_getsid ( %ld )", SARG1);
2814    PRE_REG_READ1(long, "getsid", vki_pid_t, pid);
2815 }
2816 
2817 #if defined(VGO_linux)
PRE(sys_pread64)2818 PRE(sys_pread64)
2819 {
2820    *flags |= SfMayBlock;
2821 #if VG_WORDSIZE == 4
2822    PRINT("sys_pread64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
2823          FMT_REGWORD "u, %lld )", ARG1, ARG2, ARG3, (Long)MERGE64(ARG4,ARG5));
2824    PRE_REG_READ5(ssize_t, "pread64",
2825                  unsigned int, fd, char *, buf, vki_size_t, count,
2826                  vki_u32, MERGE64_FIRST(offset), vki_u32, MERGE64_SECOND(offset));
2827 #elif VG_WORDSIZE == 8
2828    PRINT("sys_pread64 ( %lu, %#lx, %lu, %ld )",
2829          ARG1, ARG2, ARG3, SARG4);
2830    PRE_REG_READ4(ssize_t, "pread64",
2831                  unsigned int, fd, char *, buf, vki_size_t, count,
2832                  Word, offset);
2833 #else
2834 #  error Unexpected word size
2835 #endif
2836    PRE_MEM_WRITE( "pread64(buf)", ARG2, ARG3 );
2837 }
POST(sys_pread64)2838 POST(sys_pread64)
2839 {
2840    vg_assert(SUCCESS);
2841    if (RES > 0) {
2842       POST_MEM_WRITE( ARG2, RES );
2843    }
2844 }
2845 #endif
2846 
PRE(sys_mknod)2847 PRE(sys_mknod)
2848 {
2849    FUSE_COMPATIBLE_MAY_BLOCK();
2850    PRINT("sys_mknod ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %#"
2851          FMT_REGWORD "x )", ARG1, (HChar*)(Addr)ARG1, ARG2, ARG3 );
2852    PRE_REG_READ3(long, "mknod",
2853                  const char *, pathname, int, mode, unsigned, dev);
2854    PRE_MEM_RASCIIZ( "mknod(pathname)", ARG1 );
2855 }
2856 
PRE(sys_flock)2857 PRE(sys_flock)
2858 {
2859    *flags |= SfMayBlock;
2860    PRINT("sys_flock ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2 );
2861    PRE_REG_READ2(long, "flock", unsigned int, fd, unsigned int, operation);
2862 }
2863 
2864 // Pre_read a char** argument.
ML_(pre_argv_envp)2865 void ML_(pre_argv_envp)(Addr a, ThreadId tid, const HChar *s1, const HChar *s2)
2866 {
2867    while (True) {
2868       Addr a_deref;
2869       Addr* a_p = (Addr*)a;
2870       PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
2871       a_deref = *a_p;
2872       if (0 == a_deref)
2873          break;
2874       PRE_MEM_RASCIIZ( s2, a_deref );
2875       a += sizeof(char*);
2876    }
2877 }
2878 
i_am_the_only_thread(void)2879 static Bool i_am_the_only_thread ( void )
2880 {
2881    Int c = VG_(count_living_threads)();
2882    vg_assert(c >= 1); /* stay sane */
2883    return c == 1;
2884 }
2885 
2886 /* Wait until all other threads disappear. */
VG_(reap_threads)2887 void VG_(reap_threads)(ThreadId self)
2888 {
2889    while (!i_am_the_only_thread()) {
2890       /* Let other thread(s) run */
2891       VG_(vg_yield)();
2892       VG_(poll_signals)(self);
2893    }
2894    vg_assert(i_am_the_only_thread());
2895 }
2896 
2897 // XXX: prototype here seemingly doesn't match the prototype for i386-linux,
2898 // but it seems to work nonetheless...
/* Pre-handler for execve(filename, argv, envp).

   The handler performs the exec itself rather than leaving it to the
   normal syscall path.  In order, it:
     1. checks the register and memory arguments (filename, argv, envp);
     2. decides whether the child should be traced
        (VG_(should_we_trace_this_child));
     3. runs VG_(pre_exec_check) and the launcher-name sanity check --
        the last points at which a failure status can be returned to
        the client;
     4. terminates gdbserver if active, nukes and reaps all other
        threads;
     5. builds the path, environment and argument vector to exec (the
        launcher plus Valgrind's own args when tracing the child);
     6. sets up the real signal dispositions and signal mask;
     7. issues the real execve via VG_(do_syscall3).
   If the real execve returns, the handler prints diagnostics and calls
   VG_(exit)(101). */
PRE(sys_execve)
{
   HChar*       path = NULL;       /* path to executable */
   HChar**      envp = NULL;
   HChar**      argv = NULL;
   HChar**      arg2copy;
   HChar*       launcher_basename = NULL;
   ThreadState* tst;
   Int          i, j, tot_args;
   SysRes       res;
   Bool         setuid_allowed, trace_this_child;

   PRINT("sys_execve ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %#"
         FMT_REGWORD "x )", ARG1, (HChar*)(Addr)ARG1, ARG2, ARG3);
   PRE_REG_READ3(vki_off_t, "execve",
                 char *, filename, char **, argv, char **, envp);
   PRE_MEM_RASCIIZ( "execve(filename)", ARG1 );
   if (ARG2 != 0) {
      /* At least the terminating NULL must be addressable. */
      if (!ML_(safe_to_deref)((HChar **) (Addr)ARG2, sizeof(HChar *))) {
         SET_STATUS_Failure(VKI_EFAULT);
         return;
      }
      ML_(pre_argv_envp)( ARG2, tid, "execve(argv)", "execve(argv[i])" );
   }
   if (ARG3 != 0) {
      /* At least the terminating NULL must be addressable. */
      if (!ML_(safe_to_deref)((HChar **) (Addr)ARG3, sizeof(HChar *))) {
         SET_STATUS_Failure(VKI_EFAULT);
         return;
      }
      ML_(pre_argv_envp)( ARG3, tid, "execve(envp)", "execve(envp[i])" );
   }

   vg_assert(VG_(is_valid_tid)(tid));
   tst = VG_(get_ThreadState)(tid);

   /* Erk.  If the exec fails, then the following will have made a
      mess of things which makes it hard for us to continue.  The
      right thing to do is piece everything together again in
      POST(execve), but that's close to impossible.  Instead, we make
      an effort to check that the execve will work before actually
      doing it. */

   /* Check that the name at least begins in client-accessible storage. */
   if (ARG1 == 0 /* obviously bogus */
       || !VG_(am_is_valid_for_client)( ARG1, 1, VKI_PROT_READ )) {
      SET_STATUS_Failure( VKI_EFAULT );
      return;
   }

   // debug-only printing
   if (0) {
      VG_(printf)("ARG1 = %p(%s)\n", (void*)(Addr)ARG1, (HChar*)(Addr)ARG1);
      if (ARG2) {
         VG_(printf)("ARG2 = ");
         Int q;
         HChar** vec = (HChar**)(Addr)ARG2;
         for (q = 0; vec[q]; q++)
            VG_(printf)("%p(%s) ", vec[q], vec[q]);
         VG_(printf)("\n");
      } else {
         VG_(printf)("ARG2 = null\n");
      }
   }

   // Decide whether or not we want to follow along
   { // Make 'child_argv' be a pointer to the child's arg vector
     // (skipping the exe name)
     const HChar** child_argv = (const HChar**)(Addr)ARG2;
     if (child_argv && child_argv[0] == NULL)
        child_argv = NULL;
     trace_this_child = VG_(should_we_trace_this_child)( (HChar*)(Addr)ARG1,
                                                          child_argv );
   }

   // Do the important checks:  it is a file, is executable, permissions are
   // ok, etc.  We allow setuid executables to run only in the case when
   // we are not simulating them, that is, they are to be run natively.
   setuid_allowed = trace_this_child  ? False  : True;
   res = VG_(pre_exec_check)((const HChar *)(Addr)ARG1, NULL, setuid_allowed);
   if (sr_isError(res)) {
      SET_STATUS_Failure( sr_Err(res) );
      return;
   }

   /* If we're tracing the child, and the launcher name looks bogus
      (possibly because launcher.c couldn't figure it out, see
      comments therein) then we have no option but to fail. */
   if (trace_this_child
       && (VG_(name_of_launcher) == NULL
           || VG_(name_of_launcher)[0] != '/')) {
      SET_STATUS_Failure( VKI_ECHILD ); /* "No child processes" */
      return;
   }

   /* After this point, we can't recover if the execve fails. */
   VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)(Addr)ARG1);


   // Terminate gdbserver if it is active.
   if (VG_(clo_vgdb)  != Vg_VgdbNo) {
      // If the child will not be traced, we need to terminate gdbserver
      // to cleanup the gdbserver resources (e.g. the FIFO files).
      // If child will be traced, we also terminate gdbserver: the new
      // Valgrind will start a fresh gdbserver after exec.
      VG_(gdbserver) (0);
   }

   /* Resistance is futile.  Nuke all other threads.  POSIX mandates
      this. (Really, nuke them all, since the new process will make
      its own new thread.) */
   VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
   VG_(reap_threads)(tid);

   // Set up the child's exe path.
   //
   if (trace_this_child) {

      // We want to exec the launcher.  Get its pre-remembered path.
      path = VG_(name_of_launcher);
      // VG_(name_of_launcher) should have been acquired by m_main at
      // startup.
      vg_assert(path);

      // argv[0] for the launcher is the part of its path after the last
      // '/'; fall back to the whole path if there is no '/' or nothing
      // follows it.
      launcher_basename = VG_(strrchr)(path, '/');
      if (launcher_basename == NULL || launcher_basename[1] == 0) {
         launcher_basename = path;  // hmm, tres dubious
      } else {
         launcher_basename++;
      }

   } else {
      path = (HChar*)(Addr)ARG1;
   }

   // Set up the child's environment.
   //
   // Remove the valgrind-specific stuff from the environment so the
   // child doesn't get vgpreload_core.so, vgpreload_<tool>.so, etc.
   // This is done unconditionally, since if we are tracing the child,
   // the child valgrind will set up the appropriate client environment.
   // Nb: we make a copy of the environment before trying to mangle it
   // as it might be in read-only memory (this was bug #101881).
   //
   // Then, if tracing the child, set VALGRIND_LIB for it.
   //
   if (ARG3 == 0) {
      envp = NULL;
   } else {
      envp = VG_(env_clone)( (HChar**)(Addr)ARG3 );
      // NOTE(review): this jump reaches vg_assert(FAILURE) at 'hosed'
      // without any status having been set in this function -- confirm
      // whether a failure status should be set first.
      if (envp == NULL) goto hosed;
      VG_(env_remove_valgrind_env_stuff)( envp, True /*ro_strings*/, NULL );
   }

   if (trace_this_child) {
      // Set VALGRIND_LIB in ARG3 (the environment)
      VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
   }

   // Set up the child's args.  If not tracing it, they are
   // simply ARG2.  Otherwise, they are
   //
   // [launcher_basename] ++ VG_(args_for_valgrind) ++ [ARG1] ++ ARG2[1..]
   //
   // except that the first VG_(args_for_valgrind_noexecpass) args
   // are omitted.
   //
   if (!trace_this_child) {
      argv = (HChar**)(Addr)ARG2;
   } else {
      vg_assert( VG_(args_for_valgrind) );
      vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
      vg_assert( VG_(args_for_valgrind_noexecpass)
                   <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
      /* how many args in total will there be? */
      // launcher basename
      tot_args = 1;
      // V's args
      tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
      tot_args -= VG_(args_for_valgrind_noexecpass);
      // name of client exe
      tot_args++;
      // args for client exe, skipping [0]
      arg2copy = (HChar**)(Addr)ARG2;
      if (arg2copy && arg2copy[0]) {
         for (i = 1; arg2copy[i]; i++)
            tot_args++;
      }
      // allocate (one extra slot for the terminating NULL)
      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
                          (tot_args+1) * sizeof(HChar*) );
      // copy
      j = 0;
      argv[j++] = launcher_basename;
      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
         if (i < VG_(args_for_valgrind_noexecpass))
            continue;
         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
      }
      argv[j++] = (HChar*)(Addr)ARG1;
      if (arg2copy && arg2copy[0])
         for (i = 1; arg2copy[i]; i++)
            argv[j++] = arg2copy[i];
      argv[j++] = NULL;
      // check
      vg_assert(j == tot_args+1);
   }

   /*
      Set the signal state up for exec.

      We need to set the real signal state to make sure the exec'd
      process gets SIG_IGN properly.

      Also set our real sigmask to match the client's sigmask so that
      the exec'd child will get the right mask.  First we need to
      clear out any pending signals so they don't get delivered,
      which would confuse things.

      XXX This is a bug - the signals should remain pending, and be
      delivered to the new process after exec.  There's also a
      race-condition, since if someone delivers us a signal between
      the sigprocmask and the execve, we'll still get the signal. Oh
      well.
   */
   {
      vki_sigset_t allsigs;
      vki_siginfo_t info;

      /* What this loop does: it queries SCSS (the signal state that
         the client _thinks_ the kernel is in) by calling
         VG_(do_sys_sigaction), and modifies the real kernel signal
         state accordingly. */
      for (i = 1; i < VG_(max_signal); i++) {
         vki_sigaction_fromK_t sa_f;
         vki_sigaction_toK_t   sa_t;
         VG_(do_sys_sigaction)(i, NULL, &sa_f);
         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
         /* Ignored signals stay ignored; everything else is reset to
            the default disposition. */
         if (sa_t.ksa_handler == VKI_SIG_IGN)
            VG_(sigaction)(i, &sa_t, NULL);
         else {
            sa_t.ksa_handler = VKI_SIG_DFL;
            VG_(sigaction)(i, &sa_t, NULL);
         }
      }

      /* Drain every currently pending signal. */
      VG_(sigfillset)(&allsigs);
      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
         ;

      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
   }

   // debug-only printing of what is about to be exec'd
   if (0) {
      HChar **cpp;
      VG_(printf)("exec: %s\n", path);
      for (cpp = argv; cpp && *cpp; cpp++)
         VG_(printf)("argv: %s\n", *cpp);
      if (0)
         for (cpp = envp; cpp && *cpp; cpp++)
            VG_(printf)("env: %s\n", *cpp);
   }

   SET_STATUS_from_SysRes(
      VG_(do_syscall3)(__NR_execve, (UWord)path, (UWord)argv, (UWord)envp)
   );

   /* If we got here, then the execve failed.  We've already made way
      too much of a mess to continue, so we have to abort. */
  hosed:
   vg_assert(FAILURE);
   VG_(message)(Vg_UserMsg, "execve(%#" FMT_REGWORD "x(%s), %#" FMT_REGWORD
                "x, %#" FMT_REGWORD "x) failed, errno %lu\n",
                ARG1, (HChar*)(Addr)ARG1, ARG2, ARG3, ERR);
   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
                            "execve() failing, so I'm dying.\n");
   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execve), "
                            "or work out how to recover.\n");
   VG_(exit)(101);
}
3180 
PRE(sys_access)3181 PRE(sys_access)
3182 {
3183    PRINT("sys_access ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
3184          (HChar*)(Addr)ARG1, SARG2);
3185    PRE_REG_READ2(long, "access", const char *, pathname, int, mode);
3186    PRE_MEM_RASCIIZ( "access(pathname)", ARG1 );
3187 }
3188 
PRE(sys_alarm)3189 PRE(sys_alarm)
3190 {
3191    PRINT("sys_alarm ( %" FMT_REGWORD "u )", ARG1);
3192    PRE_REG_READ1(unsigned long, "alarm", unsigned int, seconds);
3193 }
3194 
PRE(sys_brk)3195 PRE(sys_brk)
3196 {
3197    Addr brk_limit = VG_(brk_limit);
3198    Addr brk_new;
3199 
3200    /* libc   says: int   brk(void *end_data_segment);
3201       kernel says: void* brk(void* end_data_segment);  (more or less)
3202 
3203       libc returns 0 on success, and -1 (and sets errno) on failure.
3204       Nb: if you ask to shrink the dataseg end below what it
3205       currently is, that always succeeds, even if the dataseg end
3206       doesn't actually change (eg. brk(0)).  Unless it seg faults.
3207 
3208       Kernel returns the new dataseg end.  If the brk() failed, this
3209       will be unchanged from the old one.  That's why calling (kernel)
3210       brk(0) gives the current dataseg end (libc brk() just returns
3211       zero in that case).
3212 
3213       Both will seg fault if you shrink it back into a text segment.
3214    */
3215    PRINT("sys_brk ( %#" FMT_REGWORD "x )", ARG1);
3216    PRE_REG_READ1(unsigned long, "brk", unsigned long, end_data_segment);
3217 
3218    brk_new = do_brk(ARG1, tid);
3219    SET_STATUS_Success( brk_new );
3220 
3221    if (brk_new == ARG1) {
3222       /* brk() succeeded */
3223       if (brk_new < brk_limit) {
3224          /* successfully shrunk the data segment. */
3225          VG_TRACK( die_mem_brk, (Addr)ARG1,
3226 		   brk_limit-ARG1 );
3227       } else
3228       if (brk_new > brk_limit) {
3229          /* successfully grew the data segment */
3230          VG_TRACK( new_mem_brk, brk_limit,
3231                    ARG1-brk_limit, tid );
3232       }
3233    } else {
3234       /* brk() failed */
3235       vg_assert(brk_limit == brk_new);
3236    }
3237 }
3238 
PRE(sys_chdir)3239 PRE(sys_chdir)
3240 {
3241    FUSE_COMPATIBLE_MAY_BLOCK();
3242    PRINT("sys_chdir ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
3243    PRE_REG_READ1(long, "chdir", const char *, path);
3244    PRE_MEM_RASCIIZ( "chdir(path)", ARG1 );
3245 }
3246 
PRE(sys_chmod)3247 PRE(sys_chmod)
3248 {
3249    FUSE_COMPATIBLE_MAY_BLOCK();
3250    PRINT("sys_chmod ( %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u )", ARG1,
3251          (HChar*)(Addr)ARG1, ARG2);
3252    PRE_REG_READ2(long, "chmod", const char *, path, vki_mode_t, mode);
3253    PRE_MEM_RASCIIZ( "chmod(path)", ARG1 );
3254 }
3255 
PRE(sys_chown)3256 PRE(sys_chown)
3257 {
3258    FUSE_COMPATIBLE_MAY_BLOCK();
3259    PRINT("sys_chown ( %#" FMT_REGWORD "x(%s), 0x%" FMT_REGWORD "x, 0x%"
3260          FMT_REGWORD "x )", ARG1,(char*)(Addr)ARG1,ARG2,ARG3);
3261    PRE_REG_READ3(long, "chown",
3262                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3263    PRE_MEM_RASCIIZ( "chown(path)", ARG1 );
3264 }
3265 
PRE(sys_lchown)3266 PRE(sys_lchown)
3267 {
3268    FUSE_COMPATIBLE_MAY_BLOCK();
3269    PRINT("sys_lchown ( %#" FMT_REGWORD "x(%s), 0x%" FMT_REGWORD "x, 0x%"
3270          FMT_REGWORD "x )", ARG1,(char*)(Addr)ARG1,ARG2,ARG3);
3271    PRE_REG_READ3(long, "lchown",
3272                  const char *, path, vki_uid_t, owner, vki_gid_t, group);
3273    PRE_MEM_RASCIIZ( "lchown(path)", ARG1 );
3274 }
3275 
PRE(sys_close)3276 PRE(sys_close)
3277 {
3278    FUSE_COMPATIBLE_MAY_BLOCK();
3279    PRINT("sys_close ( %" FMT_REGWORD "u )", ARG1);
3280    PRE_REG_READ1(long, "close", unsigned int, fd);
3281 
3282    /* Detect and negate attempts by the client to close Valgrind's log fd */
3283    if ( (!ML_(fd_allowed)(ARG1, "close", tid, False))
3284         /* If doing -d style logging (which is to fd=2), don't
3285            allow that to be closed either. */
3286         || (ARG1 == 2/*stderr*/ && VG_(debugLog_getLevel)() > 0) )
3287       SET_STATUS_Failure( VKI_EBADF );
3288 }
3289 
POST(sys_close)3290 POST(sys_close)
3291 {
3292    if (VG_(clo_track_fds)) ML_(record_fd_close)(ARG1);
3293 }
3294 
PRE(sys_dup)3295 PRE(sys_dup)
3296 {
3297    PRINT("sys_dup ( %" FMT_REGWORD "u )", ARG1);
3298    PRE_REG_READ1(long, "dup", unsigned int, oldfd);
3299 }
3300 
POST(sys_dup)3301 POST(sys_dup)
3302 {
3303    vg_assert(SUCCESS);
3304    if (!ML_(fd_allowed)(RES, "dup", tid, True)) {
3305       VG_(close)(RES);
3306       SET_STATUS_Failure( VKI_EMFILE );
3307    } else {
3308       if (VG_(clo_track_fds))
3309          ML_(record_fd_open_named)(tid, RES);
3310    }
3311 }
3312 
PRE(sys_dup2)3313 PRE(sys_dup2)
3314 {
3315    PRINT("sys_dup2 ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
3316    PRE_REG_READ2(long, "dup2", unsigned int, oldfd, unsigned int, newfd);
3317    if (!ML_(fd_allowed)(ARG2, "dup2", tid, True))
3318       SET_STATUS_Failure( VKI_EBADF );
3319 }
3320 
POST(sys_dup2)3321 POST(sys_dup2)
3322 {
3323    vg_assert(SUCCESS);
3324    if (VG_(clo_track_fds))
3325       ML_(record_fd_open_named)(tid, RES);
3326 }
3327 
PRE(sys_fchdir)3328 PRE(sys_fchdir)
3329 {
3330    FUSE_COMPATIBLE_MAY_BLOCK();
3331    PRINT("sys_fchdir ( %" FMT_REGWORD "u )", ARG1);
3332    PRE_REG_READ1(long, "fchdir", unsigned int, fd);
3333 }
3334 
PRE(sys_fchown)3335 PRE(sys_fchown)
3336 {
3337    FUSE_COMPATIBLE_MAY_BLOCK();
3338    PRINT("sys_fchown ( %" FMT_REGWORD "u, %" FMT_REGWORD "u, %"
3339          FMT_REGWORD "u )", ARG1, ARG2, ARG3);
3340    PRE_REG_READ3(long, "fchown",
3341                  unsigned int, fd, vki_uid_t, owner, vki_gid_t, group);
3342 }
3343 
PRE(sys_fchmod)3344 PRE(sys_fchmod)
3345 {
3346    FUSE_COMPATIBLE_MAY_BLOCK();
3347    PRINT("sys_fchmod ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
3348    PRE_REG_READ2(long, "fchmod", unsigned int, fildes, vki_mode_t, mode);
3349 }
3350 
PRE(sys_newfstat)3351 PRE(sys_newfstat)
3352 {
3353    FUSE_COMPATIBLE_MAY_BLOCK();
3354    PRINT("sys_newfstat ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
3355    PRE_REG_READ2(long, "fstat", unsigned int, fd, struct stat *, buf);
3356    PRE_MEM_WRITE( "fstat(buf)", ARG2, sizeof(struct vki_stat) );
3357 }
3358 
POST(sys_newfstat)3359 POST(sys_newfstat)
3360 {
3361    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
3362 }
3363 
3364 #if !defined(VGO_solaris) && !defined(VGP_arm64_linux)
3365 static vki_sigset_t fork_saved_mask;
3366 
3367 // In Linux, the sys_fork() function varies across architectures, but we
3368 // ignore the various args it gets, and so it looks arch-neutral.  Hmm.
PRE(sys_fork)3369 PRE(sys_fork)
3370 {
3371    Bool is_child;
3372    Int child_pid;
3373    vki_sigset_t mask;
3374 
3375    PRINT("sys_fork ( )");
3376    PRE_REG_READ0(long, "fork");
3377 
3378    /* Block all signals during fork, so that we can fix things up in
3379       the child without being interrupted. */
3380    VG_(sigfillset)(&mask);
3381    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, &fork_saved_mask);
3382 
3383    VG_(do_atfork_pre)(tid);
3384 
3385    SET_STATUS_from_SysRes( VG_(do_syscall0)(__NR_fork) );
3386 
3387    if (!SUCCESS) return;
3388 
3389 #if defined(VGO_linux) || defined(VGO_dragonfly)
3390    // RES is 0 for child, non-0 (the child's PID) for parent.
3391    is_child = ( RES == 0 ? True : False );
3392    child_pid = ( is_child ? -1 : RES );
3393 #elif defined(VGO_darwin)
3394    // RES is the child's pid.  RESHI is 1 for child, 0 for parent.
3395    is_child = RESHI;
3396    child_pid = RES;
3397 #else
3398 #  error Unknown OS
3399 #endif
3400 
3401    if (is_child) {
3402       VG_(do_atfork_child)(tid);
3403 
3404       /* restore signal mask */
3405       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3406    } else {
3407       VG_(do_atfork_parent)(tid);
3408 
3409       PRINT("   fork: process %d created child %d\n", VG_(getpid)(), child_pid);
3410 
3411       /* restore signal mask */
3412       VG_(sigprocmask)(VKI_SIG_SETMASK, &fork_saved_mask, NULL);
3413    }
3414 }
3415 #endif // !defined(VGO_solaris) && !defined(VGP_arm64_linux)
3416 
3417 // ftruncate/truncate have padded arguments on Dragonfly.
3418 #if !defined(VGO_dragonfly)
/* Pre-syscall wrapper for ftruncate(fd, length): sets SfMayBlock, then
   traces and declares the two register arguments. */
PRE(sys_ftruncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_ftruncate ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
   PRE_REG_READ2(long, "ftruncate", unsigned int, fd, unsigned long, length);
}
3425 #endif
3426 
/* Pre-syscall wrapper for truncate(path, length): sets SfMayBlock,
   traces/declares the two register arguments and hands the path (ARG1)
   to PRE_MEM_RASCIIZ. */
PRE(sys_truncate)
{
   *flags |= SfMayBlock;
   PRINT("sys_truncate ( %#" FMT_REGWORD "x(%s), %" FMT_REGWORD "u )",
         ARG1, (HChar*)(Addr)ARG1, ARG2);
   PRE_REG_READ2(long, "truncate",
                 const char *, path, unsigned long, length);
   PRE_MEM_RASCIIZ( "truncate(path)", ARG1 );
}
3436 
/* Pre-syscall wrapper for ftruncate64(fd, length).  When VG_WORDSIZE is
   4 the 64-bit length occupies two registers (ARG2/ARG3), which are
   recombined with MERGE64 for tracing and declared as two separate
   halves; otherwise the length is the single register ARG2. */
PRE(sys_ftruncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_ftruncate64 ( %" FMT_REGWORD "u, %llu )", ARG1,
         MERGE64(ARG2,ARG3));
   PRE_REG_READ3(long, "ftruncate64",
                 unsigned int, fd,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_ftruncate64 ( %lu, %lu )", ARG1, ARG2);
   PRE_REG_READ2(long, "ftruncate64",
                 unsigned int,fd, UWord,length);
#endif
}
3452 
/* Pre-syscall wrapper for truncate64(path, length).  When VG_WORDSIZE
   is 4 the 64-bit length is split across ARG2/ARG3 and recombined with
   MERGE64 for tracing; otherwise it is the single register ARG2.  In
   both configurations the path (ARG1) is handed to PRE_MEM_RASCIIZ. */
PRE(sys_truncate64)
{
   *flags |= SfMayBlock;
#if VG_WORDSIZE == 4
   PRINT("sys_truncate64 ( %#" FMT_REGWORD "x, %lld )", ARG1,
         (Long)MERGE64(ARG2, ARG3));
   PRE_REG_READ3(long, "truncate64",
                 const char *, path,
                 UWord, MERGE64_FIRST(length), UWord, MERGE64_SECOND(length));
#else
   PRINT("sys_truncate64 ( %#lx, %lld )", ARG1, (Long)ARG2);
   PRE_REG_READ2(long, "truncate64",
                 const char *,path, UWord,length);
#endif
   PRE_MEM_RASCIIZ( "truncate64(path)", ARG1 );
}
3469 
/* Pre-syscall wrapper for getdents(fd, dirp, count): sets SfMayBlock
   and registers the dirp buffer (ARG2) of count (ARG3) bytes with
   PRE_MEM_WRITE. */
PRE(sys_getdents)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %" FMT_REGWORD
         "u )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "getdents",
                 unsigned int, fd, struct vki_dirent *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents(dirp)", ARG2, ARG3 );
}
3480 
POST(sys_getdents)3481 POST(sys_getdents)
3482 {
3483    vg_assert(SUCCESS);
3484    if (RES > 0)
3485       POST_MEM_WRITE( ARG2, RES );
3486 }
3487 
/* Pre-syscall wrapper for getdents64(fd, dirp, count): sets SfMayBlock
   and registers the dirp buffer (ARG2) of count (ARG3) bytes with
   PRE_MEM_WRITE. */
PRE(sys_getdents64)
{
   *flags |= SfMayBlock;
   PRINT("sys_getdents64 ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
         FMT_REGWORD "u )",ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "getdents64",
                 unsigned int, fd, struct vki_dirent64 *, dirp,
                 unsigned int, count);
   PRE_MEM_WRITE( "getdents64(dirp)", ARG2, ARG3 );
}
3498 
POST(sys_getdents64)3499 POST(sys_getdents64)
3500 {
3501    vg_assert(SUCCESS);
3502    if (RES > 0)
3503       POST_MEM_WRITE( ARG2, RES );
3504 }
3505 
PRE(sys_getgroups)3506 PRE(sys_getgroups)
3507 {
3508    PRINT("sys_getgroups ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
3509    PRE_REG_READ2(long, "getgroups", int, size, vki_gid_t *, list);
3510    if (ARG1 > 0)
3511       PRE_MEM_WRITE( "getgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
3512 }
3513 
POST(sys_getgroups)3514 POST(sys_getgroups)
3515 {
3516    vg_assert(SUCCESS);
3517    if (ARG1 > 0 && RES > 0)
3518       POST_MEM_WRITE( ARG2, RES * sizeof(vki_gid_t) );
3519 }
3520 
/* Pre-syscall wrapper for getcwd(buf, size): registers buf (ARG1) of
   size (ARG2) bytes with PRE_MEM_WRITE. */
PRE(sys_getcwd)
{
   // Comment from linux/fs/dcache.c:
   //   NOTE! The user-level library version returns a character pointer.
   //   The kernel system call just returns the length of the buffer filled
   //   (which includes the ending '\0' character), or a negative error
   //   value.
   // Is this Linux-specific?  If so it should be moved to syswrap-linux.c.
   PRINT("sys_getcwd ( %#" FMT_REGWORD "x, %llu )", ARG1,(ULong)ARG2);
   PRE_REG_READ2(long, "getcwd", char *, buf, unsigned long, size);
   PRE_MEM_WRITE( "getcwd(buf)", ARG1, ARG2 );
}
3533 
POST(sys_getcwd)3534 POST(sys_getcwd)
3535 {
3536    vg_assert(SUCCESS);
3537    if (RES != (Addr)NULL)
3538       POST_MEM_WRITE( ARG1, RES );
3539 }
3540 
/* Pre-syscall wrapper for geteuid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_geteuid)
{
   PRINT("sys_geteuid ( )");
   PRE_REG_READ0(long, "geteuid");
}
3546 
/* Pre-syscall wrapper for getegid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getegid)
{
   PRINT("sys_getegid ( )");
   PRE_REG_READ0(long, "getegid");
}
3552 
/* Pre-syscall wrapper for getgid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getgid)
{
   PRINT("sys_getgid ( )");
   PRE_REG_READ0(long, "getgid");
}
3558 
/* Pre-syscall wrapper for getpid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getpid)
{
   PRINT("sys_getpid ()");
   PRE_REG_READ0(long, "getpid");
}
3564 
/* Pre-syscall wrapper for getpgid(pid): traces the pid as a signed
   value (SARG1) and declares the single register argument. */
PRE(sys_getpgid)
{
   PRINT("sys_getpgid ( %ld )", SARG1);
   PRE_REG_READ1(long, "getpgid", vki_pid_t, pid);
}
3570 
/* Pre-syscall wrapper for getpgrp: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getpgrp)
{
   PRINT("sys_getpgrp ()");
   PRE_REG_READ0(long, "getpgrp");
}
3576 
/* Pre-syscall wrapper for getppid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getppid)
{
   PRINT("sys_getppid ()");
   PRE_REG_READ0(long, "getppid");
}
3582 
/* Shared post-processing for the getrlimit wrappers.
     tid - thread that made the call
     a1  - the resource selector passed to the syscall
     a2  - address of the struct vki_rlimit filled in by the syscall
   The result buffer is first reported as written; then, for the
   NOFILE, DATA and STACK resources, its contents are replaced with the
   values kept in VG_(fd_soft_limit)/VG_(fd_hard_limit),
   VG_(client_rlimit_data) and VG_(client_rlimit_stack) respectively.
   Any other resource is left exactly as the kernel returned it. */
static void common_post_getrlimit(ThreadId tid, UWord a1, UWord a2)
{
   struct vki_rlimit *lim = (struct vki_rlimit *)a2;

   POST_MEM_WRITE( a2, sizeof(struct vki_rlimit) );

#ifdef _RLIMIT_POSIX_FLAG
   /* Darwin may have _RLIMIT_POSIX_FLAG or'd into the selector; strip
      it so the comparisons below see the plain resource number. */
   a1 &= ~_RLIMIT_POSIX_FLAG;
#endif

   if (a1 == VKI_RLIMIT_NOFILE) {
      lim->rlim_cur = VG_(fd_soft_limit);
      lim->rlim_max = VG_(fd_hard_limit);
   } else if (a1 == VKI_RLIMIT_DATA) {
      *lim = VG_(client_rlimit_data);
   } else if (a1 == VKI_RLIMIT_STACK) {
      *lim = VG_(client_rlimit_stack);
   }
}
3608 
/* Pre-syscall wrapper for old_getrlimit(resource, rlim): registers the
   rlim buffer (ARG2) of sizeof(struct vki_rlimit) bytes with
   PRE_MEM_WRITE. */
PRE(sys_old_getrlimit)
{
   PRINT("sys_old_getrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )",
         ARG1, ARG2);
   PRE_REG_READ2(long, "old_getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "old_getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3617 
/* Post-syscall wrapper for old_getrlimit: delegates to
   common_post_getrlimit with the resource (ARG1) and buffer (ARG2). */
POST(sys_old_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3622 
/* Pre-syscall wrapper for getrlimit(resource, rlim): registers the
   rlim buffer (ARG2) of sizeof(struct vki_rlimit) bytes with
   PRE_MEM_WRITE. */
PRE(sys_getrlimit)
{
   PRINT("sys_getrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
   PRE_REG_READ2(long, "getrlimit",
                 unsigned int, resource, struct rlimit *, rlim);
   PRE_MEM_WRITE( "getrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
}
3630 
/* Post-syscall wrapper for getrlimit: delegates to
   common_post_getrlimit with the resource (ARG1) and buffer (ARG2). */
POST(sys_getrlimit)
{
   common_post_getrlimit(tid, ARG1, ARG2);
}
3635 
/* Pre-syscall wrapper for getrusage(who, usage): registers the usage
   buffer (ARG2) of sizeof(struct vki_rusage) bytes with PRE_MEM_WRITE. */
PRE(sys_getrusage)
{
   PRINT("sys_getrusage ( %ld, %#" FMT_REGWORD "x )", SARG1, ARG2);
   PRE_REG_READ2(long, "getrusage", int, who, struct rusage *, usage);
   PRE_MEM_WRITE( "getrusage(usage)", ARG2, sizeof(struct vki_rusage) );
}
3642 
POST(sys_getrusage)3643 POST(sys_getrusage)
3644 {
3645    vg_assert(SUCCESS);
3646    if (RES == 0)
3647       POST_MEM_WRITE( ARG2, sizeof(struct vki_rusage) );
3648 }
3649 
PRE(sys_gettimeofday)3650 PRE(sys_gettimeofday)
3651 {
3652    PRINT("sys_gettimeofday ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
3653          ARG1,ARG2);
3654    PRE_REG_READ2(long, "gettimeofday",
3655                  struct timeval *, tv, struct timezone *, tz);
3656    // GrP fixme does darwin write to *tz anymore?
3657    if (ARG1 != 0)
3658       PRE_timeval_WRITE( "gettimeofday(tv)", (Addr)ARG1 );
3659    if (ARG2 != 0)
3660       PRE_MEM_WRITE( "gettimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3661 }
3662 
POST(sys_gettimeofday)3663 POST(sys_gettimeofday)
3664 {
3665    vg_assert(SUCCESS);
3666    if (RES == 0) {
3667       if (ARG1 != 0)
3668          POST_timeval_WRITE( (Addr)ARG1 );
3669       if (ARG2 != 0)
3670 	 POST_MEM_WRITE( ARG2, sizeof(struct vki_timezone) );
3671    }
3672 }
3673 
PRE(sys_settimeofday)3674 PRE(sys_settimeofday)
3675 {
3676    PRINT("sys_settimeofday ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",
3677          ARG1,ARG2);
3678    PRE_REG_READ2(long, "settimeofday",
3679                  struct timeval *, tv, struct timezone *, tz);
3680    if (ARG1 != 0)
3681       PRE_timeval_READ( "settimeofday(tv)", (Addr)ARG1 );
3682    if (ARG2 != 0) {
3683       PRE_MEM_READ( "settimeofday(tz)", ARG2, sizeof(struct vki_timezone) );
3684       /* maybe should warn if tz->tz_dsttime is non-zero? */
3685    }
3686 }
3687 
/* Pre-syscall wrapper for getuid: takes no arguments, so it only
   emits the trace line and the zero-argument register declaration. */
PRE(sys_getuid)
{
   PRINT("sys_getuid ( )");
   PRE_REG_READ0(long, "getuid");
}
3693 
ML_(PRE_unknown_ioctl)3694 void ML_(PRE_unknown_ioctl)(ThreadId tid, UWord request, UWord arg)
3695 {
3696    /* We don't have any specific information on it, so
3697       try to do something reasonable based on direction and
3698       size bits.  The encoding scheme is described in
3699       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3700 
3701       According to Simon Hausmann, _IOC_READ means the kernel
3702       writes a value to the ioctl value passed from the user
3703       space and the other way around with _IOC_WRITE. */
3704 
3705 #if defined(VGO_solaris)
3706    /* Majority of Solaris ioctl requests does not honour direction hints. */
3707    UInt dir  = _VKI_IOC_NONE;
3708 #else
3709    UInt dir  = _VKI_IOC_DIR(request);
3710 #endif
3711    UInt size = _VKI_IOC_SIZE(request);
3712 
3713    if (SimHintiS(SimHint_lax_ioctls, VG_(clo_sim_hints))) {
3714       /*
3715        * Be very lax about ioctl handling; the only
3716        * assumption is that the size is correct. Doesn't
3717        * require the full buffer to be initialized when
3718        * writing.  Without this, using some device
3719        * drivers with a large number of strange ioctl
3720        * commands becomes very tiresome.
3721        */
3722    } else if (/* size == 0 || */ dir == _VKI_IOC_NONE) {
3723       static UWord unknown_ioctl[10];
3724       static Int moans = sizeof(unknown_ioctl) / sizeof(unknown_ioctl[0]);
3725 
3726       if (moans > 0 && !VG_(clo_xml)) {
3727          /* Check if have not already moaned for this request. */
3728          UInt i;
3729          for (i = 0; i < sizeof(unknown_ioctl)/sizeof(unknown_ioctl[0]); i++) {
3730             if (unknown_ioctl[i] == request)
3731                break;
3732             if (unknown_ioctl[i] == 0) {
3733                unknown_ioctl[i] = request;
3734                moans--;
3735                VG_(umsg)("Warning: noted but unhandled ioctl 0x%lx"
3736                          " with no size/direction hints.\n", request);
3737                VG_(umsg)("   This could cause spurious value errors to appear.\n");
3738                VG_(umsg)("   See README_MISSING_SYSCALL_OR_IOCTL for "
3739                          "guidance on writing a proper wrapper.\n" );
3740                //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3741                return;
3742             }
3743          }
3744       }
3745    } else {
3746       //VG_(message)(Vg_UserMsg, "UNKNOWN ioctl %#lx\n", request);
3747       //VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
3748       if ((dir & _VKI_IOC_WRITE) && size > 0)
3749          PRE_MEM_READ( "ioctl(generic)", arg, size);
3750       if ((dir & _VKI_IOC_READ) && size > 0)
3751          PRE_MEM_WRITE( "ioctl(generic)", arg, size);
3752    }
3753 }
3754 
ML_(POST_unknown_ioctl)3755 void ML_(POST_unknown_ioctl)(ThreadId tid, UInt res, UWord request, UWord arg)
3756 {
3757    /* We don't have any specific information on it, so
3758       try to do something reasonable based on direction and
3759       size bits.  The encoding scheme is described in
3760       /usr/include/asm/ioctl.h or /usr/include/sys/ioccom.h .
3761 
3762       According to Simon Hausmann, _IOC_READ means the kernel
3763       writes a value to the ioctl value passed from the user
3764       space and the other way around with _IOC_WRITE. */
3765 
3766    UInt dir  = _VKI_IOC_DIR(request);
3767    UInt size = _VKI_IOC_SIZE(request);
3768    if (size > 0 && (dir & _VKI_IOC_READ)
3769        && res == 0
3770        && arg != (Addr)NULL) {
3771       POST_MEM_WRITE(arg, size);
3772    }
3773 }
3774 
3775 /*
3776    If we're sending a SIGKILL to one of our own threads, then simulate
3777    it rather than really sending the signal, so that the target thread
3778    gets a chance to clean up.  Returns True if we did the killing (or
3779    no killing is necessary), and False if the caller should use the
3780    normal kill syscall.
3781 
3782    "pid" is any pid argument which can be passed to kill; group kills
3783    (< -1, 0), and owner kills (-1) are ignored, on the grounds that
3784    they'll most likely hit all the threads and we won't need to worry
3785    about cleanup.  In truth, we can't fully emulate these multicast
3786    kills.
3787 
3788    "tgid" is a thread group id.  If it is not -1, then the target
3789    thread must be in that thread group.
3790  */
ML_(do_sigkill)3791 Bool ML_(do_sigkill)(Int pid, Int tgid)
3792 {
3793    ThreadState *tst;
3794    ThreadId tid;
3795 
3796    if (pid <= 0)
3797       return False;
3798 
3799    tid = VG_(lwpid_to_vgtid)(pid);
3800    if (tid == VG_INVALID_THREADID)
3801       return False;		/* none of our threads */
3802 
3803    tst = VG_(get_ThreadState)(tid);
3804    if (tst == NULL || tst->status == VgTs_Empty)
3805       return False;		/* hm, shouldn't happen */
3806 
3807    if (tgid != -1 && tst->os_state.threadgroup != tgid)
3808       return False;		/* not the right thread group */
3809 
3810    /* Check to see that the target isn't already exiting. */
3811    if (!VG_(is_exiting)(tid)) {
3812       if (VG_(clo_trace_signals))
3813 	 VG_(message)(Vg_DebugMsg,
3814                       "Thread %u being killed with SIGKILL\n",
3815                       tst->tid);
3816 
3817       tst->exitreason = VgSrc_FatalSig;
3818       tst->os_state.fatalsig = VKI_SIGKILL;
3819 
3820       if (!VG_(is_running_thread)(tid))
3821 	 VG_(get_thread_out_of_syscall)(tid);
3822    }
3823 
3824    return True;
3825 }
3826 
PRE(sys_kill)3827 PRE(sys_kill)
3828 {
3829    PRINT("sys_kill ( %ld, %ld )", SARG1, SARG2);
3830    PRE_REG_READ2(long, "kill", int, pid, int, signal);
3831    if (!ML_(client_signal_OK)(ARG2)) {
3832       SET_STATUS_Failure( VKI_EINVAL );
3833       return;
3834    }
3835 
3836    /* If we're sending SIGKILL, check to see if the target is one of
3837       our threads and handle it specially. */
3838    if (ARG2 == VKI_SIGKILL && ML_(do_sigkill)(ARG1, -1))
3839       SET_STATUS_Success(0);
3840    else
3841       /* re syscall3: Darwin has a 3rd arg, which is a flag (boolean)
3842          affecting how posix-compliant the call is.  I guess it is
3843          harmless to pass the 3rd arg on other platforms; hence pass
3844          it on all. */
3845       SET_STATUS_from_SysRes( VG_(do_syscall3)(SYSNO, ARG1, ARG2, ARG3) );
3846 
3847    if (VG_(clo_trace_signals))
3848       VG_(message)(Vg_DebugMsg, "kill: sent signal %ld to pid %ld\n",
3849 		   SARG2, SARG1);
3850 
3851    /* This kill might have given us a pending signal.  Ask for a check once
3852       the syscall is done. */
3853    *flags |= SfPollAfter;
3854 }
3855 
/* Pre-syscall wrapper for link(oldpath, newpath): sets SfMayBlock and
   hands both path strings (ARG1, ARG2) to PRE_MEM_RASCIIZ. */
PRE(sys_link)
{
   *flags |= SfMayBlock;
   PRINT("sys_link ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )", ARG1,
         (char*)(Addr)ARG1,ARG2,(char*)(Addr)ARG2);
   PRE_REG_READ2(long, "link", const char *, oldpath, const char *, newpath);
   PRE_MEM_RASCIIZ( "link(oldpath)", ARG1);
   PRE_MEM_RASCIIZ( "link(newpath)", ARG2);
}
3865 
/* Pre-syscall wrapper for lstat(file_name, buf): hands the file name
   (ARG1) to PRE_MEM_RASCIIZ and registers buf (ARG2) of
   sizeof(struct vki_stat) bytes with PRE_MEM_WRITE. */
PRE(sys_newlstat)
{
   PRINT("sys_newlstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )", ARG1,
         (char*)(Addr)ARG1,ARG2);
   PRE_REG_READ2(long, "lstat", char *, file_name, struct stat *, buf);
   PRE_MEM_RASCIIZ( "lstat(file_name)", ARG1 );
   PRE_MEM_WRITE( "lstat(buf)", ARG2, sizeof(struct vki_stat) );
}
3874 
/* Post-syscall wrapper for lstat: reports the struct vki_stat at ARG2
   via POST_MEM_WRITE. */
POST(sys_newlstat)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
}
3880 
/* Pre-syscall wrapper for mkdir(pathname, mode): sets SfMayBlock and
   hands the path name (ARG1) to PRE_MEM_RASCIIZ. */
PRE(sys_mkdir)
{
   *flags |= SfMayBlock;
   PRINT("sys_mkdir ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
         (HChar*)(Addr)ARG1, SARG2);
   PRE_REG_READ2(long, "mkdir", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "mkdir(pathname)", ARG1 );
}
3889 
/* Pre-syscall wrapper for mprotect(addr, len, prot).
   Fails the call with ENOMEM when ML_(valid_client_addr) rejects the
   range.  Where VKI_PROT_GROWSDOWN is defined and one of the GROWS*
   bits is set in prot, the wrapper rewrites ARG1/ARG2/ARG3 in place so
   that the range is extended to the edge of the containing segment and
   the GROWS* bit is removed; if the neighbouring segment is not the
   expected reservation, or both bits are set, the call fails with
   EINVAL. */
PRE(sys_mprotect)
{
   PRINT("sys_mprotect ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %"
         FMT_REGWORD "u )", ARG1, ARG2, ARG3);
   PRE_REG_READ3(long, "mprotect",
                 unsigned long, addr, vki_size_t, len, unsigned long, prot);

   if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "mprotect")) {
      SET_STATUS_Failure( VKI_ENOMEM );
   }
#if defined(VKI_PROT_GROWSDOWN)
   else
   if (ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP)) {
      /* Deal with mprotects on growable stack areas.

         The critical files to understand all this are mm/mprotect.c
         in the kernel and sysdeps/unix/sysv/linux/dl-execstack.c in
         glibc.

         The kernel provides PROT_GROWSDOWN and PROT_GROWSUP which
         round the start/end address of mprotect to the start/end of
         the underlying vma and glibc uses that as an easy way to
         change the protection of the stack by calling mprotect on the
         last page of the stack with PROT_GROWSDOWN set.

         The sanity check provided by the kernel is that the vma must
         have the VM_GROWSDOWN/VM_GROWSUP flag set as appropriate.  */
      UInt grows = ARG3 & (VKI_PROT_GROWSDOWN|VKI_PROT_GROWSUP);
      NSegment const *aseg = VG_(am_find_nsegment)(ARG1);
      NSegment const *rseg;

      vg_assert(aseg);

      if (grows == VKI_PROT_GROWSDOWN) {
         /* Require an SmUpper reservation ending right below aseg. */
         rseg = VG_(am_next_nsegment)( aseg, False/*backwards*/ );
         if (rseg
             && rseg->kind == SkResvn
             && rseg->smode == SmUpper
             && rseg->end+1 == aseg->start) {
            /* Keep the original end, move the start down to aseg->start. */
            Addr end = ARG1 + ARG2;
            ARG1 = aseg->start;
            ARG2 = end - aseg->start;
            ARG3 &= ~VKI_PROT_GROWSDOWN;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else if (grows == VKI_PROT_GROWSUP) {
         /* Require an SmLower reservation starting right above aseg. */
         rseg = VG_(am_next_nsegment)( aseg, True/*forwards*/ );
         if (rseg
             && rseg->kind == SkResvn
             && rseg->smode == SmLower
             && aseg->end+1 == rseg->start) {
            /* Keep the original start, extend the length to aseg->end. */
            ARG2 = aseg->end - ARG1 + 1;
            ARG3 &= ~VKI_PROT_GROWSUP;
         } else {
            SET_STATUS_Failure( VKI_EINVAL );
         }
      } else {
         /* both GROWSUP and GROWSDOWN */
         SET_STATUS_Failure( VKI_EINVAL );
      }
   }
#endif   // defined(VKI_PROT_GROWSDOWN)
}
3954 
POST(sys_mprotect)3955 POST(sys_mprotect)
3956 {
3957    Addr a    = ARG1;
3958    SizeT len = ARG2;
3959    Int  prot = ARG3;
3960 
3961    ML_(notify_core_and_tool_of_mprotect)(a, len, prot);
3962 }
3963 
PRE(sys_munmap)3964 PRE(sys_munmap)
3965 {
3966    if (0) VG_(printf)("  munmap( %#" FMT_REGWORD "x )\n", ARG1);
3967    PRINT("sys_munmap ( %#" FMT_REGWORD "x, %llu )", ARG1,(ULong)ARG2);
3968    PRE_REG_READ2(long, "munmap", unsigned long, start, vki_size_t, length);
3969 
3970    if (!ML_(valid_client_addr)(ARG1, ARG2, tid, "munmap"))
3971       SET_STATUS_Failure( VKI_EINVAL );
3972 }
3973 
POST(sys_munmap)3974 POST(sys_munmap)
3975 {
3976    Addr  a   = ARG1;
3977    SizeT len = ARG2;
3978 
3979    ML_(notify_core_and_tool_of_munmap)( a, len );
3980 }
3981 
/* Pre-syscall wrapper for mincore(start, length, vec).  The vec buffer
   (ARG3) is registered with PRE_MEM_WRITE at one byte per page of the
   page-rounded length: VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE. */
PRE(sys_mincore)
{
   PRINT("sys_mincore ( %#" FMT_REGWORD "x, %llu, %#" FMT_REGWORD "x )",
         ARG1, (ULong)ARG2, ARG3);
   PRE_REG_READ3(long, "mincore",
                 unsigned long, start, vki_size_t, length,
                 unsigned char *, vec);
   PRE_MEM_WRITE( "mincore(vec)", ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
/* Post-syscall wrapper for mincore: reports the same
   VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE bytes of vec (ARG3) that the
   pre-wrapper registered. */
POST(sys_mincore)
{
   POST_MEM_WRITE( ARG3, VG_PGROUNDUP(ARG2) / VKI_PAGE_SIZE );
}
3995 
PRE(sys_nanosleep)3996 PRE(sys_nanosleep)
3997 {
3998    *flags |= SfMayBlock|SfPostOnFail;
3999    PRINT("sys_nanosleep ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )", ARG1,ARG2);
4000    PRE_REG_READ2(long, "nanosleep",
4001                  struct timespec *, req, struct timespec *, rem);
4002    PRE_MEM_READ( "nanosleep(req)", ARG1, sizeof(struct vki_timespec) );
4003    if (ARG2 != 0)
4004       PRE_MEM_WRITE( "nanosleep(rem)", ARG2, sizeof(struct vki_timespec) );
4005 }
4006 
POST(sys_nanosleep)4007 POST(sys_nanosleep)
4008 {
4009    vg_assert(SUCCESS || FAILURE);
4010    if (ARG2 != 0 && FAILURE && ERR == VKI_EINTR)
4011       POST_MEM_WRITE( ARG2, sizeof(struct vki_timespec) );
4012 }
4013 
4014 #if defined(VGO_linux) || defined(VGO_solaris)
4015 /* Handles the case where the open is of /proc/self/auxv or
4016    /proc/<pid>/auxv, and just gives out a copy of the fd for the
4017    fake file we cooked up at startup (in m_main).  Also, seeks the
4018    cloned fd back to the start.
4019    Returns True if auxv open was handled (status is set). */
ML_(handle_auxv_open)4020 Bool ML_(handle_auxv_open)(SyscallStatus *status, const HChar *filename,
4021                            int flags)
4022 {
4023    HChar  name[30];   // large enough
4024 
4025    if (!ML_(safe_to_deref)((const void *) filename, 1))
4026       return False;
4027 
4028    /* Opening /proc/<pid>/auxv or /proc/self/auxv? */
4029    VG_(sprintf)(name, "/proc/%d/auxv", VG_(getpid)());
4030    if (!VG_STREQ(filename, name) && !VG_STREQ(filename, "/proc/self/auxv"))
4031       return False;
4032 
4033    /* Allow to open the file only for reading. */
4034    if (flags & (VKI_O_WRONLY | VKI_O_RDWR)) {
4035       SET_STATUS_Failure(VKI_EACCES);
4036       return True;
4037    }
4038 
4039 #  if defined(VGO_solaris)
4040    VG_(sprintf)(name, "/proc/self/fd/%d", VG_(cl_auxv_fd));
4041    SysRes sres = VG_(open)(name, flags, 0);
4042    SET_STATUS_from_SysRes(sres);
4043 #  else
4044    SysRes sres = VG_(dup)(VG_(cl_auxv_fd));
4045    SET_STATUS_from_SysRes(sres);
4046    if (!sr_isError(sres)) {
4047       OffT off = VG_(lseek)(sr_Res(sres), 0, VKI_SEEK_SET);
4048       if (off < 0)
4049          SET_STATUS_Failure(VKI_EMFILE);
4050    }
4051 #  endif
4052 
4053    return True;
4054 }
4055 #endif // defined(VGO_linux) || defined(VGO_solaris)
4056 
PRE(sys_open)4057 PRE(sys_open)
4058 {
4059    if (ARG2 & VKI_O_CREAT) {
4060       // 3-arg version
4061       PRINT("sys_open ( %#" FMT_REGWORD "x(%s), %ld, %ld )",ARG1,
4062             (HChar*)(Addr)ARG1, SARG2, SARG3);
4063       PRE_REG_READ3(long, "open",
4064                     const char *, filename, int, flags, int, mode);
4065    } else {
4066       // 2-arg version
4067       PRINT("sys_open ( %#" FMT_REGWORD "x(%s), %ld )",ARG1,
4068             (HChar*)(Addr)ARG1, SARG2);
4069       PRE_REG_READ2(long, "open",
4070                     const char *, filename, int, flags);
4071    }
4072    PRE_MEM_RASCIIZ( "open(filename)", ARG1 );
4073 
4074 #if defined(VGO_linux)
4075    /* Handle the case where the open is of /proc/self/cmdline or
4076       /proc/<pid>/cmdline, and just give it a copy of the fd for the
4077       fake file we cooked up at startup (in m_main).  Also, seek the
4078       cloned fd back to the start. */
4079    {
4080       HChar  name[30];   // large enough
4081       HChar* arg1s = (HChar*) (Addr)ARG1;
4082       SysRes sres;
4083 
4084       VG_(sprintf)(name, "/proc/%d/cmdline", VG_(getpid)());
4085       if (ML_(safe_to_deref)( arg1s, 1 )
4086           && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, "/proc/self/cmdline"))) {
4087          sres = VG_(dup)( VG_(cl_cmdline_fd) );
4088          SET_STATUS_from_SysRes( sres );
4089          if (!sr_isError(sres)) {
4090             OffT off = VG_(lseek)( sr_Res(sres), 0, VKI_SEEK_SET );
4091             if (off < 0)
4092                SET_STATUS_Failure( VKI_EMFILE );
4093          }
4094          return;
4095       }
4096    }
4097 
4098    /* Handle also the case of /proc/self/auxv or /proc/<pid>/auxv. */
4099    if (ML_(handle_auxv_open)(status, (const HChar *)(Addr)ARG1, ARG2))
4100       return;
4101 #endif // defined(VGO_linux)
4102 
4103    /* Otherwise handle normally */
4104    *flags |= SfMayBlock;
4105 }
4106 
POST(sys_open)4107 POST(sys_open)
4108 {
4109    vg_assert(SUCCESS);
4110    if (!ML_(fd_allowed)(RES, "open", tid, True)) {
4111       VG_(close)(RES);
4112       SET_STATUS_Failure( VKI_EMFILE );
4113    } else {
4114       if (VG_(clo_track_fds))
4115          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)(Addr)ARG1);
4116    }
4117 }
4118 
PRE(sys_read)4119 PRE(sys_read)
4120 {
4121    *flags |= SfMayBlock;
4122    PRINT("sys_read ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4123          FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4124    PRE_REG_READ3(ssize_t, "read",
4125                  unsigned int, fd, char *, buf, vki_size_t, count);
4126 
4127    if (!ML_(fd_allowed)(ARG1, "read", tid, False))
4128       SET_STATUS_Failure( VKI_EBADF );
4129    else
4130       PRE_MEM_WRITE( "read(buf)", ARG2, ARG3 );
4131 }
4132 
/* Post-syscall wrapper for read: reports RES bytes of buf (ARG2) via
   POST_MEM_WRITE. */
POST(sys_read)
{
   vg_assert(SUCCESS);
   POST_MEM_WRITE( ARG2, RES );
}
4138 
PRE(sys_write)4139 PRE(sys_write)
4140 {
4141    Bool ok;
4142    *flags |= SfMayBlock;
4143    PRINT("sys_write ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4144          FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4145    PRE_REG_READ3(ssize_t, "write",
4146                  unsigned int, fd, const char *, buf, vki_size_t, count);
4147    /* check to see if it is allowed.  If not, try for an exemption from
4148       --sim-hints=enable-outer (used for self hosting). */
4149    ok = ML_(fd_allowed)(ARG1, "write", tid, False);
4150    if (!ok && ARG1 == 2/*stderr*/
4151            && SimHintiS(SimHint_enable_outer, VG_(clo_sim_hints)))
4152       ok = True;
4153 #if defined(VGO_solaris)
4154    if (!ok && VG_(vfork_fildes_addr) != NULL
4155        && *VG_(vfork_fildes_addr) >= 0 && *VG_(vfork_fildes_addr) == ARG1)
4156       ok = True;
4157 #endif
4158    if (!ok)
4159       SET_STATUS_Failure( VKI_EBADF );
4160    else
4161       PRE_MEM_READ( "write(buf)", ARG2, ARG3 );
4162 }
4163 
/* Pre-syscall wrapper for creat(pathname, mode): sets SfMayBlock and
   hands the path name (ARG1) to PRE_MEM_RASCIIZ. */
PRE(sys_creat)
{
   *flags |= SfMayBlock;
   PRINT("sys_creat ( %#" FMT_REGWORD "x(%s), %ld )", ARG1,
         (HChar*)(Addr)ARG1, SARG2);
   PRE_REG_READ2(long, "creat", const char *, pathname, int, mode);
   PRE_MEM_RASCIIZ( "creat(pathname)", ARG1 );
}
4172 
POST(sys_creat)4173 POST(sys_creat)
4174 {
4175    vg_assert(SUCCESS);
4176    if (!ML_(fd_allowed)(RES, "creat", tid, True)) {
4177       VG_(close)(RES);
4178       SET_STATUS_Failure( VKI_EMFILE );
4179    } else {
4180       if (VG_(clo_track_fds))
4181          ML_(record_fd_open_with_given_name)(tid, RES, (HChar*)(Addr)ARG1);
4182    }
4183 }
4184 
PRE(sys_poll)4185 PRE(sys_poll)
4186 {
4187    /* struct pollfd {
4188         int fd;           -- file descriptor
4189         short events;     -- requested events
4190         short revents;    -- returned events
4191       };
4192       int poll(struct pollfd *ufds, unsigned int nfds, int timeout)
4193    */
4194    UInt i;
4195    struct vki_pollfd* ufds = (struct vki_pollfd *)(Addr)ARG1;
4196    *flags |= SfMayBlock;
4197    PRINT("sys_poll ( %#" FMT_REGWORD "x, %" FMT_REGWORD "u, %ld )\n",
4198          ARG1, ARG2, SARG3);
4199    PRE_REG_READ3(long, "poll",
4200                  struct vki_pollfd *, ufds, unsigned int, nfds, long, timeout);
4201 
4202    for (i = 0; i < ARG2; i++) {
4203       PRE_MEM_READ( "poll(ufds.fd)",
4204                     (Addr)(&ufds[i].fd), sizeof(ufds[i].fd) );
4205       PRE_MEM_READ( "poll(ufds.events)",
4206                     (Addr)(&ufds[i].events), sizeof(ufds[i].events) );
4207       PRE_MEM_WRITE( "poll(ufds.revents)",
4208                      (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4209    }
4210 }
4211 
POST(sys_poll)4212 POST(sys_poll)
4213 {
4214    if (RES >= 0) {
4215       UInt i;
4216       struct vki_pollfd* ufds = (struct vki_pollfd *)(Addr)ARG1;
4217       for (i = 0; i < ARG2; i++)
4218 	 POST_MEM_WRITE( (Addr)(&ufds[i].revents), sizeof(ufds[i].revents) );
4219    }
4220 }
4221 
/* Pre-syscall wrapper for readlink(path, buf, bufsiz).
   The syscall is carried out inside this wrapper.  On Linux and
   Solaris, a request for the process's own executable link
   (PID_EXEPATH / SELF_EXEPATH below) is redirected to the descriptor
   held in VG_(cl_exec_fd); every other path is passed through
   unchanged.  On success with a non-zero result, RES bytes of buf are
   reported as written. */
PRE(sys_readlink)
{
   FUSE_COMPATIBLE_MAY_BLOCK();
   /* Syscall number captured up front and used for both do_syscall3
      calls below. */
   Word saved = SYSNO;

   PRINT("sys_readlink ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x, %llu )",
         ARG1, (char*)(Addr)ARG1, ARG2, (ULong)ARG3);
   PRE_REG_READ3(long, "readlink",
                 const char *, path, char *, buf, int, bufsiz);
   PRE_MEM_RASCIIZ( "readlink(path)", ARG1 );
   PRE_MEM_WRITE( "readlink(buf)", ARG2,ARG3 );


   {
#if defined(VGO_linux) || defined(VGO_solaris)
#if defined(VGO_linux)
#define PID_EXEPATH  "/proc/%d/exe"
#define SELF_EXEPATH "/proc/self/exe"
#define SELF_EXEFD   "/proc/self/fd/%d"
#elif defined(VGO_solaris)
#define PID_EXEPATH  "/proc/%d/path/a.out"
#define SELF_EXEPATH "/proc/self/path/a.out"
#define SELF_EXEFD   "/proc/self/path/%d"
#endif
      /*
       * Handle the case where readlink is looking at /proc/self/exe or
       * /proc/<pid>/exe, or equivalent on Solaris.
       */
      HChar  name[30];   // large enough
      HChar* arg1s = (HChar*) (Addr)ARG1;
      VG_(sprintf)(name, PID_EXEPATH, VG_(getpid)());
      if (ML_(safe_to_deref)(arg1s, 1)
          && (VG_STREQ(arg1s, name) || VG_STREQ(arg1s, SELF_EXEPATH))) {
         /* name is reused to hold the replacement path. */
         VG_(sprintf)(name, SELF_EXEFD, VG_(cl_exec_fd));
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, (UWord)name,
                                                         ARG2, ARG3));
      } else
#endif
      /* When the #if above is inactive this block is unconditional;
         otherwise it is the else-branch of the test above. */
      {
         /* Normal case */
         SET_STATUS_from_SysRes( VG_(do_syscall3)(saved, ARG1, ARG2, ARG3));
      }
   }

   if (SUCCESS && RES > 0)
      POST_MEM_WRITE( ARG2, RES );
}
4269 
PRE(sys_readv)4270 PRE(sys_readv)
4271 {
4272    Int i;
4273    struct vki_iovec * vec;
4274    *flags |= SfMayBlock;
4275    PRINT("sys_readv ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4276          FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4277    PRE_REG_READ3(ssize_t, "readv",
4278                  unsigned long, fd, const struct iovec *, vector,
4279                  unsigned long, count);
4280    if (!ML_(fd_allowed)(ARG1, "readv", tid, False)) {
4281       SET_STATUS_Failure( VKI_EBADF );
4282    } else {
4283       if ((Int)ARG3 >= 0)
4284          PRE_MEM_READ( "readv(vector)", ARG2, ARG3 * sizeof(struct vki_iovec) );
4285 
4286       if (ARG2 != 0) {
4287          /* ToDo: don't do any of the following if the vector is invalid */
4288          vec = (struct vki_iovec *)(Addr)ARG2;
4289          for (i = 0; i < (Int)ARG3; i++)
4290             PRE_MEM_WRITE( "readv(vector[...])",
4291                            (Addr)vec[i].iov_base, vec[i].iov_len );
4292       }
4293    }
4294 }
4295 
POST(sys_readv)4296 POST(sys_readv)
4297 {
4298    vg_assert(SUCCESS);
4299    if (RES > 0) {
4300       Int i;
4301       struct vki_iovec * vec = (struct vki_iovec *)(Addr)ARG2;
4302       Int remains = RES;
4303 
4304       /* RES holds the number of bytes read. */
4305       for (i = 0; i < (Int)ARG3; i++) {
4306 	 Int nReadThisBuf = vec[i].iov_len;
4307 	 if (nReadThisBuf > remains) nReadThisBuf = remains;
4308 	 POST_MEM_WRITE( (Addr)vec[i].iov_base, nReadThisBuf );
4309 	 remains -= nReadThisBuf;
4310 	 if (remains < 0) VG_(core_panic)("readv: remains < 0");
4311       }
4312    }
4313 }
4314 
PRE(sys_rename)4315 PRE(sys_rename)
4316 {
4317    FUSE_COMPATIBLE_MAY_BLOCK();
4318    PRINT("sys_rename ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )", ARG1,
4319          (char*)(Addr)ARG1,ARG2,(char*)(Addr)ARG2);
4320    PRE_REG_READ2(long, "rename", const char *, oldpath, const char *, newpath);
4321    PRE_MEM_RASCIIZ( "rename(oldpath)", ARG1 );
4322    PRE_MEM_RASCIIZ( "rename(newpath)", ARG2 );
4323 }
4324 
PRE(sys_rmdir)4325 PRE(sys_rmdir)
4326 {
4327    *flags |= SfMayBlock;
4328    PRINT("sys_rmdir ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
4329    PRE_REG_READ1(long, "rmdir", const char *, pathname);
4330    PRE_MEM_RASCIIZ( "rmdir(pathname)", ARG1 );
4331 }
4332 
PRE(sys_select)4333 PRE(sys_select)
4334 {
4335    *flags |= SfMayBlock;
4336    PRINT("sys_select ( %ld, %#" FMT_REGWORD "x, %#" FMT_REGWORD "x, %#"
4337          FMT_REGWORD "x, %#" FMT_REGWORD "x )", SARG1, ARG2, ARG3, ARG4, ARG5);
4338    PRE_REG_READ5(long, "select",
4339                  int, n, vki_fd_set *, readfds, vki_fd_set *, writefds,
4340                  vki_fd_set *, exceptfds, struct vki_timeval *, timeout);
4341    // XXX: this possibly understates how much memory is read.
4342    if (ARG2 != 0)
4343       PRE_MEM_READ( "select(readfds)",
4344 		     ARG2, ARG1/8 /* __FD_SETSIZE/8 */ );
4345    if (ARG3 != 0)
4346       PRE_MEM_READ( "select(writefds)",
4347 		     ARG3, ARG1/8 /* __FD_SETSIZE/8 */ );
4348    if (ARG4 != 0)
4349       PRE_MEM_READ( "select(exceptfds)",
4350 		     ARG4, ARG1/8 /* __FD_SETSIZE/8 */ );
4351    if (ARG5 != 0)
4352       PRE_timeval_READ( "select(timeout)", (Addr)ARG5 );
4353 }
4354 
PRE(sys_setgid)4355 PRE(sys_setgid)
4356 {
4357    PRINT("sys_setgid ( %" FMT_REGWORD "u )", ARG1);
4358    PRE_REG_READ1(long, "setgid", vki_gid_t, gid);
4359 }
4360 
PRE(sys_setsid)4361 PRE(sys_setsid)
4362 {
4363    PRINT("sys_setsid ( )");
4364    PRE_REG_READ0(long, "setsid");
4365 }
4366 
PRE(sys_setgroups)4367 PRE(sys_setgroups)
4368 {
4369    PRINT("setgroups ( %llu, %#" FMT_REGWORD "x )", (ULong)ARG1, ARG2);
4370    PRE_REG_READ2(long, "setgroups", int, size, vki_gid_t *, list);
4371    if (ARG1 > 0)
4372       PRE_MEM_READ( "setgroups(list)", ARG2, ARG1 * sizeof(vki_gid_t) );
4373 }
4374 
PRE(sys_setpgid)4375 PRE(sys_setpgid)
4376 {
4377    PRINT("setpgid ( %ld, %ld )", SARG1, SARG2);
4378    PRE_REG_READ2(long, "setpgid", vki_pid_t, pid, vki_pid_t, pgid);
4379 }
4380 
PRE(sys_setregid)4381 PRE(sys_setregid)
4382 {
4383    PRINT("sys_setregid ( %" FMT_REGWORD "u, %" FMT_REGWORD "u )", ARG1, ARG2);
4384    PRE_REG_READ2(long, "setregid", vki_gid_t, rgid, vki_gid_t, egid);
4385 }
4386 
PRE(sys_setreuid)4387 PRE(sys_setreuid)
4388 {
4389    PRINT("sys_setreuid ( 0x%" FMT_REGWORD "x, 0x%" FMT_REGWORD "x )",
4390          ARG1, ARG2);
4391    PRE_REG_READ2(long, "setreuid", vki_uid_t, ruid, vki_uid_t, euid);
4392 }
4393 
PRE(sys_setrlimit)4394 PRE(sys_setrlimit)
4395 {
4396    UWord arg1 = ARG1;
4397    PRINT("sys_setrlimit ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x )", ARG1, ARG2);
4398    PRE_REG_READ2(long, "setrlimit",
4399                  unsigned int, resource, struct rlimit *, rlim);
4400    PRE_MEM_READ( "setrlimit(rlim)", ARG2, sizeof(struct vki_rlimit) );
4401 
4402 #ifdef _RLIMIT_POSIX_FLAG
4403    // Darwin will sometimes set _RLIMIT_POSIX_FLAG on setrlimit calls.
4404    // Unset it here to make the if statements below work correctly.
4405    arg1 &= ~_RLIMIT_POSIX_FLAG;
4406 #endif
4407 
4408    if (!VG_(am_is_valid_for_client)(ARG2, sizeof(struct vki_rlimit),
4409                                     VKI_PROT_READ)) {
4410       SET_STATUS_Failure( VKI_EFAULT );
4411    }
4412    else if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4413             > ((struct vki_rlimit *)(Addr)ARG2)->rlim_max) {
4414       SET_STATUS_Failure( VKI_EINVAL );
4415    }
4416    else if (arg1 == VKI_RLIMIT_NOFILE) {
4417       if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur > VG_(fd_hard_limit) ||
4418           ((struct vki_rlimit *)(Addr)ARG2)->rlim_max != VG_(fd_hard_limit)) {
4419          SET_STATUS_Failure( VKI_EPERM );
4420       }
4421       else {
4422          VG_(fd_soft_limit) = ((struct vki_rlimit *)(Addr)ARG2)->rlim_cur;
4423          SET_STATUS_Success( 0 );
4424       }
4425    }
4426    else if (arg1 == VKI_RLIMIT_DATA) {
4427       if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4428            > VG_(client_rlimit_data).rlim_max ||
4429           ((struct vki_rlimit *)(Addr)ARG2)->rlim_max
4430            > VG_(client_rlimit_data).rlim_max) {
4431          SET_STATUS_Failure( VKI_EPERM );
4432       }
4433       else {
4434          VG_(client_rlimit_data) = *(struct vki_rlimit *)(Addr)ARG2;
4435          SET_STATUS_Success( 0 );
4436       }
4437    }
4438    else if (arg1 == VKI_RLIMIT_STACK && tid == 1) {
4439       if (((struct vki_rlimit *)(Addr)ARG2)->rlim_cur
4440            > VG_(client_rlimit_stack).rlim_max ||
4441           ((struct vki_rlimit *)(Addr)ARG2)->rlim_max
4442            > VG_(client_rlimit_stack).rlim_max) {
4443          SET_STATUS_Failure( VKI_EPERM );
4444       }
4445       else {
4446          /* Change the value of client_stack_szB to the rlim_cur value but
4447             only if it is smaller than the size of the allocated stack for the
4448             client.
4449             TODO: All platforms should set VG_(clstk_max_size) as part of their
4450                   setup_client_stack(). */
4451          if ((VG_(clstk_max_size) == 0)
4452              || (((struct vki_rlimit *) (Addr)ARG2)->rlim_cur <= VG_(clstk_max_size)))
4453             VG_(threads)[tid].client_stack_szB = ((struct vki_rlimit *)(Addr)ARG2)->rlim_cur;
4454 
4455          VG_(client_rlimit_stack) = *(struct vki_rlimit *)(Addr)ARG2;
4456          SET_STATUS_Success( 0 );
4457       }
4458    }
4459 }
4460 
PRE(sys_setuid)4461 PRE(sys_setuid)
4462 {
4463    PRINT("sys_setuid ( %" FMT_REGWORD "u )", ARG1);
4464    PRE_REG_READ1(long, "setuid", vki_uid_t, uid);
4465 }
4466 
PRE(sys_newstat)4467 PRE(sys_newstat)
4468 {
4469    FUSE_COMPATIBLE_MAY_BLOCK();
4470    PRINT("sys_newstat ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
4471          ARG1,(char*)(Addr)ARG1,ARG2);
4472    PRE_REG_READ2(long, "stat", char *, file_name, struct stat *, buf);
4473    PRE_MEM_RASCIIZ( "stat(file_name)", ARG1 );
4474    PRE_MEM_WRITE( "stat(buf)", ARG2, sizeof(struct vki_stat) );
4475 }
4476 
POST(sys_newstat)4477 POST(sys_newstat)
4478 {
4479    POST_MEM_WRITE( ARG2, sizeof(struct vki_stat) );
4480 }
4481 
PRE(sys_statfs)4482 PRE(sys_statfs)
4483 {
4484    FUSE_COMPATIBLE_MAY_BLOCK();
4485    PRINT("sys_statfs ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
4486          ARG1, (char*)(Addr)ARG1, ARG2);
4487    PRE_REG_READ2(long, "statfs", const char *, path, struct statfs *, buf);
4488    PRE_MEM_RASCIIZ( "statfs(path)", ARG1 );
4489    PRE_MEM_WRITE( "statfs(buf)", ARG2, sizeof(struct vki_statfs) );
4490 }
POST(sys_statfs)4491 POST(sys_statfs)
4492 {
4493    POST_MEM_WRITE( ARG2, sizeof(struct vki_statfs) );
4494 }
4495 
PRE(sys_statfs64)4496 PRE(sys_statfs64)
4497 {
4498    PRINT("sys_statfs64 ( %#" FMT_REGWORD "x(%s), %llu, %#" FMT_REGWORD "x )",
4499          ARG1, (char*)(Addr)ARG1, (ULong)ARG2, ARG3);
4500    PRE_REG_READ3(long, "statfs64",
4501                  const char *, path, vki_size_t, size, struct statfs64 *, buf);
4502    PRE_MEM_RASCIIZ( "statfs64(path)", ARG1 );
4503    PRE_MEM_WRITE( "statfs64(buf)", ARG3, ARG2 );
4504 }
POST(sys_statfs64)4505 POST(sys_statfs64)
4506 {
4507    POST_MEM_WRITE( ARG3, ARG2 );
4508 }
4509 
PRE(sys_symlink)4510 PRE(sys_symlink)
4511 {
4512    *flags |= SfMayBlock;
4513    PRINT("sys_symlink ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x(%s) )",
4514          ARG1, (char*)(Addr)ARG1, ARG2, (char*)(Addr)ARG2);
4515    PRE_REG_READ2(long, "symlink", const char *, oldpath, const char *, newpath);
4516    PRE_MEM_RASCIIZ( "symlink(oldpath)", ARG1 );
4517    PRE_MEM_RASCIIZ( "symlink(newpath)", ARG2 );
4518 }
4519 
PRE(sys_time)4520 PRE(sys_time)
4521 {
4522    /* time_t time(time_t *t); */
4523    PRINT("sys_time ( %#" FMT_REGWORD "x )",ARG1);
4524    PRE_REG_READ1(long, "time", int *, t);
4525    if (ARG1 != 0) {
4526       PRE_MEM_WRITE( "time(t)", ARG1, sizeof(vki_time_t) );
4527    }
4528 }
4529 
POST(sys_time)4530 POST(sys_time)
4531 {
4532    if (ARG1 != 0) {
4533       POST_MEM_WRITE( ARG1, sizeof(vki_time_t) );
4534    }
4535 }
4536 
PRE(sys_times)4537 PRE(sys_times)
4538 {
4539    PRINT("sys_times ( %#" FMT_REGWORD "x )", ARG1);
4540    PRE_REG_READ1(long, "times", struct tms *, buf);
4541    if (ARG1 != 0) {
4542       PRE_MEM_WRITE( "times(buf)", ARG1, sizeof(struct vki_tms) );
4543    }
4544 }
4545 
POST(sys_times)4546 POST(sys_times)
4547 {
4548    if (ARG1 != 0) {
4549       POST_MEM_WRITE( ARG1, sizeof(struct vki_tms) );
4550    }
4551 }
4552 
PRE(sys_umask)4553 PRE(sys_umask)
4554 {
4555    PRINT("sys_umask ( %ld )", SARG1);
4556    PRE_REG_READ1(long, "umask", int, mask);
4557 }
4558 
PRE(sys_unlink)4559 PRE(sys_unlink)
4560 {
4561    *flags |= SfMayBlock;
4562    PRINT("sys_unlink ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
4563    PRE_REG_READ1(long, "unlink", const char *, pathname);
4564    PRE_MEM_RASCIIZ( "unlink(pathname)", ARG1 );
4565 }
4566 
4567 #if !defined(VGO_dragonfly)
PRE(sys_newuname)4568 PRE(sys_newuname)
4569 {
4570    PRINT("sys_newuname ( %#" FMT_REGWORD "x )", ARG1);
4571    PRE_REG_READ1(long, "uname", struct new_utsname *, buf);
4572    PRE_MEM_WRITE( "uname(buf)", ARG1, sizeof(struct vki_new_utsname) );
4573 }
4574 
POST(sys_newuname)4575 POST(sys_newuname)
4576 {
4577    if (ARG1 != 0) {
4578       POST_MEM_WRITE( ARG1, sizeof(struct vki_new_utsname) );
4579    }
4580 }
4581 #endif
4582 
PRE(sys_waitpid)4583 PRE(sys_waitpid)
4584 {
4585    *flags |= SfMayBlock;
4586    PRINT("sys_waitpid ( %ld, %#" FMT_REGWORD "x, %ld )", SARG1, ARG2, SARG3);
4587    PRE_REG_READ3(long, "waitpid",
4588                  vki_pid_t, pid, unsigned int *, status, int, options);
4589 
4590    if (ARG2 != (Addr)NULL)
4591       PRE_MEM_WRITE( "waitpid(status)", ARG2, sizeof(int) );
4592 }
4593 
POST(sys_waitpid)4594 POST(sys_waitpid)
4595 {
4596    if (ARG2 != (Addr)NULL)
4597       POST_MEM_WRITE( ARG2, sizeof(int) );
4598 }
4599 
PRE(sys_wait4)4600 PRE(sys_wait4)
4601 {
4602    *flags |= SfMayBlock;
4603    PRINT("sys_wait4 ( %ld, %#" FMT_REGWORD "x, %ld, %#" FMT_REGWORD "x )",
4604          SARG1, ARG2, SARG3, ARG4);
4605 
4606    PRE_REG_READ4(long, "wait4",
4607                  vki_pid_t, pid, unsigned int *, status, int, options,
4608                  struct rusage *, rusage);
4609    if (ARG2 != (Addr)NULL)
4610       PRE_MEM_WRITE( "wait4(status)", ARG2, sizeof(int) );
4611    if (ARG4 != (Addr)NULL)
4612       PRE_MEM_WRITE( "wait4(rusage)", ARG4, sizeof(struct vki_rusage) );
4613 }
4614 
POST(sys_wait4)4615 POST(sys_wait4)
4616 {
4617    if (ARG2 != (Addr)NULL)
4618       POST_MEM_WRITE( ARG2, sizeof(int) );
4619    if (ARG4 != (Addr)NULL)
4620       POST_MEM_WRITE( ARG4, sizeof(struct vki_rusage) );
4621 }
4622 
PRE(sys_writev)4623 PRE(sys_writev)
4624 {
4625    Int i;
4626    struct vki_iovec * vec;
4627    *flags |= SfMayBlock;
4628    PRINT("sys_writev ( %" FMT_REGWORD "u, %#" FMT_REGWORD "x, %"
4629          FMT_REGWORD "u )", ARG1, ARG2, ARG3);
4630    PRE_REG_READ3(ssize_t, "writev",
4631                  unsigned long, fd, const struct iovec *, vector,
4632                  unsigned long, count);
4633    if (!ML_(fd_allowed)(ARG1, "writev", tid, False)) {
4634       SET_STATUS_Failure( VKI_EBADF );
4635    } else {
4636       if ((Int)ARG3 >= 0)
4637          PRE_MEM_READ( "writev(vector)",
4638                        ARG2, ARG3 * sizeof(struct vki_iovec) );
4639       if (ARG2 != 0) {
4640          /* ToDo: don't do any of the following if the vector is invalid */
4641          vec = (struct vki_iovec *)(Addr)ARG2;
4642          for (i = 0; i < (Int)ARG3; i++)
4643             PRE_MEM_READ( "writev(vector[...])",
4644                            (Addr)vec[i].iov_base, vec[i].iov_len );
4645       }
4646    }
4647 }
4648 
PRE(sys_utimes)4649 PRE(sys_utimes)
4650 {
4651    FUSE_COMPATIBLE_MAY_BLOCK();
4652    PRINT("sys_utimes ( %#" FMT_REGWORD "x(%s), %#" FMT_REGWORD "x )",
4653          ARG1, (char*)(Addr)ARG1, ARG2);
4654    PRE_REG_READ2(long, "utimes", char *, filename, struct timeval *, tvp);
4655    PRE_MEM_RASCIIZ( "utimes(filename)", ARG1 );
4656    if (ARG2 != 0) {
4657       PRE_timeval_READ( "utimes(tvp[0])", (Addr)ARG2 );
4658       PRE_timeval_READ( "utimes(tvp[1])",
4659                         (Addr)ARG2+sizeof(struct vki_timeval) );
4660    }
4661 }
4662 
PRE(sys_acct)4663 PRE(sys_acct)
4664 {
4665    PRINT("sys_acct ( %#" FMT_REGWORD "x(%s) )", ARG1,(char*)(Addr)ARG1);
4666    PRE_REG_READ1(long, "acct", const char *, filename);
4667    PRE_MEM_RASCIIZ( "acct(filename)", ARG1 );
4668 }
4669 
PRE(sys_pause)4670 PRE(sys_pause)
4671 {
4672    *flags |= SfMayBlock;
4673    PRINT("sys_pause ( )");
4674    PRE_REG_READ0(long, "pause");
4675 }
4676 
PRE(sys_sigaltstack)4677 PRE(sys_sigaltstack)
4678 {
4679    PRINT("sigaltstack ( %#" FMT_REGWORD "x, %#" FMT_REGWORD "x )",ARG1,ARG2);
4680    PRE_REG_READ2(int, "sigaltstack",
4681                  const vki_stack_t *, ss, vki_stack_t *, oss);
4682    if (ARG1 != 0) {
4683       const vki_stack_t *ss = (vki_stack_t *)(Addr)ARG1;
4684       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_sp, sizeof(ss->ss_sp) );
4685       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_flags, sizeof(ss->ss_flags) );
4686       PRE_MEM_READ( "sigaltstack(ss)", (Addr)&ss->ss_size, sizeof(ss->ss_size) );
4687    }
4688    if (ARG2 != 0) {
4689       PRE_MEM_WRITE( "sigaltstack(oss)", ARG2, sizeof(vki_stack_t) );
4690    }
4691 
4692    /* Be safe. */
4693    if (ARG1 && !ML_(safe_to_deref((void*)(Addr)ARG1, sizeof(vki_stack_t)))) {
4694       SET_STATUS_Failure(VKI_EFAULT);
4695       return;
4696    }
4697    if (ARG2 && !ML_(safe_to_deref((void*)(Addr)ARG2, sizeof(vki_stack_t)))) {
4698       SET_STATUS_Failure(VKI_EFAULT);
4699       return;
4700    }
4701 
4702    SET_STATUS_from_SysRes(
4703       VG_(do_sys_sigaltstack) (tid, (vki_stack_t*)(Addr)ARG1,
4704                               (vki_stack_t*)(Addr)ARG2)
4705    );
4706 }
POST(sys_sigaltstack)4707 POST(sys_sigaltstack)
4708 {
4709    vg_assert(SUCCESS);
4710    if (RES == 0 && ARG2 != 0)
4711       POST_MEM_WRITE( ARG2, sizeof(vki_stack_t));
4712 }
4713 
PRE(sys_sethostname)4714 PRE(sys_sethostname)
4715 {
4716    PRINT("sys_sethostname ( %#" FMT_REGWORD "x, %ld )", ARG1, SARG2);
4717    PRE_REG_READ2(long, "sethostname", char *, name, int, len);
4718    PRE_MEM_READ( "sethostname(name)", ARG1, ARG2 );
4719 }
4720 
4721 #undef PRE
4722 #undef POST
4723 
4724 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris) || defined(VGO_dragonfly)
4725 
4726 /*--------------------------------------------------------------------*/
4727 /*--- end                                                          ---*/
4728 /*--------------------------------------------------------------------*/
4729