1import claripy
2import logging
3import itertools
4
5from .memory_mixins import DefaultMemory
6from ..state_plugins.plugin import SimStatePlugin
7from ..state_plugins.sim_action_object import SimActionObject
8from .. import sim_options
9
# Module-level logger, named after this module.
l = logging.getLogger(name=__name__)

# Shared monotonic counter; SimFileBase.make_ident() draws one value per autogenerated file identifier.
file_counter = itertools.count()
# Shared monotonic counter; not referenced in this part of the file.
# NOTE(review): presumably consumed by dialogue-style file objects defined elsewhere -- confirm.
dialogue_counter = itertools.count()
14
class Flags: # pylint: disable=W0232,
    """
    Namespace of integer constants for the ``O_*`` file-open flags used by this package.

    The class is never instantiated; it only groups the values. The numbers are specific to this package and are
    listed here in ascending order, written in hexadecimal so the individual bits are easy to see.
    """
    # access mode: the two lowest bits
    O_RDONLY = 0x0
    O_WRONLY = 0x1
    O_RDWR = 0x2
    O_ACCMODE = 0x3  # bitmask for read/write mode

    # single-bit flags
    O_ASYNC = 0x40
    # TODO mode for this flag
    O_CREAT = 0x100
    O_CLOEXEC = 0x200
    O_NOCTTY = 0x400
    O_TRUNC = 0x400  # NOTE(review): identical to O_NOCTTY, so the two cannot be told apart -- confirm intended
    O_EXCL = 0x800
    O_APPEND = 0x1000
    O_NONBLOCK = 0x2000
    O_NODELAY = O_NONBLOCK  # alias: same value as O_NONBLOCK
    O_DIRECT = 0x40000
    O_LARGEFILE = 0x100000
    O_DIRECTORY = 0x200000
    O_NOFOLLOW = 0x400000
    O_NOATIME = 0x1000000

    # multi-bit flag
    O_SYNC = 0x4010000
36
37
def _deps_unpack(a):
    """
    Split a value into its AST and its dependency information.

    :param a:   Either a SimActionObject or any other value.
    :return:    A 3-tuple ``(ast, reg_deps, tmp_deps)``. For a SimActionObject these are read from the attributes of
                the same names; any other value is returned unchanged with ``None`` for both dependency slots.
    """
    if not isinstance(a, SimActionObject):
        return a, None, None
    return a.ast, a.reg_deps, a.tmp_deps
43
class SimFileBase(SimStatePlugin):
    """
    SimFiles are the storage mechanisms used by SimFileDescriptors.

    Different types of SimFiles can have drastically different interfaces, and as a result there's not much that can be
    specified on this base class. All the read and write methods take a ``pos`` argument, which may have different
    semantics per-class. ``0`` will always be a valid position to use, though, and the next position you should use
    is part of the return tuple.

    Some simfiles are "streams", meaning that the position that reads come from is determined not by the position you
    pass in (it will in fact be ignored), but by an internal variable. This is stored as ``.pos`` if you care to read
    it. Don't write to it. The same lack-of-semantics applies to this field as well.

    :ivar name:     The name of the file. Purely for cosmetic purposes
    :ivar ident:    The identifier of the file, typically autogenerated from the name and a nonce. Purely for cosmetic
                    purposes, but does appear in symbolic values autogenerated in the file.
    :ivar seekable: Bool indicating whether seek operations on this file should succeed. If this is True, then ``pos``
                    must be a number of bytes from the start of the file.
    :ivar writable: Bool indicating whether writing to this file is allowed.
    :ivar pos:      If the file is a stream, this will be the current position. Otherwise, None.
    :ivar concrete: Whether or not this file contains mostly concrete data. Will be used by some SimProcedures to
                    choose how to handle variable-length operations like fgets.
    """

    # class-level defaults; subclasses and instances override them as needed
    seekable = False
    pos = None

    def __init__(self, name=None, writable=True, ident=None, concrete=False, **kwargs):
        """
        :param name:        The name of the file (cosmetic).
        :param writable:    Whether writing to this file is allowed.
        :param ident:       Explicit identifier. When None, one is generated from ``name`` via :meth:`make_ident`.
        :param concrete:    Whether the file contains mostly concrete data.
        :param kwargs:      Forwarded to the next ``__init__`` in the MRO. If it contains a ``memory_id`` key, its
                            value is replaced by the file's identifier.
        """
        self.name = name
        self.ident = ident if ident is not None else self.make_ident(name)
        self.writable = writable
        self.concrete = concrete

        # Only touch memory_id when the caller supplied it: the key is overridden, never introduced.
        if 'memory_id' in kwargs:
            kwargs['memory_id'] = self.ident
        super().__init__(**kwargs)

    @staticmethod
    def make_ident(name):
        """
        Build a printable identifier for a file from its name.

        :param name:    The file name as ``str`` or ``bytes``, or None.
        :return:        ``'file'`` if ``name`` is None; otherwise ``'file_<n>_<nice>'`` where ``<n>`` is the next
                        value of the module-wide ``file_counter`` and ``<nice>`` is the name with every byte outside
                        0x20..0x7e replaced by ``?``. At most three ``?`` are emitted per run of such bytes; the
                        rest of the run is dropped.
        """
        if name is None:
            return 'file'

        if isinstance(name, str):
            name = name.encode()

        def generate():
            consecutive_bad = 0
            for ch in name:
                if 0x20 <= ch <= 0x7e:
                    consecutive_bad = 0
                    yield chr(ch)
                elif consecutive_bad < 3:
                    consecutive_bad += 1
                    yield '?'

        nice_name = ''.join(generate())
        return 'file_%d_%s' % (next(file_counter), nice_name)

    def concretize(self, **kwargs):
        """
        Return a concretization of the contents of the file. The type of the return value of this method will vary
        depending on which kind of SimFile you're using.
        """
        raise NotImplementedError

    def read(self, pos, size, **kwargs):
        """
        Read some data from the file.

        :param pos:     The offset in the file to read from.
        :param size:    The size to read. May be symbolic.
        :return:        A tuple of the data read (a bitvector of the length that is the maximum length of the read),
                        the actual size of the read, and the new file position pointer.
        """
        raise NotImplementedError

    def write(self, pos, data, size=None, **kwargs):
        """
        Write some data to the file.

        :param pos:     The offset in the file to write to. May be ignored if the file is a stream or device.
        :param data:    The data to write as a bitvector
        :param size:    The optional size of the data to write. If not provided will default to the length of the data.
                        Must be constrained to less than or equal to the size of the data.
        :return:        The new file position pointer.
        """
        raise NotImplementedError

    @property
    def size(self):
        """
        The number of data bytes stored by the file at present. May be a symbolic value.
        """
        raise NotImplementedError

    @SimStatePlugin.memo
    def copy(self, memo):
        """
        Copy the attributes owned by this base class onto the object produced by the parent ``copy``.

        :param memo:    The memoisation dict of the copy protocol. It is forwarded to the parent implementation,
                        exactly as every other ``copy`` override in this module does.
        :return:        The copied object.
        """
        o = super().copy(memo)
        o.name = self.name
        o.ident = self.ident
        o.writable = self.writable
        o.concrete = self.concrete
        return o
150
151
class SimFile(SimFileBase, DefaultMemory):  # TODO: pick a better base class omg
    """
    The normal SimFile is meant to model files on disk. It is itself a memory object (it derives from DefaultMemory),
    so reading and writing boil down to ``load`` and ``store`` on ``self``.

    :param name:        The name of the file
    :param content:     Optional initial content for the file as a string or bitvector
    :param size:        Optional size of the file. If content is not specified, it defaults to zero
    :param has_end:     Whether the size boundary is treated as the end of the file or a frontier at which new content
                        will be generated. If unspecified, will pick its value based on options.FILES_HAVE_EOF. Another
                        caveat is that if neither content nor size is given, this value will default to False.
    :param seekable:    Optional bool indicating whether seek operations on this file should succeed, default True.
    :param writable:    Whether writing to this file is allowed
    :param concrete:    Whether or not this file contains mostly concrete data. Will be used by some SimProcedures to
                        choose how to handle variable-length operations like fgets. When left as None it becomes True
                        for bytes/str content and for non-symbolic bitvector content, False otherwise.

    :ivar has_end:      Whether this file has an EOF
    """
    def __init__(self, name=None, content=None, size=None, has_end=None, seekable=True, writable=True, ident=None, concrete=None, **kwargs):
        kwargs.setdefault('memory_id', 'file')
        super().__init__(name=name, writable=writable, ident=ident, **kwargs)
        self._size = size
        self.has_end = has_end
        self.seekable = seekable

        # There is no state yet, so the content is normalised to a bitvector here and only stored in set_state().
        initial = _deps_unpack(content)[0]
        if initial is not None:
            if type(initial) is str:
                initial = initial.encode()

            if type(initial) is bytes:
                if concrete is None:
                    concrete = True
                initial = claripy.BVV(initial)
            elif isinstance(initial, claripy.Bits):
                if concrete is None and not initial.symbolic:
                    concrete = True
            else:
                raise TypeError("Can't handle SimFile content of type %s" % type(initial))

        self.concrete = False if concrete is None else concrete

        if initial is not None:
            # stash until a state is attached
            self.__content = initial
            if self._size is None:
                self._size = len(initial) // 8
        elif self._size is None:
            self._size = 0
            if has_end is None:
                self.has_end = False

    @property
    def category(self): # override trying to determine from self.id to allow arbitrary idents
        return 'file'

    def set_state(self, state):
        """
        Attach the state, then finish the initialisation that needed one: store the stashed initial content at
        offset 0, resolve ``has_end`` from the state options if it is still None, and turn an integer size into a
        bitvector of ``state.arch.bits`` bits.

        :raises TypeError:  If the size is a bitvector whose width is not ``state.arch.bits``.
        """
        super().set_state(state)
        try:
            stashed = self.__content
        except AttributeError:
            # nothing was stashed (or it was already flushed by an earlier call)
            pass
        else:
            self.store(0, stashed)
            del self.__content

        if self.has_end is None:
            self.has_end = sim_options.FILES_HAVE_EOF in state.options

        word_bits = state.arch.bits
        if type(self._size) is int:
            self._size = claripy.BVV(self._size, word_bits)
        elif len(self._size) != word_bits:
            raise TypeError("SimFile size must be a bitvector of size %d (arch.bits)" % word_bits)

    @property
    def size(self):
        return self._size

    def concretize(self, **kwargs):
        """
        Return a concretization of the contents of the file, as a flat bytestring.

        The minimum possible file size is used, and the evaluation is constrained to that size. ``kwargs`` are
        passed to the solver; ``cast_to`` defaults to ``bytes``.
        """
        smallest = self.state.solver.min(self._size, **kwargs)
        contents = self.load(0, smallest)

        kwargs.setdefault('cast_to', bytes)
        kwargs['extra_constraints'] = tuple(kwargs.get('extra_constraints', ())) + (self._size == smallest,)
        return self.state.solver.eval(contents, **kwargs)

    def read(self, pos, size, **kwargs):
        """
        Read from the file at ``pos``.

        :param pos:             The offset to read from.
        :param size:            The requested size; may be symbolic.
        :param disable_actions: (keyword) forwarded to ``load``, default False.
        :param inspect:         (keyword) forwarded to ``load``, default True.
        :return:                A tuple of the loaded data (as wide as the largest size considered), the real size
                                of the read, and the position after the read.
        """
        disable_actions = kwargs.pop('disable_actions', False)
        inspect = kwargs.pop('inspect', True)
        solver = self.state.solver

        # Step 1: pick a concrete width for the memory load without concretizing the size itself
        if solver.symbolic(size):
            try:
                load_width = solver.max(size, extra_constraints=(size < self.state.libc.max_packet_size,))
            except SimSolverError:
                load_width = solver.min(size)
                l.warning("Symbolic read size is too large for threshold - concretizing to min (%d)", load_width)
                solver.add(size == load_width)
        else:
            load_width = solver.eval(size)
            if load_width > 2**13:
                l.warning("Program performing extremely large reads")

        def fetch():
            return self.load(pos, load_width, disable_actions=disable_actions, inspect=inspect)

        # Step 2.1: without an EOF the read always succeeds in full; the stored size just grows to cover it
        if not self.has_end:
            self._size = solver.If(size + pos > self._size, size + pos, self._size)
            return fetch(), size, size + pos

        # Step 2.2: with an EOF, work out how many bytes are left (never negative)
        remaining = self._size - pos
        remaining = solver.If(solver.SLE(remaining, 0), 0, remaining)

        # frontload some constraint solving: can this read run past the end at all?
        if not solver.satisfiable(extra_constraints=(size > remaining,)):
            # it cannot; existing constraints already guarantee enough data
            # note: this assumes that constraints cannot be removed
            return fetch(), size, size + pos

        # it can; final size = min(requested size, bytes remaining)
        real_size = solver.If(size >= remaining, remaining, size)
        return fetch(), real_size, real_size + pos

    def write(self, pos, data, size=None, events=True, **kwargs):
        """
        Store ``data`` at ``pos`` and grow the file size if the write ends beyond it.

        :param pos:     The offset to write to.
        :param data:    The data to write.
        :param size:    Optional size of the write; defaults to the length of the data.
        :param events:  Whether to record an ``fs_write`` event in the state history.
        :return:        The position right after the written data.
        """
        if events:
            self.state.history.add_event('fs_write', filename=self.name, data=data, size=size, pos=pos)

        data = _deps_unpack(data)[0]
        if size is None:
            if isinstance(data, claripy.Bits):
                size = len(data) // self.state.arch.byte_width
            else:
                size = len(data)

        self.store(pos, data, size=size)
        end_pos = _deps_unpack(pos + size)[0] # decline to store SAO
        self._size = self.state.solver.If(end_pos > self._size, end_pos, self._size)
        return end_pos

    @SimStatePlugin.memo
    def copy(self, memo):
        dup = super().copy(memo)
        for attr in ('name', '_size', 'has_end', 'seekable', 'writable', 'concrete'):
            setattr(dup, attr, getattr(self, attr))
        return dup

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        """
        Merge the sizes of ``others`` into this file, then defer to the parent ``merge`` for the rest.

        :raises SimMergeError:  If the files are not all of exactly this type, or disagree on ``has_end``.
        """
        if any(type(o) is not type(self) for o in others):
            raise SimMergeError("Cannot merge files of disparate type")

        if any(o.has_end != self.has_end for o in others):
            raise SimMergeError("Cannot merge files where some have ends and some don't")

        other_sizes = (o._size for o in others)
        self._size = self.state.solver.ite_cases(zip(merge_conditions[1:], other_sizes), self._size)

        return super().merge(others, merge_conditions, common_ancestor=common_ancestor)

    def widen(self, _):
        raise SimMergeError("Widening the filesystem is unsupported")
327
328
class SimFileStream(SimFile):
    """
    A specialized SimFile that uses a flat memory backing, but functions as a stream, tracking its position internally.

    By default the ``pos`` argument of ``read`` is replaced by the internal position (pass ``no_stream=True`` to
    ``read`` to use the argument instead), and the ``pos`` argument of ``write`` is always ignored. ``write`` returns
    None; the current position is available as the ``pos`` attribute of the file.

    :param name:    The name of the file, for cosmetic purposes
    :param pos:     The initial position of the file, default zero
    :param kwargs:  Any other keyword arguments will go on to the SimFile constructor.

    :ivar pos:      The current position in the file.
    """

    def __init__(self, name=None, content=None, pos=0, **kwargs):
        super().__init__(name=name, content=content, **kwargs)
        self.pos = pos

    def set_state(self, state):
        """
        Attach the state and turn an integer position into a bitvector of ``state.arch.bits`` bits.

        :raises TypeError:  If the position is a bitvector of any other width.
        """
        super().set_state(state)
        word_bits = state.arch.bits
        if type(self.pos) is int:
            self.pos = state.solver.BVV(self.pos, word_bits)
        elif len(self.pos) != word_bits:
            raise TypeError("SimFileStream position must be a bitvector of size %d (arch.bits)" % word_bits)

    def read(self, pos, size, **kwargs):
        """
        Read from the stream.

        :param pos:         Only used when ``no_stream`` is true; otherwise the internal position is used.
        :param size:        The size to read.
        :param no_stream:   (keyword) if true, read at ``pos`` and leave the internal position untouched.
        :return:            The ``(data, size, pos)`` tuple produced by ``SimFile.read``.
        """
        streaming = not kwargs.pop('no_stream', False)
        start = self.pos if streaming else pos
        data, real_size, next_pos = super().read(start, size, **kwargs)
        if streaming:
            self.pos = next_pos
        return data, real_size, next_pos

    def write(self, _, data, size=None, **kwargs):
        """
        Write at the internal position and advance it; the position argument is ignored. Returns None.
        """
        next_pos = super().write(self.pos, data, size, **kwargs)
        self.pos = next_pos
        return None

    @SimStatePlugin.memo
    def copy(self, memo):
        dup = super().copy(memo)
        dup.pos = self.pos
        return dup

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        other_positions = [o.pos for o in others]
        self.pos = self.state.solver.ite_cases(zip(merge_conditions[1:], other_positions), self.pos)
        return super().merge(others, merge_conditions, common_ancestor=common_ancestor)
376
377
class SimPackets(SimFileBase):
    """
    The SimPackets is meant to model inputs whose content is delivered a series of asynchronous chunks. The data is
    stored as a list of read or write results. For symbolic sizes, state.libc.max_packet_size will be respected. If
    the SHORT_READS option is enabled, reads will return a symbolic size constrained to be less than or equal to the
    requested size.

    A SimPackets cannot be used for both reading and writing - for socket objects that can be both read and written to
    you should use a file descriptor to multiplex the read and write operations into two separate file storage
    mechanisms.

    :param name:        The name of the file, for cosmetic purposes
    :param write_mode:  Whether this file is opened in read or write mode. If this is unspecified it will be
                        autodetected.
    :param content:     Some initial content to use for the file. Can be a list of bytestrings or a list of tuples of
                        content ASTs and size ASTs.

    :ivar write_mode:   See the eponymous parameter
    :ivar content:      A list of packets, as tuples of content ASTs and size ASTs.
    """
    def __init__(self, name, write_mode=None, content=None, writable=True, ident=None, **kwargs):
        super().__init__(name, writable=writable, ident=ident, **kwargs)

        self.write_mode = write_mode
        self.content = content

        if self.content is None:
            self.content = []
        else:
            # Normalise every entry to a (data, size) tuple. A new list is always built, so the caller's list is
            # never shared with this object.
            self.content = [
                    x if type(x) is tuple \
                    else (x, len(x) // 8) if isinstance(x, claripy.Bits) \
                    else (x.ast, len(x) // 8) if isinstance(x, SimActionObject) \
                    else (claripy.BVV(x), len(x)) if type(x) is bytes \
                    else None \
                    for x in self.content]
            if any(x is None for x in self.content):
                raise TypeError("Bad type in initial SimPacket content")

    def set_state(self, state):
        """
        Attach the state and make every packet length a bitvector of ``state.arch.bits`` bits: integers are
        converted and narrower bitvectors are zero-extended.

        :raises TypeError:  If a length is a bitvector wider than ``state.arch.bits``.
        """
        super().set_state(state)
        # sanitize the lengths in self.content now that we know the wordsize
        for i, (data, length) in enumerate(self.content):
            if type(length) is int:
                self.content[i] = (data, claripy.BVV(length, state.arch.bits))
            elif len(length) < state.arch.bits:
                self.content[i] = (data, length.zero_extend(state.arch.bits - len(length)))
            elif len(length) != state.arch.bits:
                raise TypeError('Bad bitvector size for length in SimPackets.content')

    @property
    def size(self):
        """
        The sum of the sizes of all packets; the plain integer 0 when there are no packets.
        """
        return sum(x[1] for x in self.content)

    def concretize(self, **kwargs):
        """
        Returns a list of the packets read or written as bytestrings.
        """
        lengths = [self.state.solver.eval(x[1], **kwargs) for x in self.content]
        kwargs['cast_to'] = bytes
        # NOTE(review): the slice below keeps the low-order bits of each packet. Verify this selects the intended
        # bytes when the concrete length is smaller than the packet's data width.
        return [b'' if i == 0 else self.state.solver.eval(x[0][i*self.state.arch.byte_width-1:], **kwargs) for i, x in zip(lengths, self.content)]

    def read(self, pos, size, **kwargs):
        """
        Read a packet from the stream.

        :param int pos:     The packet number to read from the sequence of the stream. May be None to append to the stream.
        :param size:        The size to read. May be symbolic.
        :param short_reads: Whether to replace the size with a symbolic value constrained to less than or equal to the original size. If unspecified, will be chosen based on the state option.
        :return:            A tuple of the data read (a bitvector of the length that is the maximum length of the read), the actual size of the read, and the number of the next packet.
        :raises SimFileError:   If the object is in write mode, if ``pos`` is negative or past the frontier, or if
                                an existing packet cannot fit into the requested size.
        """
        short_reads = kwargs.pop('short_reads', None)

        # sanity check on read/write modes
        if self.write_mode is None:
            self.write_mode = False
        elif self.write_mode is True:
            raise SimFileError("Cannot read and write to the same SimPackets")

        # sanity check on packet number and determine if data is already present
        if pos is None:
            pos = len(self.content)
        if pos < 0:
            raise SimFileError("SimPacket.read(%d): Negative packet number?" % pos)
        elif pos > len(self.content):
            raise SimFileError("SimPacket.read(%d): Packet number is past frontier of %d?" % (pos, len(self.content)))
        elif pos != len(self.content):
            _, realsize = self.content[pos]
            self.state.solver.add(realsize <= size)  # assert that the packet fits within the read request
            if not self.state.solver.satisfiable():
                raise SimFileError("SimPackets could not fit the current packet into the read request of %s bytes: %s" % (size, self.content[pos]))
            return self.content[pos] + (pos+1,)

        # typecheck
        if type(size) is int:
            size = self.state.solver.BVV(size, self.state.arch.bits)

        # The read is on the frontier. let's generate a new packet.
        orig_size = size
        max_size = None

        # if short reads are enabled, replace size with a symbol
        if short_reads is True or (short_reads is None and sim_options.SHORT_READS in self.state.options):
            size = self.state.solver.BVS('packetsize_%d_%s' % (len(self.content), self.ident), self.state.arch.bits, key=('file', self.ident, 'packetsize', len(self.content)))
            self.state.solver.add(size <= orig_size)

        # figure out the maximum size of the read
        if not self.state.solver.symbolic(size):
            max_size = self.state.solver.eval(size)
        elif self.state.solver.satisfiable(extra_constraints=(size <= self.state.libc.max_packet_size,)):
            l.info("Constraining symbolic packet size to be less than %d", self.state.libc.max_packet_size)
            if not self.state.solver.is_true(orig_size <= self.state.libc.max_packet_size):
                self.state.solver.add(size <= self.state.libc.max_packet_size)
            if not self.state.solver.symbolic(orig_size):
                max_size = min(self.state.solver.eval(orig_size), self.state.libc.max_packet_size)
            else:
                max_size = self.state.solver.max(size)
        else:
            max_size = self.state.solver.min(size)
            l.warning("Could not constrain symbolic packet size to <= %d; using minimum %d for size", self.state.libc.max_packet_size, max_size)
            self.state.solver.add(size == max_size)

        # generate the packet data and return it
        data = self.state.solver.BVS('packet_%d_%s' % (len(self.content), self.ident), max_size * self.state.arch.byte_width, key=('file', self.ident, 'packet', len(self.content)))
        packet = (data, size)
        self.content.append(packet)
        return packet + (pos+1,)

    def write(self, pos, data, size=None, events=True, **kwargs):
        """
        Write a packet to the stream.

        :param int pos:     The packet number to write in the sequence of the stream. May be None to append to the stream.
        :param data:        The data to write, as a string or bitvector.
        :param size:        The optional size to write. May be symbolic; must be constrained to at most the size of data.
        :param events:      Whether to record an ``fs_write`` event in the state history.
        :return:            The next packet to use after this
        :raises SimFileError:   If the object is in read mode, if ``pos`` is negative or past the frontier, or if
                                re-writing an existing packet makes the state unsatisfiable.
        """
        if events:
            self.state.history.add_event('fs_write', filename=self.name, data=data, size=size, pos=pos)

        # sanity check on read/write modes
        if self.write_mode is None:
            self.write_mode = True
        elif self.write_mode is False:
            raise SimFileError("Cannot read and write to the same SimPackets")

        data = _deps_unpack(data)[0]
        if type(data) is bytes:
            data = claripy.BVV(data)
        if size is None:
            size = len(data) // self.state.arch.byte_width if isinstance(data, claripy.Bits) else len(data)
        if type(size) is int:
            size = self.state.solver.BVV(size, self.state.arch.bits)

        # sanity check on packet number and determine if data is already present
        if pos is None:
            # None means "append", mirroring read(); without this the comparison below raises TypeError
            pos = len(self.content)
        if pos < 0:
            raise SimFileError("SimPacket.write(%d): Negative packet number?" % pos)
        elif pos > len(self.content):
            raise SimFileError("SimPacket.write(%d): Packet number is past frontier of %d?" % (pos, len(self.content)))
        elif pos != len(self.content):
            # The packet already exists: nothing is stored, the new data is only constrained to equal the old one.
            realdata, realsize = self.content[pos]
            # NOTE(review): with max() the slice below reaches past the end of the shorter bitvector when the two
            # lengths differ -- confirm whether min() was intended.
            maxlen = max(len(realdata), len(data))
            self.state.solver.add(realdata[maxlen-1:0] == data[maxlen-1:0])
            self.state.solver.add(size == realsize)
            if not self.state.solver.satisfiable():
                raise SimFileError("Packet write equality constraints made state unsatisfiable???")
            return pos+1

        # write it out! (data was already stripped of its SimActionObject wrapper above)
        self.content.append((data, size))
        return pos+1

    @SimStatePlugin.memo
    def copy(self, memo): # pylint: disable=unused-argument
        """
        Build a new object of the same type through its constructor, carrying over name, write mode, packets,
        writability, identifier and concreteness. The constructor builds a fresh packet list for the copy.
        """
        return type(self)(
            name=self.name,
            write_mode=self.write_mode,
            content=self.content,
            writable=self.writable,
            ident=self.ident,
            concrete=self.concrete,
        )

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        """
        Merge the packets of ``others`` into this object, packet by packet.

        :return:                True.
        :raises SimMergeError:  If the write modes conflict or the number of packets differs.
        """
        for o in others:
            if o.write_mode is None:
                continue
            elif self.write_mode is None:
                self.write_mode = o.write_mode
            elif self.write_mode is not o.write_mode:
                raise SimMergeError("Cannot merge SimPackets with disparate write_mode")

        for o in others:
            if len(o.content) != len(self.content):
                raise SimMergeError("Cannot merge SimPackets with disparate number of packets")

        for i, default in enumerate(self.content):
            max_data_length = max(len(default[0]), max(len(o.content[i][0]) for o in others))
            merged_data = self.state.solver.ite_cases(
                zip(
                    merge_conditions[1:],
                    (o.content[i][0].concat(claripy.BVV(0, max_data_length - len(o.content[i][0]))) for o in others)
                ), default[0])
            merged_size = self.state.solver.ite_cases(zip(merge_conditions[1:], (o.content[i][1] for o in others)), default[1])
            self.content[i] = (merged_data, merged_size)

        return True

    def widen(self, _):
        raise SimMergeError("Widening the filesystem is unsupported")
581
582
class SimPacketsStream(SimPackets):
    """
    A specialized SimPackets that tracks its position internally.

    By default the ``pos`` argument of ``read`` is replaced by the internal position (pass ``no_stream=True`` to
    ``read`` to use the argument instead), and the ``pos`` argument of ``write`` is always ignored. ``write`` returns
    None; the current position is available as the ``pos`` attribute of the file.

    :param name:    The name of the file, for cosmetic purposes
    :param pos:     The initial position of the file, default zero
    :param kwargs:  Any other keyword arguments will go on to the SimPackets constructor.

    :ivar pos:      The current position in the file.
    """
    def __init__(self, name, pos=0, **kwargs):
        super().__init__(name, **kwargs)
        self.pos = pos

    def read(self, pos, size, **kwargs):
        """
        Read a packet.

        :param pos:         Only used when ``no_stream`` is true; otherwise the internal position is used.
        :param size:        The size to read.
        :param no_stream:   (keyword) if true, read at ``pos`` and leave the internal position untouched.
        :return:            The ``(data, size, pos)`` tuple produced by ``SimPackets.read``.
        """
        streaming = not kwargs.pop('no_stream', False)
        packet_no = self.pos if streaming else pos
        data, real_size, next_packet = super().read(packet_no, size, **kwargs)
        if streaming:
            self.pos = next_packet
        return data, real_size, next_packet

    def write(self, _, data, size=None, **kwargs):
        """
        Write a packet at the internal position and advance it; the position argument is ignored. Returns None.
        """
        next_packet = super().write(self.pos, data, size, **kwargs)
        self.pos = next_packet
        return None

    @SimStatePlugin.memo
    def copy(self, memo):
        dup = super().copy(memo)
        dup.pos = self.pos
        return dup

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        """
        Merge like SimPackets, after checking that every stream is at the same position.

        :raises SimMergeError:  If any of ``others`` has a different position.
        """
        for o in others:
            if o.pos != self.pos:
                raise SimMergeError("Can't merge SimPacketsStreams with disparate positions")
        return super().merge(others, merge_conditions, common_ancestor=common_ancestor)
623
624
625class SimFileDescriptorBase(SimStatePlugin):
626    """
627    The base class for implementations of POSIX file descriptors.
628
629    All file descriptors should respect the CONCRETIZE_SYMBOLIC_{READ,WRITE}_SIZES state options.
630    """
631
632    def read(self, pos, size, **kwargs):
633        """
634        Reads some data from the file, storing it into memory.
635
636        :param pos:     The address to write the read data into memory
637        :param size:    The requested length of the read
638        :return:        The real length of the read
639        """
640        data, realsize = self.read_data(size, **kwargs)
641        if not self.state.solver.is_true(realsize == 0):
642            self.state.memory.store(pos, data, size=realsize)
643        return realsize
644
645    def write(self, pos, size, **kwargs):
646        """
647        Writes some data, loaded from the state, into the file.
648
649        :param pos:     The address to read the data to write from in memory
650        :param size:    The requested size of the write
651        :return:        The real length of the write
652        """
653        if type(pos) is str:
654            raise TypeError("SimFileDescriptor.write takes an address and size. Did you mean write_data?")
655
656        # Find a reasonable concrete size for the load since we don't want to concretize anything
657        # This is copied from SimFile.read
658        # TODO: refactor into a generic concretization strategy?
659        if self.state.solver.symbolic(size):
660            try:
661                passed_max_size = self.state.solver.max(size, extra_constraints=(size < self.state.libc.max_packet_size,))
662            except SimSolverError:
663                passed_max_size = self.state.solver.min(size)
664                l.warning("Symbolic write size is too large for threshold - concretizing to min (%d)", passed_max_size)
665                self.state.solver.add(size == passed_max_size)
666        else:
667            passed_max_size = self.state.solver.eval(size)
668            if passed_max_size > 2**13:
669                l.warning("Program performing extremely large write")
670
671        data = self.state.memory.load(pos, passed_max_size)
672        return self.write_data(data, size, **kwargs)
673
674    def read_data(self, size, **kwargs):
675        """
676        Reads some data from the file, returning the data.
677
678        :param size:    The requested length of the read
679        :return:        A tuple of the data read and the real length of the read
680        """
681        raise NotImplementedError
682
683    def write_data(self, data, size=None, **kwargs):
684        """
685        Write some data, provided as an argument into the file.
686
687        :param data:    A bitvector to write into the file
688        :param size:    The requested size of the write (may be symbolic)
689        :return:        The real length of the write
690        """
691        raise NotImplementedError
692
693    def seek(self, offset, whence='start'):
694        """
695        Seek the file descriptor to a different position in the file.
696
697        :param offset:  The offset to seek to, interpreted according to whence
698        :param whence:  What the offset is relative to; one of the strings "start", "current", or "end"
699        :return:        A symbolic boolean describing whether the seek succeeded or not
700        """
701        raise NotImplementedError
702
703    def tell(self):
704        """
705        Return the current position, or None if the concept doesn't make sense for the given file.
706        """
707        raise NotImplementedError
708
709    def eof(self):
710        """
711        Return the EOF status. May be a symbolic boolean.
712        """
713        raise NotImplementedError
714
715    def size(self):
716        """
717        Return the size of the data stored in the file in bytes, or None if the concept doesn't make sense for the
718        given file.
719        """
720        raise NotImplementedError
721
722    @property
723    def read_storage(self):
724        """
725        Return the SimFile backing reads from this fd
726        """
727        raise NotImplementedError
728
729    @property
730    def write_storage(self):
731        """
732        Return the SimFile backing writes to this fd
733        """
734        raise NotImplementedError
735
736    @property
737    def read_pos(self):
738        """
739        Return the current position of the read file pointer.
740
741        If the underlying read file is a stream, this will return the position of the stream. Otherwise, will return
742        the position of the file descriptor in the file.
743        """
744        raise NotImplementedError
745
746    @property
747    def write_pos(self):
748        """
749        Return the current position of the read file pointer.
750
751        If the underlying read file is a stream, this will return the position of the stream. Otherwise, will return
752        the position of the file descriptor in the file.
753        """
754        raise NotImplementedError
755
756    def concretize(self, **kwargs):
757        """
758        Return a concretizeation of the data in the underlying file. Has different return types to represent differnt
759        data structures on a per-class basis.
760
761        Any arguments passed to this will be passed onto state.solver.eval.
762        """
763        raise NotImplementedError
764
765    def _prep_read(self, size):
766        return self._prep_generic(size, True)
767    def _prep_write(self, size):
768        return self._prep_generic(size, False)
769
770    def _prep_generic(self, size, is_read):
771        option = sim_options.CONCRETIZE_SYMBOLIC_FILE_READ_SIZES if is_read else sim_options.CONCRETIZE_SYMBOLIC_WRITE_SIZES
772        string = 'read' if is_read else 'write'
773        # check if we need to concretize the length
774        if option in self.state.options and self.state.solver.symbolic(size):
775            try:
776                size = self.state.solver.max(size, extra_constraints=(size <= self.state.libc.max_packet_size,))
777            except SimSolverError:
778                size = self.state.solver.min(size)
779            l.info("Concretizing symbolic %s size to %d", string, size)
780
781        return size
782
783
class SimFileDescriptor(SimFileDescriptorBase):
    """
    A simple file descriptor forwarding reads and writes to a SimFile. Contains information about
    the current opened state of the file, such as the flags or (if relevant) the current position.

    :ivar file:     The SimFile described by this descriptor
    :ivar flags:    The mode that the file descriptor was opened with, a bitfield of flags
    """
    def __init__(self, simfile, flags=0):
        super().__init__()
        self.file = simfile
        self._pos = 0
        self.flags = flags

    def read_data(self, size, **kwargs):
        """
        Read from the underlying file at the descriptor's position and advance the position.

        :param size:    The requested length of the read
        :return:        A tuple of the data read and the real length of the read
        """
        size = self._prep_read(size)
        data, realsize, self._pos = self.file.read(self._pos, size)
        return data, realsize

    def write_data(self, data, size=None, **kwargs):
        """
        Write ``data`` to the underlying file at the descriptor's position and advance the position.

        :param data:    The data to write; a SimActionObject is unwrapped to its ast first
        :param size:    The requested size of the write; defaults to the full length of ``data``
        :return:        The size that was passed on to the file, after ``_prep_write`` processed it
        """
        # in append mode every write goes to the current end of a seekable file
        if self.flags & Flags.O_APPEND and self.file.seekable:
            self._pos = self.file.size

        data = _deps_unpack(data)[0]
        if size is None:
            # len() of a claripy object counts bits, so convert to bytes
            size = len(data) // self.state.arch.byte_width if isinstance(data, claripy.Bits) else len(data)

        size = self._prep_write(size)
        self._pos = self.file.write(self._pos, data, size)
        return size

    def seek(self, offset, whence='start'):
        """
        Seek the file descriptor to a different position in the file.

        The position is only updated if the target lies between 0 and the file size (signed comparison); otherwise
        it is left where it was.

        :param offset:  The offset to seek to, interpreted according to whence
        :param whence:  What the offset is relative to; one of the strings "start", "current", or "end"
        :return:        A symbolic boolean describing whether the seek succeeded or not; always false for a
                        non-seekable file
        :raises ValueError: If the file is seekable and ``whence`` is not one of the three accepted strings
        """
        if not self.file.seekable:
            return claripy.false

        if isinstance(offset, int):
            offset = self.state.solver.BVV(offset, self.state.arch.bits)

        if whence == 'start':
            new_pos = offset
        elif whence == 'current':
            new_pos = self._pos + offset
        elif whence == 'end':
            new_pos = self.file.size + offset
        else:
            # without this branch an unknown whence would surface as an UnboundLocalError on new_pos below
            raise ValueError("Unknown whence %r; expected 'start', 'current', or 'end'" % (whence,))

        success_condition = self.state.solver.And(self.state.solver.SGE(new_pos, 0), self.state.solver.SLE(new_pos, self.file.size))
        self._pos = _deps_unpack(self.state.solver.If(success_condition, new_pos, self._pos))[0]
        return success_condition

    def eof(self):
        """
        Return the EOF status: false for files that are not seekable or whose ``has_end`` attribute is false,
        otherwise whether the position equals the file size (may be symbolic).
        """
        if not self.file.seekable:
            return claripy.false
        if not getattr(self.file, 'has_end', True):
            return claripy.false
        return self._pos == self.file.size

    def tell(self):
        """
        Return the current position, or None if the underlying file is not seekable.
        """
        if not self.file.seekable:
            return None
        return self._pos

    def size(self):
        """
        Return the size reported by the underlying file.
        """
        return self.file.size

    def concretize(self, **kwargs):
        """
        Return a concretization of the underlying file. Returns whatever format is preferred by the file.
        """
        return self.file.concretize(**kwargs)

    @property
    def read_storage(self):
        """
        Return the SimFile backing reads from this fd
        """
        return self.file

    @property
    def write_storage(self):
        """
        Return the SimFile backing writes to this fd
        """
        return self.file

    @property
    def read_pos(self):
        """
        Return the file's own ``pos`` if it has one, otherwise the position tracked by this descriptor.
        """
        if self.file.pos is not None:
            return self.file.pos
        return self._pos

    @property
    def write_pos(self):
        """
        Return the file's own ``pos`` if it has one, otherwise the position tracked by this descriptor.
        """
        if self.file.pos is not None:
            return self.file.pos
        return self._pos

    def set_state(self, state):
        self.file.set_state(state)
        super().set_state(state)

    @SimStatePlugin.memo
    def copy(self, memo):
        c = type(self)(self.file.copy(memo), self.flags)
        c._pos = self._pos
        return c

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        """
        Merge the positions of several descriptors into this one.

        :param others:              The descriptors to merge with
        :param merge_conditions:    The merge conditions; entries from index 1 on are paired with ``others``
        :return:                    True on success, False (after logging an error) if the types or flags differ
        :raises SimMergeError:      If integer positions differ, or only some of the positions are None
        """
        # do NOT merge file content - descriptors do not have ownership, prevent duplicate merging
        if not all(type(o) is type(self) for o in others):
            l.error("Cannot merge SimFileDescriptors of disparate types")
            return False
        if not all(o.flags == self.flags for o in others):
            l.error("Cannot merge SimFileDescriptors of disparate flags")
            return False

        if isinstance(self._pos, int) and all(isinstance(o._pos, int) for o in others):
            # TODO: we can do slightly better for packet-based things by having some packets have a "guard condition"
            # which makes them zero length if they're not merged in
            if any(o._pos != self._pos for o in others):
                raise SimMergeError("Cannot merge SimFileDescriptors over SimPackets with disparate number of packets")
        elif self._pos is None and all(o._pos is None for o in others):
            pass
        elif self._pos is None or any(o._pos is None for o in others):
            raise SimMergeError("Cannot merge SimFileDescriptors with inconsistent None-position - please report this!")
        else:
            self._pos = self.state.solver.ite_cases(zip(merge_conditions[1:], (o._pos for o in others)), self._pos)

        return True

    def widen(self, _):
        raise SimMergeError("Widening the filesystem is unsupported")
906
907
class SimFileDescriptorDuplex(SimFileDescriptorBase):
    """
    A file descriptor backed by two separate storage mechanisms: reads are served from one file and writes go to
    another. Because of that split, operations such as seek and tell have no sensible meaning here.

    :param read_file:   The SimFile to read from
    :param write_file:  The SimFile to write to
    """
    def __init__(self, read_file, write_file):
        super().__init__()
        self._read_file = read_file
        self._write_file = write_file

        self._read_pos = 0
        self._write_pos = 0

    def read_data(self, size, **kwargs):
        # read from the read side and remember where the next read continues
        prepared = self._prep_read(size)
        data, realsize, self._read_pos = self._read_file.read(self._read_pos, prepared)
        return data, realsize

    def write_data(self, data, size=None, **kwargs):
        payload, _, _ = _deps_unpack(data)
        if size is None:
            # default to the whole payload; len() of a claripy object counts bits
            if isinstance(payload, claripy.Bits):
                size = len(payload) // self.state.arch.byte_width
            else:
                size = len(payload)

        size = self._prep_write(size)
        self._write_pos = self._write_file.write(self._write_pos, payload, size)
        return size

    def set_state(self, state):
        self._read_file.set_state(state)
        self._write_file.set_state(state)
        super().set_state(state)

    def eof(self):
        # the thing that makes the most sense is for this to refer to the read eof status...
        source = self._read_file
        if source.seekable and getattr(source, 'has_end', True):
            return self._read_pos == source.size
        return claripy.false

    def tell(self):
        return None

    def seek(self, offset, whence='start'):
        return claripy.false

    def size(self):
        return None

    def concretize(self, **kwargs):
        """
        Concretize both underlying files and return the results as a (read file, write file) tuple.
        """
        read_side = self._read_file.concretize(**kwargs)
        write_side = self._write_file.concretize(**kwargs)
        return (read_side, write_side)

    @property
    def read_storage(self):
        return self._read_file

    @property
    def write_storage(self):
        return self._write_file

    @property
    def read_pos(self):
        # a position kept by the file itself takes precedence over the descriptor's own bookkeeping
        file_pos = self._read_file.pos
        return self._read_pos if file_pos is None else file_pos

    @property
    def write_pos(self):
        # a position kept by the file itself takes precedence over the descriptor's own bookkeeping
        file_pos = self._write_file.pos
        return self._write_pos if file_pos is None else file_pos

    @SimStatePlugin.memo
    def copy(self, memo):
        duplicate = type(self)(self._read_file.copy(memo), self._write_file.copy(memo))
        duplicate._read_pos, duplicate._write_pos = self._read_pos, self._write_pos
        return duplicate

    def _merge_position(self, mine, theirs, merge_conditions):
        """
        Combine one position of this descriptor with the matching positions of the descriptors being merged in.

        :param mine:                This descriptor's position
        :param theirs:              The positions of the other descriptors, in order
        :param merge_conditions:    The merge conditions; entries from index 1 on are paired with ``theirs``
        :return:                    The merged position
        """
        if type(mine) is int and all(type(p) is int for p in theirs):
            if any(p != mine for p in theirs):
                raise SimMergeError("Cannot merge SimFileDescriptors over SimPackets with disparate number of packets")
            return mine
        if mine is None and all(p is None for p in theirs):
            return mine
        if mine is None or any(p is None for p in theirs):
            raise SimMergeError("Cannot merge SimFileDescriptors with inconsistent None-position - please report this!")
        return self.state.solver.ite_cases(zip(merge_conditions[1:], theirs), mine)

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        # do NOT merge storage mechanisms here - fs and posix handle that
        if any(type(o) is not type(self) for o in others):
            raise SimMergeError("Cannot merge SimFileDescriptors of disparate types")

        # the read side is settled first, then the write side
        self._read_pos = self._merge_position(self._read_pos, [o._read_pos for o in others], merge_conditions)
        self._write_pos = self._merge_position(self._write_pos, [o._write_pos for o in others], merge_conditions)

        return True

    def widen(self, _):
        raise SimMergeError("Widening the filesystem is unsupported")
1019
1020
class SimPacketsSlots(SimFileBase):
    """
    SimPacketsSlots is the new SimDialogue, if you've ever seen that before.

    The idea is that in some cases, the only thing you really care about is getting the lengths of reads right, and
    some of them should be short reads, and some of them should be truncated. You provide to this class a list of read
    lengths, and it figures out the length of each read, and delivers some content.

    This class will NOT respect the position argument you pass it - this storage is not stateless.

    :ivar read_sizes:   The sizes of the packets that are still available to read; consumed by ``read``
    :ivar read_data:    The symbolic variables handed out by the reads performed so far, in order
    """

    seekable = False

    def __init__(self, name, read_sizes, ident=None, **kwargs): # pylint: disable=unused-argument
        super().__init__(name, writable=False, ident=ident)

        # read() consumes this list in place, so keep a private copy: neither the caller's list nor the list of
        # another copy of this file may be touched by it
        self.read_sizes = list(read_sizes)
        self.read_data = []

    def concretize(self, **kwargs):
        """
        Return a list with the concrete bytes of each packet read so far. Any arguments are passed on to
        state.solver.eval.
        """
        return [self.state.solver.eval(var, cast_to=bytes, **kwargs) for var in self.read_data]

    def read(self, pos, size, **kwargs):
        """
        Read from the next available packet slot. ``pos`` is ignored.

        :param pos:     Ignored
        :param size:    The requested size of the read; must have exactly one solution
        :return:        A tuple of the data read, the real size of the read, and None as the next position. If no
                        packet slots remain, the data is empty and the real size is 0.
        :raises SimFileError:   If ``size`` has more than one possible value
        """
        if not self.read_sizes:
            return self.state.solver.BVV(0, 0), 0, None

        try:
            req_size = self.state.solver.eval_one(size)
        except SimSolverError as err:
            raise SimFileError("SimPacketsSlots can't handle multivalued read sizes") from err

        avail_size = self.read_sizes[0]

        if avail_size > req_size:
            # the packet is larger than the request: hand out part of it and keep the remainder in its slot
            real_size = req_size
            self.read_sizes[0] -= req_size
        else:
            # short read or full size read
            real_size = avail_size
            self.read_sizes.pop(0)

        data = self.state.solver.BVS('packet_%d_%s' % (len(self.read_data), self.ident), real_size*self.state.arch.byte_width, key=('file', self.ident, 'packet', len(self.read_data)))
        self.read_data.append(data)
        return data, real_size, None

    def write(self, pos, data, size=None, **kwargs):
        raise SimFileError("Trying to write to SimPacketsSlots? Illegal")

    @property
    def size(self):
        """
        The total size of all data read so far, in bytes.
        """
        return sum(len(x) for x in self.read_data) // self.state.arch.byte_width

    @SimStatePlugin.memo
    def copy(self, memo): # pylint: disable=unused-argument
        # the constructor takes its own copy of read_sizes, so the two objects consume their slots independently
        o = type(self)(self.name, self.read_sizes, ident=self.ident)
        o.read_data = list(self.read_data)
        return o

    def merge(self, others, merge_conditions, common_ancestor=None): # pylint: disable=unused-argument
        """
        Merge the packets read so far with those of ``others``.

        :param others:              The files to merge with
        :param merge_conditions:    The merge conditions; entries from index 1 on are paired with ``others``
        :return:                    True
        :raises SimMergeError:      If the remaining read sizes or the sizes of the packets already read differ
        """
        if any(self.read_sizes != o.read_sizes for o in others):
            raise SimMergeError("Can't merge SimPacketsSlots with disparate reads")
        already_read_sizes = [len(x) for x in self.read_data]
        if any(already_read_sizes != [len(x) for x in o.read_data] for o in others):
            raise SimMergeError("Can't merge SimPacketsSlots with disparate reads")

        # read_data is a plain list of variables, so the index has to come from enumerate
        for i, default_var in enumerate(self.read_data):
            self.read_data[i] = self.state.solver.ite_cases(zip(merge_conditions[1:], [o.read_data[i] for o in others]), default_var)

        return True

    def widen(self, _):
        raise SimMergeError("Widening the filesystem is unsupported")
1094
1095
1096from ..errors import SimMergeError, SimFileError, SimSolverError
1097