1"""Classes to aid in working with the STIX2 patterning language."""
2
3import base64
4import binascii
5import datetime
6import re
7
8import six
9
10from .utils import parse_into_datetime
11
12
def escape_quotes_and_backslashes(s):
    """Backslash-escape every backslash and single quote in ``s``.

    Backslashes are doubled first, so the backslashes inserted in front of
    single quotes are not themselves escaped a second time.

    Args:
        s (str): text to escape

    Returns:
        str: the escaped text
    """
    doubled_backslashes = s.replace(u'\\', u'\\\\')
    return doubled_backslashes.replace(u"'", u"\\'")
15
16
def quote_if_needed(x):
    """Wrap ``x`` in single quotes when it is a string containing a hyphen.

    Non-string values, strings without a hyphen, and strings that already
    start with a single quote are returned unchanged.

    Args:
        x: value to quote if necessary

    Returns:
        The quoted string, or ``x`` itself when no quoting applies.
    """
    if not isinstance(x, six.string_types):
        return x
    if "-" in x and not x.startswith("'"):
        return "'" + x + "'"
    return x
23
24
class _Constant(object):
    """Common base class of the pattern constant classes in this module."""
27
28
class StringConstant(_Constant):
    """Pattern string constant

    Args:
        value (str): string value
        from_parse_tree (bool): when True the value is emitted verbatim;
            otherwise quotes and backslashes are escaped when the constant
            is converted to a string. Default: False
    """

    def __init__(self, value, from_parse_tree=False):
        self.value = value
        # Values that came from a parse tree are rendered as-is.
        self.needs_to_be_quoted = not from_parse_tree

    def __str__(self):
        if self.needs_to_be_quoted:
            text = escape_quotes_and_backslashes(self.value)
        else:
            text = self.value
        return "'%s'" % text
42
43
class TimestampConstant(_Constant):
    """Pattern timestamp constant

    The given value is converted with ``parse_into_datetime``; any failure
    of that conversion is reported as a ``ValueError``.

    Args:
        value (datetime.datetime OR str): if string, must be a timestamp string

    Raises:
        ValueError: if the value cannot be converted
    """
    def __init__(self, value):
        try:
            parsed = parse_into_datetime(value)
        except Exception:
            raise ValueError("Must be a datetime object or timestamp string.")
        self.value = parsed

    def __str__(self):
        # Rendered as the letter "t" followed by the repr of the stored value.
        return "t" + repr(self.value)
58
59
class IntegerConstant(_Constant):
    """Pattern integer constant

    Args:
        value (int): integer value, or anything ``int()`` can convert

    Raises:
        ValueError: if the value cannot be converted to an integer
    """
    def __init__(self, value):
        try:
            converted = int(value)
        except Exception:
            raise ValueError("must be an integer.")
        self.value = converted

    def __str__(self):
        return str(self.value)
74
75
class FloatConstant(_Constant):
    """Pattern float constant

    Args:
        value (float): float value, or anything ``float()`` can convert

    Raises:
        ValueError: if the value cannot be converted to a float
    """
    def __init__(self, value):
        try:
            converted = float(value)
        except Exception:
            raise ValueError("must be a float.")
        self.value = converted

    def __str__(self):
        return str(self.value)
85
86
class BooleanConstant(_Constant):
    """Pattern boolean constant

    Args:
       value (bool OR str OR int):
           (bool) used as-is
           (str) 'true', 't', '1' for True; 'false', 'f', '0' for False
               (compared case-insensitively)
           (int) 1 for True; 0 for False

    Raises:
        ValueError: if the value is none of the accepted forms
    """
    def __init__(self, value):
        if isinstance(value, bool):
            self.value = value
            return

        parsed = None
        try:
            lowered = value.lower()
        except AttributeError:
            # No ``lower`` method: treat the value as a number.
            if value == 1:
                parsed = True
            elif value == 0:
                parsed = False
        else:
            if lowered in ('true', 't', '1'):
                parsed = True
            elif lowered in ('false', 'f', '0'):
                parsed = False

        if parsed is None:
            raise ValueError("must be a boolean value.")
        self.value = parsed

    def __str__(self):
        # Lower-cased so True/False render as "true"/"false".
        return str(self.value).lower()
121
122
# Maps a normalised hash algorithm key (upper case, hyphens removed) to a
# pair: (regular expression a value must match, display name of the algorithm).
_HASH_REGEX = {
    "MD5": (r"^[a-fA-F0-9]{32}$", "MD5"),
    # The alternatives are grouped so that both anchors apply to every
    # accepted length; ungrouped, "^" and "$" would each bind to only one
    # alternative and over-long values would be accepted.
    "MD6": (r"^(?:[a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{56}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$", "MD6"),
    "RIPEMD160": (r"^[a-fA-F0-9]{40}$", "RIPEMD-160"),
    "SHA1": (r"^[a-fA-F0-9]{40}$", "SHA-1"),
    "SHA224": (r"^[a-fA-F0-9]{56}$", "SHA-224"),
    "SHA256": (r"^[a-fA-F0-9]{64}$", "SHA-256"),
    "SHA384": (r"^[a-fA-F0-9]{96}$", "SHA-384"),
    "SHA512": (r"^[a-fA-F0-9]{128}$", "SHA-512"),
    "SHA3224": (r"^[a-fA-F0-9]{56}$", "SHA3-224"),
    "SHA3256": (r"^[a-fA-F0-9]{64}$", "SHA3-256"),
    "SHA3384": (r"^[a-fA-F0-9]{96}$", "SHA3-384"),
    "SHA3512": (r"^[a-fA-F0-9]{128}$", "SHA3-512"),
    "SSDEEP": (r"^[a-zA-Z0-9/+:.]{1,128}$", "SSDEEP"),
    "WHIRLPOOL": (r"^[a-fA-F0-9]{128}$", "WHIRLPOOL"),
    "TLSH": (r"^[a-fA-F0-9]{70}$", "TLSH"),
}
140
141
class HashConstant(StringConstant):
    """Pattern hash constant

    The algorithm name is normalised (upper-cased, hyphens removed) before
    it is looked up. Values of a known algorithm are validated against that
    algorithm's regular expression; values of any other algorithm are
    accepted without validation.

    Args:
        value (str): hash value
        type (str): hash algorithm name. Supported hash algorithms:
            "MD5", "MD6", "RIPEMD160", "SHA1", "SHA224", "SHA256",
            "SHA384", "SHA512", "SHA3224", "SHA3256", "SHA3384",
            "SHA3512", "SSDEEP", "WHIRLPOOL", "TLSH"

    Raises:
        ValueError: if ``value`` does not match the format of a supported
            algorithm
    """
    def __init__(self, value, type):
        key = type.upper().replace('-', '')
        if key in _HASH_REGEX:
            regex, vocab_key = _HASH_REGEX[key]
            if not re.match(regex, value):
                raise ValueError("'%s' is not a valid %s hash" % (value, vocab_key))
        # Always initialise the base class; skipping it for an unrecognised
        # algorithm would leave the instance without a ``value`` attribute.
        super(HashConstant, self).__init__(value)
159
160
class BinaryConstant(_Constant):
    """Pattern binary constant

    Args:
        value (str): base64 encoded string value
        from_parse_tree (bool): when True, a surrounding ``b'...'`` wrapper
            is stripped from the value if present. Default: False

    Raises:
        ValueError: if the value cannot be base64-decoded
    """

    def __init__(self, value, from_parse_tree=False):
        # support with or without a 'b'
        if from_parse_tree:
            wrapped = re.match("^b'(.+)'$", value)
            if wrapped is not None:
                value = wrapped.group(1)
        try:
            # Decoded only as a validity check; the encoded text is stored.
            base64.b64decode(value)
        except (binascii.Error, TypeError):
            raise ValueError("must contain a base64 encoded string")
        self.value = value

    def __str__(self):
        return "b'%s'" % self.value
182
183
class HexConstant(_Constant):
    """Pattern hexadecimal constant

    Args:
        value (str): hexadecimal value, either bare or wrapped as ``h'...'``
        from_parse_tree (bool): when True only the ``h'...'`` form is
            accepted. Default: False

    Raises:
        ValueError: if the value is not an even number of hexadecimal
            characters in an accepted form
    """
    def __init__(self, value, from_parse_tree=False):
        # support with or without an 'h'
        if not from_parse_tree and re.match('^([a-fA-F0-9]{2})+$', value):
            self.value = value
            return

        quoted = re.match("^h'(([a-fA-F0-9]{2})+)'$", value)
        if not quoted:
            raise ValueError("must contain an even number of hexadecimal characters")
        self.value = quoted.group(1)

    def __str__(self):
        return "h'%s'" % self.value
203
204
class ListConstant(_Constant):
    """Pattern list constant

    Args:
        values (list): list of values; entries that are not already pattern
            constants are converted with ``make_constant``
    """
    def __init__(self, values):
        converted = []
        for item in values:
            # handle _Constants or make a _Constant
            if not isinstance(item, _Constant):
                item = make_constant(item)
            converted.append(item)
        self.value = converted

    def __str__(self):
        rendered = ["%s" % x for x in self.value]
        return "(%s)" % ", ".join(rendered)
217
218
def make_constant(value):
    """Convert value to Pattern constant, best effort attempt
    at determining root value type and corresponding conversion

    A value that is already a pattern constant is returned unchanged.
    Otherwise a timestamp conversion is attempted first, and the value's
    Python type selects the constant class if that fails.

    Args:
        value: value to convert to Pattern constant

    Returns:
        _Constant: the resulting pattern constant

    Raises:
        ValueError: if no constant can be created from the value
    """
    if isinstance(value, _Constant):
        return value

    try:
        return TimestampConstant(value)
    except (ValueError, TypeError):
        pass

    # six.string_types (as used by quote_if_needed) also covers Python 2
    # unicode text, which a plain ``str`` check would reject.
    if isinstance(value, six.string_types):
        return StringConstant(value)
    # bool must be tested before int because bool is a subclass of int.
    elif isinstance(value, bool):
        return BooleanConstant(value)
    elif isinstance(value, int):
        return IntegerConstant(value)
    elif isinstance(value, float):
        return FloatConstant(value)
    elif isinstance(value, list):
        return ListConstant(value)
    else:
        raise ValueError("Unable to create a constant from %s" % value)
246
247
class _ObjectPathComponent(object):
    """Base class of the object path component classes in this module."""

    @staticmethod
    def create_ObjectPathComponent(component_name):
        """Build the component object matching the form of ``component_name``.

        Args:
            component_name (StringConstant OR str): component name; names
                ending in "_ref" become reference components and names
                containing "[" become list components

        Returns:
            _ObjectPathComponent: the created component
        """
        # first case is to handle if component_name was quoted
        if isinstance(component_name, StringConstant):
            return BasicObjectPathComponent(component_name.value, False)
        if component_name.endswith("_ref"):
            return ReferenceObjectPathComponent(component_name)
        if "[" in component_name:
            pieces = component_name.split("[")
            # Drop the last character of the index part (the closing bracket
            # in a name of the form "prop[idx]").
            return ListObjectPathComponent(pieces[0], pieces[1][:-1])
        return BasicObjectPathComponent(component_name, False)

    def __str__(self):
        return quote_if_needed(self.property_name)
264
265
class BasicObjectPathComponent(_ObjectPathComponent):
    """Basic object path component (for an observation or expression)

    By "Basic", implies that the object path component is not a
    list, object reference or further referenced property, i.e. terminal
    component

    Args:
        property_name (str): object property name
        is_key (bool): is dictionary key, default: False. Currently
            accepted but not stored.
    """
    def __init__(self, property_name, is_key=False):
        self.property_name = property_name
        # TODO: set is_key to True if this component is a dictionary key
        # self.is_key = is_key
281
282
class ListObjectPathComponent(_ObjectPathComponent):
    """List object path component (for an observation or expression)

    Rendered as ``property_name[index]``, with the property name quoted
    when ``quote_if_needed`` requires it.

    Args:
        property_name (str): list object property name
        index (int OR str): index of the list property's value that is specified
    """
    def __init__(self, property_name, index):
        self.index = index
        self.property_name = property_name

    def __str__(self):
        name = quote_if_needed(self.property_name)
        return "%s[%s]" % (name, self.index)
296
297
class ReferenceObjectPathComponent(_ObjectPathComponent):
    """Reference object path component (for an observation or expression)

    Args:
        reference_property_name (str): reference object property name,
            stored as ``property_name``
    """
    def __init__(self, reference_property_name):
        # Stored under the attribute name the base class __str__ reads.
        self.property_name = reference_property_name
306
307
class ObjectPath(object):
    """Pattern operand object (property) path

    Rendered as ``object_type_name:component.component...``.

    Args:
        object_type_name (str): name of object type for corresponding object path component
        property_path (list): path components; each entry is either an
            _ObjectPathComponent or a value that
            ``_ObjectPathComponent.create_ObjectPathComponent`` can convert
    """
    def __init__(self, object_type_name, property_path):
        self.object_type_name = object_type_name
        self.property_path = [
            x if isinstance(x, _ObjectPathComponent) else
            _ObjectPathComponent.create_ObjectPathComponent(x)
            for x in property_path
        ]

    def __str__(self):
        components = ["%s" % quote_if_needed(x) for x in self.property_path]
        return "%s:%s" % (self.object_type_name, ".".join(components))

    def merge(self, other):
        """Extend the object property with that of the supplied object property path

        Args:
            other (ObjectPath): path whose components are appended to this one

        Returns:
            ObjectPath: this (modified) object path
        """
        self.property_path.extend(other.property_path)
        return self

    @staticmethod
    def make_object_path(lhs):
        """Create ObjectPath from string encoded object path

        Args:
            lhs (str): object path of left-hand-side component of expression,
                in the form ``object_type:property.path``

        Returns:
            ObjectPath: the parsed object path

        Raises:
            IndexError: if ``lhs`` contains no colon
        """
        # Split on the first colon only, so that any colon appearing later
        # in the property path is kept rather than silently dropped.
        path_as_parts = lhs.split(":", 1)
        return ObjectPath(path_as_parts[0], path_as_parts[1].split("."))
340
341
class _PatternExpression(object):
    """Common base class of the pattern expression classes in this module."""
344
345
class _ComparisonExpression(_PatternExpression):
    """Pattern Comparison Expression

    Rendered as ``lhs [NOT] operator rhs``. An "=" comparison against a
    list is stored with the "IN" operator instead.

    Args:
        operator (str): operator of comparison expression
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value; anything that is not already a pattern
            constant is converted with ``make_constant``
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, operator, lhs, rhs, negated=False):
        compares_to_list = isinstance(rhs, (ListConstant, list))
        self.operator = "IN" if operator == "=" and compares_to_list else operator
        self.lhs = lhs if isinstance(lhs, ObjectPath) else ObjectPath.make_object_path(lhs)
        self.rhs = rhs if isinstance(rhs, _Constant) else make_constant(rhs)
        self.negated = negated
        # A single comparison refers to exactly one object type.
        self.root_types = {self.lhs.object_type_name}

    def __str__(self):
        template = "%s NOT %s %s" if self.negated else "%s %s %s"
        return template % (self.lhs, self.operator, self.rhs)
376
377
class EqualityComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``=`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(EqualityComparisonExpression, self).__init__("=", lhs, rhs, negated=negated)
388
389
class GreaterThanComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``>`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(GreaterThanComparisonExpression, self).__init__(">", lhs, rhs, negated=negated)
400
401
class LessThanComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``<`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(LessThanComparisonExpression, self).__init__("<", lhs, rhs, negated=negated)
412
413
class GreaterThanEqualComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``>=`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(GreaterThanEqualComparisonExpression, self).__init__(">=", lhs, rhs, negated=negated)
424
425
class LessThanEqualComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``<=`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """

    def __init__(self, lhs, rhs, negated=False):
        super(LessThanEqualComparisonExpression, self).__init__("<=", lhs, rhs, negated=negated)
437
438
class InComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``IN`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(InComparisonExpression, self).__init__("IN", lhs, rhs, negated=negated)
449
450
class LikeComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``LIKE`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """

    def __init__(self, lhs, rhs, negated=False):
        super(LikeComparisonExpression, self).__init__("LIKE", lhs, rhs, negated=negated)
462
463
class MatchesComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``MATCHES`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(MatchesComparisonExpression, self).__init__("MATCHES", lhs, rhs, negated=negated)
474
475
class IsSubsetComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``ISSUBSET`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(IsSubsetComparisonExpression, self).__init__("ISSUBSET", lhs, rhs, negated=negated)
486
487
class IsSupersetComparisonExpression(_ComparisonExpression):
    """Comparison expression built with the ``ISSUPERSET`` operator

    Args:
        lhs (ObjectPath OR str): object path of left-hand-side component of expression
        rhs: right-hand-side value of the expression (a pattern constant
            or a value convertible to one)
        negated (bool): comparison expression negated. Default: False
    """
    def __init__(self, lhs, rhs, negated=False):
        super(IsSupersetComparisonExpression, self).__init__("ISSUPERSET", lhs, rhs, negated=negated)
498
499
class _BooleanExpression(_PatternExpression):
    """Boolean Pattern Expression

    The expression's ``root_types`` is the intersection of the operands'
    root types for "AND" and their union for any other operator.

    Args:
        operator (str): boolean operator
        operands (list): boolean operands; each must expose ``root_types``

    Raises:
        ValueError: if the combined set of root types becomes empty
    """
    def __init__(self, operator, operands):
        self.operator = operator
        self.operands = list(operands)
        # Iterate the stored list, not the argument: a one-shot iterable
        # has already been consumed by list() above.
        for arg in self.operands:
            if not hasattr(self, "root_types"):
                # Copy the first operand's set so the in-place &= / |=
                # below cannot modify that operand's own root_types.
                self.root_types = set(arg.root_types)
            elif operator == "AND":
                self.root_types &= arg.root_types
            else:
                self.root_types |= arg.root_types

            if not self.root_types:
                raise ValueError("All operands to an 'AND' expression must be satisfiable with the same object type")

    def __str__(self):
        separator = " " + self.operator + " "
        return separator.join([str(o) for o in self.operands])
526
527
class AndBooleanExpression(_BooleanExpression):
    """'AND' Boolean Pattern Expression. Only use if both operands are of
    the same root object: the base class raises ValueError when the
    operands have no root object type in common.

    Args:
        operands (list): AND operands
    """
    def __init__(self, operands):
        super(AndBooleanExpression, self).__init__(operator="AND", operands=operands)
537
538
class OrBooleanExpression(_BooleanExpression):
    """'OR' Boolean Pattern Expression.

    The base class does not reject operands with different root object
    types for "OR"; it records the union of their root types.

    Args:
        operands (list): OR operands
    """
    def __init__(self, operands):
        super(OrBooleanExpression, self).__init__(operator="OR", operands=operands)
547
548
class ObservationExpression(_PatternExpression):
    """Observation Expression

    Rendered inside square brackets, unless the operand is itself an
    observation expression or a compound observation expression, in which
    case the operand's own rendering is used unchanged.

    Args:
        operand (str): observation expression operand
    """
    def __init__(self, operand):
        self.operand = operand

    def __str__(self):
        already_observation = isinstance(
            self.operand, (ObservationExpression, _CompoundObservationExpression),
        )
        if already_observation:
            return "%s" % self.operand
        return "[%s]" % self.operand
560
561
class _CompoundObservationExpression(_PatternExpression):
    """Compound Observation Expression

    Rendered as the operands joined by the operator, with a single space
    on each side of the operator.

    Args:
        operator (str): compound observation operator
        operands (list): compound observation operands
    """
    def __init__(self, operator, operands):
        self.operands = operands
        self.operator = operator

    def __str__(self):
        separator = " " + self.operator + " "
        return separator.join(["%s" % o for o in self.operands])
578
579
class AndObservationExpression(_CompoundObservationExpression):
    """Compound observation expression joining its operands with 'AND'

    Args:
        operands (list): compound observation operands
    """
    def __init__(self, operands):
        super(AndObservationExpression, self).__init__(operator="AND", operands=operands)
588
589
class OrObservationExpression(_CompoundObservationExpression):
    """Compound observation expression joining its operands with 'OR'

    Args:
        operands (list): compound observation operands
    """
    def __init__(self, operands):
        super(OrObservationExpression, self).__init__(operator="OR", operands=operands)
598
599
class FollowedByObservationExpression(_CompoundObservationExpression):
    """Compound observation expression joining its operands with 'FOLLOWEDBY'

    Args:
        operands (list): compound observation operands
    """
    def __init__(self, operands):
        super(FollowedByObservationExpression, self).__init__(operator="FOLLOWEDBY", operands=operands)
608
609
class ParentheticalExpression(_PatternExpression):
    """Pattern Parenthetical Observation Expression

    Rendered as the wrapped expression enclosed in parentheses.

    Args:
       exp (str): observation expression
    """
    def __init__(self, exp):
        self.expression = exp
        # Carry over root types only when the wrapped expression has them.
        if not hasattr(exp, "root_types"):
            return
        self.root_types = exp.root_types

    def __str__(self):
        return "(%s)" % self.expression
623
624
class _ExpressionQualifier(_PatternExpression):
    """Common base class of the expression qualifier classes in this module."""
627
628
class RepeatQualifier(_ExpressionQualifier):
    """Pattern Repeat Qualifier

    Rendered as ``REPEATS <n> TIMES``.

    Args:
        times_to_repeat (IntegerConstant OR int): times the qualifier is repeated

    Raises:
        ValueError: if the argument is neither an IntegerConstant nor an int
    """
    def __init__(self, times_to_repeat):
        if isinstance(times_to_repeat, IntegerConstant):
            count = times_to_repeat
        elif isinstance(times_to_repeat, int):
            count = IntegerConstant(times_to_repeat)
        else:
            raise ValueError("%s is not a valid argument for a Repeat Qualifier" % times_to_repeat)
        self.times_to_repeat = count

    def __str__(self):
        return "REPEATS %s TIMES" % self.times_to_repeat
645
646
class WithinQualifier(_ExpressionQualifier):
    """Pattern 'Within' Qualifier

    Rendered as ``WITHIN <n> SECONDS``.

    Args:
        number_of_seconds (IntegerConstant OR int): seconds value for 'within' qualifier

    Raises:
        ValueError: if the argument is neither an IntegerConstant nor an int
    """
    def __init__(self, number_of_seconds):
        if isinstance(number_of_seconds, IntegerConstant):
            seconds = number_of_seconds
        elif isinstance(number_of_seconds, int):
            seconds = IntegerConstant(number_of_seconds)
        else:
            raise ValueError("%s is not a valid argument for a Within Qualifier" % number_of_seconds)
        self.number_of_seconds = seconds

    def __str__(self):
        return "WITHIN %s SECONDS" % self.number_of_seconds
663
664
class StartStopQualifier(_ExpressionQualifier):
    """Pattern Start/Stop Qualifier

    Rendered as ``START <start> STOP <stop>``.

    Args:
        start_time (TimestampConstant OR datetime.date OR StringConstant):
            start timestamp for qualifier
        stop_time (TimestampConstant OR datetime.date OR StringConstant):
            stop timestamp for qualifier

    Raises:
        ValueError: if either argument is not one of the accepted types
    """
    def __init__(self, start_time, stop_time):
        self.start_time = self._as_time_constant(start_time)
        self.stop_time = self._as_time_constant(stop_time)

    @staticmethod
    def _as_time_constant(moment):
        """Convert one start/stop argument into the constant that is stored."""
        if isinstance(moment, TimestampConstant):
            return moment
        if isinstance(moment, datetime.date):
            return TimestampConstant(moment)
        if isinstance(moment, StringConstant):
            # Rebuilt from the raw value as a fresh StringConstant.
            return StringConstant(moment.value)
        raise ValueError("%s is not a valid argument for a Start/Stop Qualifier" % moment)

    def __str__(self):
        return "START %s STOP %s" % (self.start_time, self.stop_time)
692
693
class QualifiedObservationExpression(_PatternExpression):
    """Pattern Qualified Observation Expression

    Rendered as the observation expression followed by a space and the
    qualifier.

    Args:
        observation_expression (PatternExpression OR _CompoundObservationExpression): pattern expression
        qualifier (_ExpressionQualifier): pattern expression qualifier
    """
    def __init__(self, observation_expression, qualifier):
        self.qualifier = qualifier
        self.observation_expression = observation_expression

    def __str__(self):
        expression, qualifier = self.observation_expression, self.qualifier
        return "%s %s" % (expression, qualifier)
707