1from __future__ import unicode_literals, print_function, absolute_import, division, generators, nested_scopes
2import logging
3import six
4from six.moves import xrange
5from itertools import *  # noqa
6from .exceptions import JSONPathError
7
# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# Turn on/off the automatic creation of id attributes
# ... could be a kwarg pervasively but uses are rare and simple today
# While this is None the feature is off; set it to a field name to turn it on.
auto_id_field = None

# Sentinel meaning "no value present"; lets a lookup tell a missing key apart
# from a key whose stored value is None.
NOT_SET = object()
# Sentinel dictionary key under which a list is parked temporarily
# (see `_create_list_key()` and `_clean_list_keys()`).
LIST_KEY = object()
17
18
class JSONPath(object):
    """
    Abstract base of every node in the JSONPath syntax tree.

    The methods declared here form the interface that concrete
    path expressions implement or inherit.
    """

    def find(self, data):
        """
        Return the matches of this path within `data`.

        Every `JSONPath` type supports `find()`; the result is an iterable
        of `DatumInContext` objects, each remembering the path followed to
        reach it. `data` may itself already be a `DatumInContext`, in which
        case it acts as the starting point.
        """
        raise NotImplementedError()

    def find_or_create(self, data):
        """
        Like `find()`; the base implementation simply delegates to it.
        """
        return self.find(data)

    def update(self, data, val):
        """
        Return `data` with the location(s) named by this path replaced
        by `val`. Only existing locations are updated.
        """
        raise NotImplementedError()

    def update_or_create(self, data, val):
        """
        Like `update()`; the base implementation simply delegates to it.
        """
        return self.update(data, val)

    def filter(self, fn, data):
        """
        Return `data` after filtering the nodes named by this path
        according to the result of the filter function.

        Arguments:
            fn (function): unary function that accepts one argument
                and returns bool.
            data (dict|list|tuple): JSON object to filter.
        """
        raise NotImplementedError()

    def child(self, child):
        """
        Equivalent to Child(self, child) but with some canonicalization:
        a leading `This`/`Root` is dropped, a trailing `This` is dropped,
        and a trailing `Root` replaces everything before it.
        """
        # Either nothing meaningful precedes `child`, or `child` restarts
        # at the root: in both cases `child` alone is the canonical form.
        if isinstance(self, (This, Root)) or isinstance(child, Root):
            return child
        if isinstance(child, This):
            return self
        return Child(self, child)

    def make_datum(self, value):
        """
        Return `value` unchanged if it is already a `DatumInContext`,
        otherwise wrap it as a context-free datum located at the root.
        """
        if not isinstance(value, DatumInContext):
            value = DatumInContext(value, path=Root(), context=None)
        return value
79
80
class DatumInContext(object):
    """
    Represents a datum along a path from a context.

    Essentially a zipper but with a structure represented by JsonPath,
    and where the context is more of a parent pointer than a proper
    representation of the context.

    The wrapped object is retrieved via the `value` attribute, the path
    from the context to the datum via `path`, and the enclosing datum
    (or None) via `context`.

    To place `datum` within another, use `datum.in_context(context=..., path=...)`
    which extends the path. If the datum already has a context, it places the entire
    context within that passed in, so an object can be built from the inside
    out.
    """
    @classmethod
    def wrap(cls, data):
        """
        Return `data` itself when it is already an instance of this class,
        otherwise a new context-free datum wrapping it.
        """
        if isinstance(data, cls):
            return data
        else:
            return cls(data)

    def __init__(self, value, path=None, context=None):
        """
        Arguments:
            value: the wrapped object.
            path: path from `context` to this datum; a falsy value
                is replaced by `This()`.
            context: the enclosing datum or raw value (wrapped on the
                way in), or None when there is no enclosing datum.
        """
        self.value = value
        self.path = path or This()
        self.context = None if context is None else DatumInContext.wrap(context)

    def in_context(self, context, path):
        """
        Return a copy of this datum re-rooted inside `context`.

        A context-free datum simply becomes a child of `context` at `path`.
        A datum that already has a context keeps its own value and path,
        and its existing context chain is the thing placed at `path`
        inside `context`.
        """
        context = DatumInContext.wrap(context)

        if self.context is None:
            return DatumInContext(value=self.value, path=path, context=context)

        # Re-root our *own* context chain. Nesting the new context inside
        # itself would discard self.context and give the intermediate datum
        # the wrong value.
        return DatumInContext(value=self.value,
                              path=self.path,
                              context=self.context.in_context(context=context, path=path))

    @property
    def full_path(self):
        """
        The path from the outermost context down to this datum.
        """
        return self.path if self.context is None else self.context.full_path.child(self.path)

    @property
    def id_pseudopath(self):
        """
        Looks like a path, but with ids stuck in when available
        """
        try:
            pseudopath = Fields(str(self.value[auto_id_field]))
        except (TypeError, AttributeError, KeyError): # This may not be all the interesting exceptions
            # No usable id on this value: fall back to the ordinary path.
            pseudopath = self.path

        if self.context:
            return self.context.id_pseudopath.child(pseudopath)
        else:
            return pseudopath

    def __repr__(self):
        return '%s(value=%r, path=%r, context=%r)' % (self.__class__.__name__, self.value, self.path, self.context)

    def __eq__(self, other):
        return isinstance(other, DatumInContext) and other.value == self.value and other.path == self.path and self.context == other.context
142
143
class AutoIdForDatum(DatumInContext):
    """
    This behaves like a DatumInContext, but the value is
    always the path leading up to it, not including the "id",
    and with any "id" fields along the way replacing the prior
    segment of the path

    For example, it will make "foo.bar.id" return a datum
    that behaves like DatumInContext(value="foo.bar", path="foo.bar.id").

    This is disabled by default; it can be turned on by
    setting the `auto_id_field` global to a value other
    than `None`.
    """

    def __init__(self, datum, id_field=None):
        """
        Invariant is that datum.path is the path from context to datum. The auto id
        will either be the id in the datum (if present) or the id of the context
        followed by the path to the datum.

        The path to this datum is always the path to the context, the path to the
        datum, and then the auto id field.

        Arguments:
            datum: the datum whose auto id this object represents.
            id_field: name of the id field; a falsy value falls back to
                the module-level `auto_id_field`.
        """
        self.datum = datum
        self.id_field = id_field or auto_id_field

    @property
    def value(self):
        """The id pseudo-path of the underlying datum, rendered as a string."""
        return str(self.datum.id_pseudopath)

    @property
    def path(self):
        """The id field name; it is the last step leading to this datum."""
        return self.id_field

    @property
    def context(self):
        """The underlying datum acts as the context of its own auto id."""
        return self.datum

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.datum)

    def in_context(self, context, path):
        """
        Return the auto id of the underlying datum re-rooted inside `context`.
        """
        # Carry the id field along so an explicitly chosen field is not
        # silently replaced by the global default on the new object.
        return AutoIdForDatum(self.datum.in_context(context=context, path=path),
                              id_field=self.id_field)

    def __eq__(self, other):
        return isinstance(other, AutoIdForDatum) and other.datum == self.datum and self.id_field == other.id_field
191
192
class Root(JSONPath):
    """
    The JSONPath referring to the "root" object. Concrete syntax is '$'.
    The root is the topmost datum without any context attached.
    """

    def find(self, data):
        """
        Return a one-element list holding the root datum for `data`.

        Raw values are their own root. A datum with a context defers
        to the root of that context.
        """
        if isinstance(data, DatumInContext):
            if data.context is not None:
                # Not the top yet: keep climbing through the context.
                return Root().find(data.context)
            data = data.value
        return [DatumInContext(data, path=Root(), context=None)]

    def update(self, data, val):
        """Replacing the root yields `val` itself."""
        return val

    def filter(self, fn, data):
        """Return `data` when `fn(data)` is truthy, otherwise None."""
        if fn(data):
            return data
        return None

    def __str__(self):
        return '$'

    def __repr__(self):
        return 'Root()'

    def __eq__(self, other):
        return isinstance(other, Root)
222
223
class This(JSONPath):
    """
    The JSONPath referring to the current datum. Concrete syntax is '@'.
    """

    def find(self, datum):
        """Return a one-element list holding `datum`, wrapped if needed."""
        current = DatumInContext.wrap(datum)
        return [current]

    def update(self, data, val):
        """Replacing the current datum yields `val` itself."""
        return val

    def filter(self, fn, data):
        """Return `data` when `fn(data)` is truthy, otherwise None."""
        if fn(data):
            return data
        return None

    def __str__(self):
        return '`this`'

    def __repr__(self):
        return 'This()'

    def __eq__(self, other):
        return isinstance(other, This)
246
247
class Child(JSONPath):
    """
    JSONPath that first matches the left, then the right.
    Concrete syntax is <left> '.' <right>
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def find(self, datum):
        """
        Match `right` against every match of `left`.

        Extra special case: auto ids do not have children,
        so cut it off right now rather than auto id the auto id
        """
        found = []
        for parent in self.left.find(datum):
            if isinstance(parent, AutoIdForDatum):
                continue
            found.extend(self.right.find(parent))
        return found

    def update(self, data, val):
        """
        Apply `right.update` to the value of every match of `left`;
        returns `data`.
        """
        for parent in self.left.find(data):
            self.right.update(parent.value, val)
        return data

    def find_or_create(self, datum):
        """
        Same as `find()` but using the `find_or_create` variant
        of both sides.
        """
        wrapped = DatumInContext.wrap(datum)
        # Auto ids do not have children, so they are skipped here
        # rather than auto id-ing the auto id.
        return [match
                for parent in self.left.find_or_create(wrapped)
                if not isinstance(parent, AutoIdForDatum)
                for match in self.right.find_or_create(parent)]

    def update_or_create(self, data, val):
        """
        Same as `update()` but using the creating variants of both sides;
        the result is passed through `_clean_list_keys()`.
        """
        for parent in self.left.find_or_create(data):
            self.right.update_or_create(parent.value, val)
        return _clean_list_keys(data)

    def filter(self, fn, data):
        """
        Apply `right.filter` to the value of every match of `left`;
        returns `data`.
        """
        for parent in self.left.find(data):
            self.right.filter(fn, parent.value)
        return data

    def __eq__(self, other):
        if not isinstance(other, Child):
            return False
        return self.left == other.left and self.right == other.right

    def __str__(self):
        return '%s.%s' % (self.left, self.right)

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.left, self.right)
304
305
class Parent(JSONPath):
    """
    JSONPath that matches the parent node of the current match.
    Available via named operator `parent`.

    No check is made that a parent exists: for a datum without
    a context the single result is None.
    """

    def find(self, datum):
        """Return a one-element list holding the context of `datum`."""
        current = DatumInContext.wrap(datum)
        return [current.context]

    def __eq__(self, other):
        return isinstance(other, Parent)

    def __str__(self):
        return '`parent`'

    def __repr__(self):
        return 'Parent()'
325
326
class Where(JSONPath):
    """
    JSONPath that first matches the left, and then
    filters for only those nodes that have
    a match on the right.

    WARNING: Subject to change. May want to have "contains"
    or some other better word for it.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def find(self, data):
        """
        Return the matches of `left` for which `right` finds
        at least one match (a non-empty result).
        """
        kept = []
        for candidate in self.left.find(data):
            if self.right.find(candidate):
                kept.append(candidate)
        return kept

    def update(self, data, val):
        """
        Call `update(data, val)` on the path of every match;
        returns `data`.
        """
        for match in self.find(data):
            match.path.update(data, val)
        return data

    def filter(self, fn, data):
        """
        Call `filter(fn, value)` on the path of every match, passing
        the value of that match; returns `data`.
        """
        for match in self.find(data):
            match.path.filter(fn, match.value)
        return data

    def __str__(self):
        return '%s where %s' % (self.left, self.right)

    def __eq__(self, other):
        if not isinstance(other, Where):
            return False
        return other.left == self.left and other.right == self.right
359
class Descendants(JSONPath):
    """
    JSONPath that matches first the left expression then any descendant
    of it which matches the right expression.
    """

    def __init__(self, left, right):
        self.left = left
        self.right = right

    def _left_matches(self, data):
        """Return the matches of `left` in `data`, always as a list."""
        matches = self.left.find(data)
        if isinstance(matches, list):
            return matches
        return [matches]

    def _visit_containers(self, data, visit):
        """
        Call `visit` on `data` and then, depth first, on every list or
        dict nested inside it. Anything that is not a list or dict is
        skipped, since only mutable JSON containers can be changed in place.
        """
        if not isinstance(data, (list, dict)):
            return

        visit(data)

        # Manually do the * or [*] to avoid coercion and recurse just the right-hand pattern
        if isinstance(data, list):
            for position in range(len(data)):
                self._visit_containers(data[position], visit)
        else:
            for key in data.keys():
                self._visit_containers(data[key], visit)

    def find(self, datum):
        """
        Return every match of `right` at, or anywhere below, each
        match of `left`.
        """
        # <left> .. <right> ==> <left> . (<right> | *..<right> | [*]..<right>)
        #
        # With a wonky caveat that since Slice() has funky coercions
        # we cannot just delegate to that equivalence or we'll hit an
        # infinite loop. So right here we implement the coercion-free version.

        def collect(node):
            here = self.right.find(node)
            below = []

            # Manually do the * or [*] to avoid coercion and recurse just the right-hand pattern
            if isinstance(node.value, list):
                for position, element in enumerate(node.value):
                    child = DatumInContext(element, context=node, path=Index(position))
                    below.extend(collect(child))
            elif isinstance(node.value, dict):
                for key in node.value.keys():
                    child = DatumInContext(node.value[key], context=node, path=Fields(key))
                    below.extend(collect(child))

            return here + below

        # TODO: repeatable iterator instead of list?
        results = []
        for start in self._left_matches(datum):
            results.extend(collect(start))
        return results

    def is_singular(self):
        return False

    def update(self, data, val):
        """
        Apply `right.update(container, val)` to every list or dict at or
        below each match of `left`; returns `data`.
        """
        def apply_update(container):
            self.right.update(container, val)

        for start in self._left_matches(data):
            self._visit_containers(start.value, apply_update)

        return data

    def filter(self, fn, data):
        """
        Apply `right.filter(fn, container)` to every list or dict at or
        below each match of `left`; returns `data`.
        """
        def apply_filter(container):
            self.right.filter(fn, container)

        for start in self._left_matches(data):
            self._visit_containers(start.value, apply_filter)

        return data

    def __str__(self):
        return '%s..%s' % (self.left, self.right)

    def __eq__(self, other):
        if not isinstance(other, Descendants):
            return False
        return self.left == other.left and self.right == other.right

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.left, self.right)
471
472
class Union(JSONPath):
    """
    JSONPath that returns the union of the results of each match.
    This is pretty shoddily implemented for now. The nicest semantics
    in case of mismatched bits (list vs atomic) is to put
    them all in a list, but I haven't done that yet.

    WARNING: Any appearance of this being the _concatenation_ is
    coincidence. It may even be a bug! (or laziness)
    """
    def __init__(self, left, right):
        self.left = left
        self.right = right

    def is_singular(self):
        return False

    def find(self, data):
        """Return the matches of `left` followed by the matches of `right`."""
        from_left = self.left.find(data)
        from_right = self.right.find(data)
        return from_left + from_right
492
class Intersect(JSONPath):
    """
    JSONPath for bits that match *both* patterns.

    Matching is not implemented yet. It can be accomplished a couple
    of ways. The most efficient is to actually build the intersected
    AST as in building a state machine for matching the
    intersection of regular languages. The next
    idea is to build a filtered data and match against
    that.
    """
    def __init__(self, left, right):
        self.left, self.right = left, right

    def is_singular(self):
        return False

    def find(self, data):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError()
513
514
class Fields(JSONPath):
    """
    JSONPath referring to some field of the current object.
    Concrete syntax is comma-separated field names.

    WARNING: If '*' is any of the field names, then they will
    all be returned.
    """

    def __init__(self, *fields):
        self.fields = fields

    @staticmethod
    def _is_mapping(data):
        """
        Tell whether `data` can be addressed by field name.

        Lists and strings also support `in`, but there it means element or
        substring membership, so a hit says nothing about a field existing.
        """
        return hasattr(data, 'keys')

    @staticmethod
    def get_field_datum(datum, field, create):
        """
        Return the datum for `field` of `datum`, or None when there is none.

        Arguments:
            datum (DatumInContext): the datum whose value is inspected.
            field: the field name to look up.
            create (bool): when true, a missing field is created
                as an empty dict instead of being reported as absent.
        """
        if field == auto_id_field:
            return AutoIdForDatum(datum)
        try:
            field_value = datum.value.get(field, NOT_SET)
            if field_value is NOT_SET:
                if create:
                    datum.value[field] = field_value = {}
                else:
                    return None
            return DatumInContext(field_value, path=Fields(field), context=datum)
        except (TypeError, AttributeError):
            # The value is not something with fields (no usable `.get`).
            return None

    def reified_fields(self, datum):
        """
        Return the concrete field names to visit for `datum`.

        Without a '*' these are the configured names. With a '*' they are
        all keys of the value (plus the auto id field when enabled), or
        nothing when the value has no keys.
        """
        if '*' not in self.fields:
            return self.fields
        else:
            try:
                fields = tuple(datum.value.keys())
                return fields if auto_id_field is None else fields + (auto_id_field,)
            except AttributeError:
                return ()

    def find(self, datum):
        return self._find_base(datum, create=False)

    def find_or_create(self, datum):
        return self._find_base(datum, create=True)

    def _find_base(self, datum, create):
        """Collect the datum of every reified field, skipping absent ones."""
        datum = DatumInContext.wrap(datum)
        field_data = [self.get_field_datum(datum, field, create)
                      for field in self.reified_fields(datum)]
        return [fd for fd in field_data if fd is not None]

    def update(self, data, val):
        return self._update_base(data, val, create=False)

    def update_or_create(self, data, val):
        return self._update_base(data, val, create=True)

    def _update_base(self, data, val, create):
        """
        Set every reified field of `data` to `val` and return `data`.

        A callable `val` is invoked as `val(current_value, data, field)`
        instead of being stored. With `create`, missing fields are first
        created as empty dicts. Data that has no fields (None, lists,
        strings, numbers, ...) is returned untouched.
        """
        if data is not None and self._is_mapping(data):
            for field in self.reified_fields(DatumInContext.wrap(data)):
                if field not in data and create:
                    data[field] = {}
                if field in data:
                    if hasattr(val, '__call__'):
                        val(data[field], data, field)
                    else:
                        data[field] = val
        return data

    def filter(self, fn, data):
        """
        Remove every reified field of `data` whose value makes `fn`
        return true, then return `data`. Data that has no fields is
        returned untouched.
        """
        if data is not None and self._is_mapping(data):
            for field in self.reified_fields(DatumInContext.wrap(data)):
                if field in data:
                    if fn(data[field]):
                        data.pop(field)
        return data

    def __str__(self):
        return ','.join(map(str, self.fields))

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, ','.join(map(repr, self.fields)))

    def __eq__(self, other):
        return isinstance(other, Fields) and tuple(self.fields) == tuple(other.fields)
598
599
class Index(JSONPath):
    """
    JSONPath that matches indices of the current datum, or none if not large enough.
    Concrete syntax is brackets.

    WARNING: If the datum is None or not long enough, it will not crash but will not match anything.
    NOTE: For the concrete syntax of `[*]`, the abstract syntax is a Slice() with no parameters (equiv to `[:]`)
    """

    def __init__(self, index):
        self.index = index

    def _select(self, value):
        """
        Return `[value[self.index]]` when that element exists, else `[]`.

        Empty values, values without a length, values that are too short,
        and lookups that fail (missing dict key, negative index out of
        range) all count as "no match" rather than raising.
        """
        if not value:
            return []
        try:
            if len(value) > self.index:
                return [value[self.index]]
        except (TypeError, KeyError, IndexError):
            pass
        return []

    def find(self, datum):
        return self._find_base(datum, create=False)

    def find_or_create(self, datum):
        return self._find_base(datum, create=True)

    def _find_base(self, datum, create):
        """
        Return the datum at this index as a list of zero or one element.

        With `create`, an empty dict value is first replaced by a list
        (see `_create_list_key()`) and the value is padded with empty
        dicts until the index exists.
        """
        datum = DatumInContext.wrap(datum)
        if create:
            if datum.value == {}:
                datum.value = _create_list_key(datum.value)
            self._pad_value(datum.value)
        return [DatumInContext(element, path=self, context=datum)
                for element in self._select(datum.value)]

    def update(self, data, val):
        return self._update_base(data, val, create=False)

    def update_or_create(self, data, val):
        return self._update_base(data, val, create=True)

    def _update_base(self, data, val, create):
        """
        Set the element at this index to `val` and return the data.

        A callable `val` is invoked as `val(current_value, data, index)`
        instead of being stored. Nothing happens when the element does
        not exist; with `create`, the data is first padded so that it does.
        """
        if create:
            if data == {}:
                data = _create_list_key(data)
            self._pad_value(data)
        # One existence check for both kinds of `val`, so a callable
        # cannot trip over a missing element either.
        if self._select(data):
            if hasattr(val, '__call__'):
                val(data[self.index], data, self.index)
            else:
                data[self.index] = val
        return data

    def filter(self, fn, data):
        """
        Remove the element at this index when `fn` returns true for it,
        then return `data`. A missing element is left alone.
        """
        found = self._select(data)
        if found and fn(found[0]):
            data.pop(self.index)  # relies on mutation :(
        return data

    def __eq__(self, other):
        return isinstance(other, Index) and self.index == other.index

    def __str__(self):
        return '[%i]' % self.index

    def __repr__(self):
        return '%s(index=%r)' % (self.__class__.__name__, self.index)

    def _pad_value(self, value):
        """Extend `value` in place with empty dicts until `self.index` exists."""
        if len(value) <= self.index:
            pad = self.index - len(value) + 1
            value += [{} for __ in range(pad)]
664
665
class Slice(JSONPath):
    """
    JSONPath matching a slice of an array.

    Because of a mismatch between JSON and XML when schema-unaware,
    this always returns an iterable; if the incoming data
    was not a list, then it returns a one element list _containing_ that
    data.

    Consider these two docs, and their schema-unaware translation to JSON:

    <a><b>hello</b></a> ==> {"a": {"b": "hello"}}
    <a><b>hello</b><b>goodbye</b></a> ==> {"a": {"b": ["hello", "goodbye"]}}

    If there were a schema, it would be known that "b" should always be an
    array (unless the schema were wonky, but that is too much to fix here)
    so when querying with JSON if the one writing the JSON knows that it
    should be an array, they can write a slice operator and it will coerce
    a non-array value to an array.

    This may be a bit unfortunate because it would be nice to always have
    an iterator, but dictionaries and other objects may also be iterable,
    so this is the compromise.
    """
    def __init__(self, start=None, end=None, step=None):
        self.start = start
        self.end = end
        self.step = step

    def _is_wildcard(self):
        """True when no bound is set, i.e. the slice stands for `[*]`."""
        return self.start is None and self.end is None and self.step is None

    def find(self, datum):
        """
        Return one datum per selected element of the value of `datum`.

        Falsy values match nothing. Dicts, numbers and strings are first
        wrapped into a single-element list.
        """
        datum = DatumInContext.wrap(datum)
        value = datum.value

        # Used for catching null value instead of empty list in path
        if not value:
            return []
        # Here's the hack. If it is a dictionary or some kind of constant,
        # put it in a single-element list. Floats are constants too and
        # have no len(), so they get the same treatment.
        scalar_types = (dict, float) + tuple(six.integer_types) + tuple(six.string_types)
        if isinstance(value, scalar_types):
            return self.find(DatumInContext([value], path=datum.path, context=datum.context))

        # Some iterators do not support slicing but we can still
        # at least work for '*': the index range is what gets sliced,
        # never the value itself.
        positions = range(len(value))
        if not self._is_wildcard():
            positions = positions[self.start:self.end:self.step]
        return [DatumInContext(value[i], path=Index(i), context=datum) for i in positions]

    def update(self, data, val):
        """
        Call `update(data, val)` on the index path of every selected
        element; returns `data`.
        """
        for datum in self.find(data):
            datum.path.update(data, val)
        return data

    def filter(self, fn, data):
        """
        Filter the selected elements of `data` with `fn`; returns the data.

        A removal shifts the remaining elements, so matching restarts
        after each one and stops once a full pass removes nothing.
        """
        while True:
            length = len(data)
            for datum in self.find(data):
                data = datum.path.filter(fn, data)
                if len(data) < length:
                    break

            if length == len(data):
                break
        return data

    def __str__(self):
        if self._is_wildcard():
            return '[*]'
        # Always emit the first colon, and test bounds against None rather
        # than truthiness: otherwise a step-only slice prints as '[:2]'
        # (which reads as end=2) and a start-only slice prints as an index.
        start = '' if self.start is None else '%s' % self.start
        end = '' if self.end is None else '%d' % self.end
        step = '' if self.step is None else ':%d' % self.step
        return '[%s:%s%s]' % (start, end, step)

    def __repr__(self):
        return '%s(start=%r,end=%r,step=%r)' % (self.__class__.__name__, self.start, self.end, self.step)

    def __eq__(self, other):
        return isinstance(other, Slice) and other.start == self.start and self.end == other.end and other.step == self.step
743
744
def _create_list_key(dict_):
    """
    Store a fresh list (holding one empty dict) in `dict_` under
    `LIST_KEY` and return that same list object.

    See `_clean_list_keys()`
    """
    placeholder = [{}]
    dict_[LIST_KEY] = placeholder
    return placeholder
753
754
def _clean_list_keys(dict_):
    """
    Replace {LIST_KEY: ['foo', 'bar']} with ['foo', 'bar'].

    Nested dicts, and dicts sitting directly inside list values, are
    cleaned the same way; `dict_` is modified in place.

    >>> _clean_list_keys({LIST_KEY: ['foo', 'bar']})
    ['foo', 'bar']

    """
    for key, value in dict_.items():
        if isinstance(value, dict):
            dict_[key] = _clean_list_keys(value)
        elif isinstance(value, list):
            cleaned = []
            for element in value:
                if isinstance(element, dict):
                    element = _clean_list_keys(element)
                cleaned.append(element)
            dict_[key] = cleaned
    return dict_[LIST_KEY] if LIST_KEY in dict_ else dict_
772