1# -*- coding: utf-8 -*-
2"""Implements a lazy JSON file class that wraps around json data."""
3import io
4import json
5import weakref
6import contextlib
7import collections.abc as cabc
8
9
10def _to_json_with_size(obj, offset=0, sort_keys=False):
11    if isinstance(obj, str):
12        s = json.dumps(obj)
13        o = offset
14        n = size = len(s.encode())  # size in bytes
15    elif isinstance(obj, cabc.Mapping):
16        s = "{"
17        j = offset + 1
18        o = {}
19        size = {}
20        items = sorted(obj.items()) if sort_keys else obj.items()
21        for key, val in items:
22            s_k, o_k, n_k, size_k = _to_json_with_size(
23                key, offset=j, sort_keys=sort_keys
24            )
25            s += s_k + ": "
26            j += n_k + 2
27            s_v, o_v, n_v, size_v = _to_json_with_size(
28                val, offset=j, sort_keys=sort_keys
29            )
30            o[key] = o_v
31            size[key] = size_v
32            s += s_v + ", "
33            j += n_v + 2
34        if s.endswith(", "):
35            s = s[:-2]
36        s += "}\n"
37        n = len(s)
38        o["__total__"] = offset
39        size["__total__"] = n
40    elif isinstance(obj, cabc.Sequence):
41        s = "["
42        j = offset + 1
43        o = []
44        size = []
45        for x in obj:
46            s_x, o_x, n_x, size_x = _to_json_with_size(x, offset=j, sort_keys=sort_keys)
47            o.append(o_x)
48            size.append(size_x)
49            s += s_x + ", "
50            j += n_x + 2
51        if s.endswith(", "):
52            s = s[:-2]
53        s += "]\n"
54        n = len(s)
55        o.append(offset)
56        size.append(n)
57    else:
58        s = json.dumps(obj, sort_keys=sort_keys)
59        o = offset
60        n = size = len(s)
61    return s, o, n, size
62
63
def index(obj, sort_keys=False):
    """Serializes ``obj`` and builds the lookup index for the result.

    Parameters
    ----------
    obj : object
        Data structure to serialize.
    sort_keys : bool, optional
        Whether mapping keys are written in sorted order.

    Returns
    -------
    s : str
        JSON text of ``obj``.
    idx : dict
        Index with an ``"offsets"`` and a ``"sizes"`` entry.
    """
    text, offsets, _length, sizes = _to_json_with_size(obj, sort_keys=sort_keys)
    return text, {"offsets": offsets, "sizes": sizes}
70
71
# Template of a lazy JSON document. The four "locs" numbers are right-aligned
# in 10-character fields, so the header has a fixed length and the text that
# follows it always starts at the same position.
JSON_FORMAT = (
    '{{"locs": [{iloc:>10}, {ilen:>10}, {dloc:>10}, {dlen:>10}],\n'
    ' "index": {index},\n'
    ' "data": {data}\n'
    "}}\n"
)
77
78
def dumps(obj, sort_keys=False):
    """Serializes ``obj`` to a JSON document that embeds its own index.

    Parameters
    ----------
    obj : object
        Data structure to serialize.
    sort_keys : bool, optional
        Whether mapping keys are written in sorted order, both in the
        data and in the index.

    Returns
    -------
    s : str
        ``JSON_FORMAT`` filled in with the index, the data, and the
        location and length of each.
    """
    data_text, idx = index(obj, sort_keys=sort_keys)
    index_text = json.dumps(idx, sort_keys=sort_keys)
    # position of the index text inside the formatted document
    index_loc = 69
    index_len = len(index_text)
    # 11 characters separate the end of the index from the start of the data
    data_loc = index_loc + index_len + 11
    return JSON_FORMAT.format(
        iloc=index_loc,
        ilen=index_len,
        dloc=data_loc,
        dlen=len(data_text),
        index=index_text,
        data=data_text,
    )
91
92
def ljdump(obj, fp, sort_keys=False):
    """Writes ``obj`` to ``fp`` as an indexed JSON document.

    Parameters
    ----------
    obj : object
        Data structure to serialize.
    fp : file-like
        Object whose ``write()`` method receives the serialized text.
    sort_keys : bool, optional
        Forwarded to ``dumps()``.
    """
    fp.write(dumps(obj, sort_keys=sort_keys))
97
98
class LJNode(cabc.Mapping, cabc.Sequence):
    """A proxy node for JSON nodes. Acts as both sequence and mapping.

    Values are read from the root's file only when they are accessed.
    Nested containers are returned as further ``LJNode`` proxies.
    """

    def __init__(self, offsets, sizes, root):
        """Parameters
        ----------
        offsets : dict, list, or int
            offsets of corresponding data structure, in bytes
        sizes : dict, list, or int
            sizes of corresponding data structure, in bytes
        root : weakref.proxy of LazyJSON
            weakref back to root node, which should be a LazyJSON object.
        """
        self.offsets = offsets
        self.sizes = sizes
        self.root = root
        self.is_mapping = isinstance(self.offsets, cabc.Mapping)
        self.is_sequence = isinstance(self.offsets, cabc.Sequence)

    def __len__(self):
        # recall that for maps, the '__total__' key is added and for
        # sequences the last element represents the total size/offset.
        return len(self.sizes) - 1

    def load(self):
        """Returns the Python data structure represented by the node.

        Raises
        ------
        TypeError
            If the offsets are neither a mapping, a sequence, nor an int.
        """
        if self.is_mapping:
            offset = self.offsets["__total__"]
            size = self.sizes["__total__"]
        elif self.is_sequence:
            offset = self.offsets[-1]
            size = self.sizes[-1]
        elif isinstance(self.offsets, int):
            offset = self.offsets
            size = self.sizes
        else:
            raise TypeError("incorrect types for offset node")
        return self._load_or_node(offset, size)

    def _load_or_node(self, offset, size):
        """Reads and parses a leaf value, or wraps a container in a new node."""
        if isinstance(offset, int):
            with self.root._open(newline="\n") as f:
                f.seek(self.root.dloc + offset)
                s = f.read(size)
            val = json.loads(s)
        elif isinstance(offset, (cabc.Mapping, cabc.Sequence)):
            val = LJNode(offset, size, self.root)
        else:
            raise TypeError("incorrect types for offset node")
        return val

    def _getitem_mapping(self, key):
        """Looks up ``key`` in a mapping node; '__total__' is reserved."""
        if key == "__total__":
            raise KeyError('"__total__" is a special LazyJSON key!')
        offset = self.offsets[key]
        size = self.sizes[key]
        return self._load_or_node(offset, size)

    def _getitem_sequence(self, key):
        """Looks up an int index or a slice in a sequence node."""
        if isinstance(key, int):
            # The last slot of offsets/sizes describes the whole sequence,
            # so indices must be resolved against len(self) rather than
            # against the underlying lists.
            n = len(self)
            i = key + n if key < 0 else key
            if not 0 <= i < n:
                raise IndexError("LJNode index out of range")
            rtn = self._load_or_node(self.offsets[i], self.sizes[i])
        elif isinstance(key, slice):
            key = slice(*key.indices(len(self)))
            rtn = list(map(self._load_or_node, self.offsets[key], self.sizes[key]))
        else:
            raise TypeError("only integer indexing available")
        return rtn

    def __getitem__(self, key):
        if self.is_mapping:
            rtn = self._getitem_mapping(key)
        elif self.is_sequence:
            rtn = self._getitem_sequence(key)
        else:
            raise NotImplementedError
        return rtn

    def __iter__(self):
        if self.is_mapping:
            # yield keys in index order, hiding the bookkeeping entry
            for key in self.offsets:
                if key != "__total__":
                    yield key
        elif self.is_sequence:
            for i in range(len(self)):
                yield self._load_or_node(self.offsets[i], self.sizes[i])
        else:
            raise NotImplementedError
187
188
class LazyJSON(LJNode):
    """Represents a lazy json file. Can be used like a normal Python
    dict or list.
    """

    def __init__(self, f, reopen=True):
        """Parameters
        ----------
        f : file handle or str
            JSON file to open.
        reopen : bool, optional
            Whether new file handle should be opened for each load.
        """
        self._f = f
        self.reopen = reopen
        if not reopen and isinstance(f, str):
            # keep a single handle open for the lifetime of this object
            self._f = open(f, "r", newline="\n")
        self._load_index()
        self.root = weakref.proxy(self)
        self.is_mapping = isinstance(self.offsets, cabc.Mapping)
        self.is_sequence = isinstance(self.offsets, cabc.Sequence)

    def __del__(self):
        self.close()

    def close(self):
        """Close the file handle, if appropriate."""
        if not self.reopen and isinstance(self._f, io.IOBase):
            try:
                self._f.close()
            except OSError:
                # closing is best-effort; there is nothing left to clean up
                pass

    @contextlib.contextmanager
    def _open(self, *args, **kwargs):
        """Yields a readable handle for the file.

        When ``reopen`` is set and a path was given, a fresh handle is
        opened with ``args``/``kwargs`` and closed on exit. Otherwise the
        stored handle is yielded and left open.
        """
        if self.reopen and isinstance(self._f, str):
            # the with-block closes the handle even when the caller's body
            # raises, so a failed read does not leak it
            with open(self._f, *args, **kwargs) as f:
                yield f
        else:
            yield self._f

    def _load_index(self):
        """Loads the index from the start of the file."""
        with self._open(newline="\n") as f:
            # read in the location data: a 48-character list at position 9
            f.seek(9)
            locs = f.read(48)
            locs = json.loads(locs)
            self.iloc, self.ilen, self.dloc, self.dlen = locs
            # read in the index
            f.seek(self.iloc)
            idx = f.read(self.ilen)
            idx = json.loads(idx)
        self.offsets = idx["offsets"]
        self.sizes = idx["sizes"]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
251