# distutils: language=c++

from libcpp.vector cimport vector
from libc.stdint cimport uint8_t, uint64_t

import json as jsonlib
import numpy as np

cimport lazperf
cimport numpy as np
np.import_array()

def get_lazperf_type(size, t):
    if t == 'floating':
        if size == 8:
            return lazperf.Double
        else:
            return lazperf.Float
    if t == 'unsigned':
        if size == 8:
            return lazperf.Unsigned64
        elif size == 4:
            return lazperf.Unsigned32
        elif size == 2:
            return lazperf.Unsigned16
        elif size == 1:
            return lazperf.Unsigned8
        else:
            raise Exception("Unexpected type size '%s' for unsigned type" % size)
    if t == 'signed':
        if size == 8:
            return lazperf.Signed64
        elif size == 4:
            return lazperf.Signed32
        elif size == 2:
            return lazperf.Signed16
        elif size == 1:
            return lazperf.Signed8
        else:
            raise Exception("Unexpected type size '%s' for signed type" % size)

def buildNumpyDescription(schema):
    """Given a Greyhound schema, convert it into a numpy dtype description
    http://docs.scipy.org/doc/numpy/reference/generated/numpy.dtype.html
    """
    formats = []
    names = []

    # schema may already be a parsed list of dicts, or a JSON string
    try:
        schema[0]['type']
    except (TypeError, KeyError, IndexError):
        schema = jsonlib.loads(schema)

    for s in schema:
        t = s['type']
        if t == 'floating':
            t = 'f'
        elif t == 'unsigned':
            t = 'u'
        else:
            t = 'i'

        f = '%s%d' % (t, int(s['size']))
        names.append(s['name'])
        formats.append(f)
    return np.dtype({'names': names, 'formats': formats})

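# Illustrative sketch (kept as a comment so it is not executed at import time):
# a minimal Greyhound-style schema and the numpy dtype it maps to. The field
# names below are only examples.
#
#   >>> schema = [
#   ...     {'name': 'X', 'type': 'signed', 'size': 4},
#   ...     {'name': 'Intensity', 'type': 'unsigned', 'size': 2},
#   ...     {'name': 'GpsTime', 'type': 'floating', 'size': 8},
#   ... ]
#   >>> buildNumpyDescription(schema)
#   dtype([('X', '<i4'), ('Intensity', '<u2'), ('GpsTime', '<f8')])  # on a little-endian machine
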
def buildGreyhoundDescription(dtype):
    """Given a numpy dtype, return a Greyhound schema"""
    output = []
    for t in dtype.descr:
        name = t[0]
        dt = dtype.fields[name]
        size = dt[0].itemsize
        tname = dt[0].name

        entry = {}
        if 'float' in tname:
            entry['type'] = 'floating'
        elif 'uint' in tname:
            entry['type'] = 'unsigned'
        else:
            entry['type'] = 'signed'

        entry['size'] = size
        entry['name'] = name
        output.append(entry)
    return output

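# Illustrative sketch: buildGreyhoundDescription is the inverse of
# buildNumpyDescription; it turns a numpy dtype back into a list of
# Greyhound-style dimension dicts (shown here with approximate formatting).
#
#   >>> buildGreyhoundDescription(np.dtype([('X', 'i4'), ('Intensity', 'u2')]))
#   [{'type': 'signed', 'size': 4, 'name': 'X'},
#    {'type': 'unsigned', 'size': 2, 'name': 'Intensity'}]
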

cdef class PyCompressor:
    """ Class to compress points into the LAZ format, using a JSON schema or
    numpy dtype to describe the point format
    """
    cdef lazperf.Compressor *thisptr      # holds the C++ instance we're wrapping
    cdef public str jsondata
    cdef vector[uint8_t] *v

    def __init__(self, object schema):
        """
        schema: numpy dtype or json string of the point schema
        """
        self.v = new vector[uint8_t]()
        self.thisptr = new lazperf.Compressor(self.v[0])

        try:
            self.jsondata = jsonlib.dumps(buildGreyhoundDescription(schema))
        except AttributeError:
            self.jsondata = schema
        self.add_dimensions(self.jsondata)

    def compress(self, np.ndarray arr not None):
        """ Compresses points and returns the result as a numpy array of bytes
        """
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] view
        view = arr.view(dtype=np.uint8)

        point_count = self.thisptr.compress(view.data, view.shape[0])
        self.done()
        return self.get_data()

    cdef get_data(self):
        cdef const vector[uint8_t]* v = self.thisptr.data()
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] arr = np.zeros(v.size(), dtype=np.uint8)

        self.thisptr.copy_data_to(<uint8_t*> arr.data)
        return arr

    def done(self):
        self.thisptr.done()

    def add_dimensions(self, jsondata):
        data = None
        try:
            jsondata[0]['type']
            data = jsondata
        except (TypeError, KeyError, IndexError):
            data = jsonlib.loads(jsondata)

        for dim in data:
            t = get_lazperf_type(dim['size'], dim['type'])
            self.thisptr.add_dimension(t)

    def __dealloc__(self):
        del self.v
        del self.thisptr

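# Illustrative usage sketch for PyCompressor (variable names are examples):
# points laid out according to a dtype are compressed into a 1-D uint8 buffer.
#
#   >>> dtype = np.dtype([('X', 'i4'), ('Y', 'i4'), ('Z', 'i4')])
#   >>> points = np.zeros(10, dtype=dtype)
#   >>> compressor = PyCompressor(dtype)
#   >>> compressed = compressor.compress(points)   # 1-D array of np.uint8
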

cdef class PyDecompressor:
    """ Class to decompress LAZ points, using a JSON schema or numpy dtype
    to describe the point format
    """
    cdef lazperf.Decompressor *thisptr      # holds the C++ instance we're wrapping
    cdef public str jsondata

    def __init__(self, np.ndarray[uint8_t, ndim=1, mode="c"] compressed_points not None, object schema):
        """
        compressed_points: buffer of compressed points
        schema: numpy dtype or json string of the point schema
        """
        try:
            self.jsondata = jsonlib.dumps(buildGreyhoundDescription(schema))
        except AttributeError:
            self.jsondata = schema

        self.thisptr = new lazperf.Decompressor(
            <const uint8_t*>compressed_points.data, compressed_points.shape[0])
        self.add_dimensions(self.jsondata)

    def decompress(self, size_t num_points):
        """ Decompresses points

        returns the numpy structured array of the decompressed points
        """
        cdef size_t point_size = self.thisptr.getPointSize()
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] out = np.zeros(num_points * point_size, np.uint8)

        point_count = self.thisptr.decompress(out.data, out.shape[0])
        return out.view(dtype=buildNumpyDescription(self.jsondata))

    def add_dimensions(self, jsondata):
        data = None
        try:
            jsondata[0]['type']
            data = jsondata
        except (TypeError, KeyError, IndexError):
            data = jsonlib.loads(jsondata)

        for dim in data:
            t = get_lazperf_type(dim['size'], dim['type'])
            self.thisptr.add_dimension(t)

    def __dealloc__(self):
        del self.thisptr

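# Illustrative usage sketch for PyDecompressor: `compressed` is a 1-D uint8
# array produced by PyCompressor (or an equivalent stream) and `dtype`
# describes the point format, as in the PyCompressor example above.
#
#   >>> decompressor = PyDecompressor(compressed, dtype)
#   >>> points = decompressor.decompress(10)   # structured array of 10 points
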
cdef class PyRecordSchema:
    """ Represents a LAS record schema.
    The record schema is necessary for the LazVlr to be able to compress
    points meant to be written to a LAZ file.
    """
    cdef lazperf.record_schema schema

    def __init__(self):
        pass

    def add_point(self):
        self.schema.push(lazperf.record_item.point())

    def add_gps_time(self):
        self.schema.push(lazperf.record_item.gpstime())

    def add_rgb(self):
        self.schema.push(lazperf.record_item.rgb())

    def add_extra_bytes(self, size_t count):
        self.schema.push(lazperf.record_item.eb(count))

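# Illustrative sketch: building a record schema. Which items are added depends
# on the LAS point format being written; this example assumes a format with
# the core point fields plus GPS time and RGB.
#
#   >>> schema = PyRecordSchema()
#   >>> schema.add_point()
#   >>> schema.add_gps_time()
#   >>> schema.add_rgb()
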
cdef class PyLazVlr:
    """ Wraps lazperf's LazVlr class.
    This class gives access to the laszip VLR's raw record_data so that
    writers can write LAZ files with the corresponding laszip VLR.
    """
    cdef lazperf.laz_vlr vlr
    cdef public PyRecordSchema schema

    def __init__(self, PyRecordSchema schema):
        self.schema = schema
        self.vlr = lazperf.laz_vlr.from_schema(schema.schema)

    def data(self):
        """ Returns the laszip VLR record_data as a numpy array of bytes,
        to be written in the VLR section of a LAZ compressed file.
        """
        cdef size_t vlr_size = self.vlr.size()
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] arr = np.ndarray(vlr_size, dtype=np.uint8)

        self.vlr.extract(arr.data)
        return arr

    def data_size(self):
        """ Returns the number of bytes in the lazvlr record_data
        """
        return self.vlr.size()

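# Illustrative sketch: extracting the laszip VLR record_data for a schema so
# that a writer can embed it in the VLR section of a LAZ file (`schema` is a
# PyRecordSchema as in the example above).
#
#   >>> vlr = PyLazVlr(schema)
#   >>> record_data = vlr.data()   # uint8 array, vlr.data_size() bytes long
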
cdef class PyVLRDecompressor:
    """ Class to decompress LAZ points stored in a .laz file, using the
    laszip VLR's record_data
    """
    cdef lazperf.VlrDecompressor *thisptr      # holds the C++ instance we're wrapping

    def __init__(
            self,
            np.ndarray[uint8_t, ndim=1, mode="c"] compressed_points not None,
            size_t point_size,
            np.ndarray[uint8_t, ndim=1, mode="c"] vlr not None
        ):
        """
        compressed_points: buffer of points to be decompressed
        point_size: size in bytes of one point record
        vlr: laszip VLR's record_data as an array of bytes
        """
        cdef const uint8_t *p_compressed = <const uint8_t*> compressed_points.data
        self.thisptr = new lazperf.VlrDecompressor(
            p_compressed, compressed_points.shape[0], point_size, vlr.data)

    def decompress_points(self, size_t point_count):
        """ Decompresses the points

        returns the decompressed data as an array of bytes
        """
        cdef size_t point_size = self.thisptr.getPointSize()
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] point_out = np.zeros(point_size, dtype=np.uint8)
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] points_uncompressed = np.zeros(point_count * point_size, dtype=np.uint8)
        cdef size_t begin = 0
        cdef size_t end = 0

        # Cython memoryviews are needed to get true C speed when slicing
        cdef uint8_t [:] points_view = points_uncompressed
        cdef uint8_t [:] out_view = point_out

        for _ in range(point_count):
            self.thisptr.decompress(point_out.data)
            end = begin + point_size
            points_view[begin:end] = out_view
            begin = end

        return points_uncompressed

    def __dealloc__(self):
        del self.thisptr

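# Illustrative usage sketch for PyVLRDecompressor (variable names are
# examples): `raw_points` is the compressed point data read from a LAZ file,
# `record_data` is the laszip VLR's record_data and `point_size` matches the
# point format.
#
#   >>> decompressor = PyVLRDecompressor(raw_points, point_size, record_data)
#   >>> raw = decompressor.decompress_points(point_count)   # point_count * point_size bytes
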
cdef class PyVLRCompressor:
    """ Class to compress LAS points into the LAZ format with the record schema
    from a laszip VLR. This is meant to be used by LAZ file writers.
    """
    cdef lazperf.VlrCompressor *thisptr

    def __init__(self, PyRecordSchema py_record_schema, uint64_t offset):
        """
        py_record_schema: The schema of the point format
        offset: offset to the point data (same as the LAS header field).
        This is needed because the first 8 bytes of the compressed points are an offset to the
        chunk table relative to the start of the LAS file. (Or you could pass in offset=0 and
        modify the 8 bytes yourself)
        """
        self.thisptr = new lazperf.VlrCompressor(py_record_schema.schema, offset)

    def compress(self, np.ndarray arr):
        """ Returns the compressed points as a numpy array of bytes
        """
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] view
        view = arr.view(np.uint8)

        cdef char *ptr = view.data
        cdef size_t point_size = self.thisptr.getPointSize()
        cdef size_t num_bytes = view.shape[0]
        cdef size_t point_count = num_bytes // point_size
        cdef double float_count = <double> num_bytes / point_size

        if num_bytes % point_size != 0:
            raise ValueError("The number of bytes ({}) is not divisible by the point size ({}),"
                             " which gives {} points".format(num_bytes, point_size, float_count))

        for _ in range(point_count):
            self.thisptr.compress(ptr)
            ptr += point_size

        self.thisptr.done()
        return self.get_data()

    cdef get_data(self):
        cdef const vector[uint8_t]* v = self.thisptr.data()
        cdef np.ndarray[uint8_t, ndim=1, mode="c"] arr = np.ndarray(v.size(), dtype=np.uint8)
        self.thisptr.copy_data_to(<uint8_t*>arr.data)
        return arr

    def __dealloc__(self):
        del self.thisptr

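# Illustrative usage sketch for PyVLRCompressor (variable names are examples):
# `offset_to_point_data` is the LAS header's offset to point data, and the
# returned buffer starts with the 8-byte chunk table offset described in
# __init__ above.
#
#   >>> compressor = PyVLRCompressor(schema, offset_to_point_data)
#   >>> compressed = compressor.compress(points.view(np.uint8))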