1 /*
2  * Copyright (c) 2018, SUSE LLC.
3  *
4  * This program is licensed under the BSD license, read LICENSE.BSD
5  * for further information
6  */
7 
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <fcntl.h>
12 #include <zstd.h>
13 
14 #include "chksum.h"
15 #include "util.h"
16 #include "solv_zchunk.h"
17 
/*
 * Sanity limits applied to values read from the file before they are used:
 * the header size field and the number of chunk index entries.
 */
#define MAX_HDR_SIZE  0xffffff00U
#define MAX_CHUNK_CNT 0x0fffffffU

/*
 * Verification of the checksum over all chunk data is compiled out.
 * The running data checksum is only created when this macro is defined.
 */
#undef VERIFY_DATA_CHKSUM
22 
23 struct solv_zchunk {
24   FILE *fp;
25   unsigned char *hdr;
26   unsigned char *hdr_end;
27 
28   unsigned int flags;	/* header flags */
29   unsigned int comp;	/* compression type */
30 
31   unsigned int hdr_chk_type;	/* header + data checksum */
32   unsigned int hdr_chk_len;
33   Id hdr_chk_id;
34 
35   unsigned int chunk_chk_type;	/* chunk checksum */
36   unsigned int chunk_chk_len;
37   Id chunk_chk_id;
38 
39   Chksum *data_chk;	/* for data checksum verification */
40   unsigned char *data_chk_ptr;
41 
42   unsigned int streamid;	/* stream we are reading */
43   unsigned int nchunks;		/* chunks left */
44   unsigned char *chunks;
45 
46   ZSTD_DCtx *dctx;
47   ZSTD_DDict *ddict;
48 
49   int eof;
50   unsigned char *buf;
51   unsigned int buf_used;
52   unsigned int buf_avail;
53 };
54 
/*
 * Decode a compressed unsigned 32bit integer.
 *
 * The value is stored in one to five bytes, seven payload bits per byte,
 * least significant group first.  The final byte of the encoding is the
 * one with bit 0x80 set.  A fifth byte may only carry four payload bits,
 * anything more would not fit into 32 bits.
 *
 *   p     start of the encoded value (NULL is tolerated)
 *   endp  one past the last byte that may be read
 *   dp    receives the decoded value; left untouched on failure
 *
 * Returns the address of the byte following the encoded value, or NULL
 * if p is NULL, the input ends before the final byte is seen, or the
 * value overflows 32 bits.
 */
static unsigned char *
getuint(unsigned char *p, unsigned char *endp, unsigned int *dp)
{
  unsigned int val = 0;
  int shift;

  if (!p)
    return 0;
  for (shift = 0; shift < 35 && p < endp; shift += 7)
    {
      /* do all arithmetic in unsigned int: shifting the promoted
       * (signed) byte by 28 bits could reach the sign bit */
      unsigned int byte = *p++;

      if ((byte & 0x80) != 0)
        {
          byte ^= 0x80;
          if (shift == 28 && byte > 0x0f)
            return 0;           /* more than 32 bits of payload */
          *dp = val | (byte << shift);
          return p;
        }
      val |= byte << shift;
    }
  /* ran out of input or saw five bytes without a terminator */
  return 0;
}
88 
89 static unsigned char *
getchksum(unsigned char * p,unsigned char * endp,unsigned int * typep,unsigned int * lenp,Id * idp)90 getchksum(unsigned char *p, unsigned char *endp, unsigned int *typep, unsigned int *lenp, Id *idp)
91 {
92   if ((p = getuint(p, endp, typep)) == 0)
93     return 0;
94   switch (*typep)
95     {
96     case 0:
97       *lenp = 20;
98       *idp = REPOKEY_TYPE_SHA1;
99       return p;
100     case 1:
101       *lenp = 32;
102       *idp = REPOKEY_TYPE_SHA256;
103       return p;
104     case 2:
105       *lenp = 64;
106       *idp = REPOKEY_TYPE_SHA512;
107       return p;
108     case 3:
109       *lenp = 16;
110       *idp = REPOKEY_TYPE_SHA512;
111       return p;
112     default:
113       break;
114     }
115   return 0;
116 }
117 
118 static int
skip_bytes(FILE * fp,size_t skip,Chksum * chk)119 skip_bytes(FILE *fp, size_t skip, Chksum *chk)
120 {
121   unsigned char buf[4096];
122   while (skip)
123     {
124       size_t bite = skip > sizeof(buf) ? sizeof(buf) : skip;
125       if (fread(buf, bite, 1, fp) != 1)
126 	return 0;
127       if (chk)
128 	solv_chksum_add(chk, buf, bite);
129       skip -= bite;
130     }
131   return 1;
132 }
133 
134 static int
nextchunk(struct solv_zchunk * zck,unsigned int streamid)135 nextchunk(struct solv_zchunk *zck, unsigned int streamid)
136 {
137   unsigned char *p = zck->chunks;
138   unsigned char *chunk_chk_ptr;
139   unsigned int sid, chunk_len, uncompressed_len;
140   unsigned char *cbuf;
141 
142   /* free old buffer */
143   zck->buf = solv_free(zck->buf);
144   zck->buf_avail = 0;
145   zck->buf_used = 0;
146 
147   for (;;)
148     {
149       if (zck->nchunks == 0)
150 	{
151 	  zck->chunks = p;
152 	  return 1;		/* EOF reached */
153 	}
154       if (p >= zck->hdr_end)
155 	return 0;
156       sid = streamid ? 1 : 0;
157       /* check if this is the correct stream */
158       if ((zck->flags & 1) != 0 && (p = getuint(p, zck->hdr_end, &sid)) == 0)
159 	return 0;
160       chunk_chk_ptr = p;	/* remember for verification */
161       p += zck->chunk_chk_len;
162       if (p >= zck->hdr_end)
163 	return 0;
164       if ((p = getuint(p, zck->hdr_end, &chunk_len)) == 0)
165 	return 0;
166       if ((p = getuint(p, zck->hdr_end, &uncompressed_len)) == 0)
167 	return 0;
168       zck->nchunks--;
169       if (sid == streamid)
170 	break;
171       /* skip the chunk, but the dict chunk must come first */
172       if (streamid == 0 || skip_bytes(zck->fp, chunk_len, zck->data_chk) == 0)
173 	return 0;
174     }
175   zck->chunks = p;
176 
177   /* ok, read the compressed chunk */
178   if (!chunk_len)
179     return uncompressed_len ? 0 : 1;
180   cbuf = solv_malloc(chunk_len);
181   if (fread(cbuf, chunk_len, 1, zck->fp) != 1)
182     {
183       solv_free(cbuf);
184       return 0;
185     }
186   if (zck->data_chk)
187     solv_chksum_add(zck->data_chk, cbuf, chunk_len);
188 
189   /* verify the chunk checksum */
190   if (zck->chunk_chk_id)
191     {
192       Chksum *chk = solv_chksum_create(zck->chunk_chk_id);
193       if (!chk)
194 	{
195 	  solv_free(cbuf);
196 	  return 0;
197 	}
198       solv_chksum_add(chk, cbuf, chunk_len);
199       if (memcmp(solv_chksum_get(chk, 0), chunk_chk_ptr, zck->chunk_chk_len) != 0)
200 	{
201 	  solv_chksum_free(chk, 0);
202 	  solv_free(cbuf);
203 	  return 0;
204 	}
205       solv_chksum_free(chk, 0);
206     }
207 
208   /* uncompress */
209   if (zck->comp == 0)
210     {
211       /* not compressed */
212       if (chunk_len != uncompressed_len)
213 	{
214 	  solv_free(cbuf);
215 	  return 0;
216 	}
217       zck->buf = cbuf;
218       zck->buf_avail = uncompressed_len;
219       return 1;
220     }
221   if (zck->comp == 2)
222     {
223       /* zstd compressed */
224       size_t r;
225       zck->buf = solv_malloc(uncompressed_len + 1);	/* +1 so we can detect too large frames */
226       if (zck->ddict)
227 	r = ZSTD_decompress_usingDDict(zck->dctx, zck->buf, uncompressed_len + 1, cbuf, chunk_len, zck->ddict);
228       else
229 	r = ZSTD_decompressDCtx(zck->dctx, zck->buf, uncompressed_len + 1, cbuf, chunk_len);
230       solv_free(cbuf);
231       if (r != uncompressed_len)
232 	return 0;
233       zck->buf_avail = uncompressed_len;
234       return 1;
235     }
236   solv_free(cbuf);
237   return 0;
238 }
239 
/*
 * Error exit for solv_zchunk_open(): dispose of the partially set up
 * handle and hand back NULL so the caller can return the result directly.
 */
static inline struct solv_zchunk *
open_error(struct solv_zchunk *zck)
{
  (void)solv_zchunk_close(zck);
  return NULL;
}
246 
247 struct solv_zchunk *
solv_zchunk_open(FILE * fp,unsigned int streamid)248 solv_zchunk_open(FILE *fp, unsigned int streamid)
249 {
250   struct solv_zchunk *zck;
251   unsigned char *p;
252   unsigned int hdr_size;	/* preface + index + signatures */
253   unsigned int lead_size;
254   unsigned int preface_size;
255   unsigned int index_size;
256 
257   zck = solv_calloc(1, sizeof(*zck));
258 
259   /* read and parse the lead, read the complete header */
260   zck->hdr = solv_calloc(15, 1);
261   zck->hdr_end = zck->hdr + 15;
262   if (fread(zck->hdr, 15, 1, fp) != 1 || memcmp(zck->hdr, "\000ZCK1", 5) != 0)
263     return open_error(zck);
264   p = zck->hdr + 5;
265   if ((p = getchksum(p, zck->hdr_end, &zck->hdr_chk_type, &zck->hdr_chk_len, &zck->hdr_chk_id)) == 0)
266     return open_error(zck);
267   if ((p = getuint(p, zck->hdr_end, &hdr_size)) == 0 || hdr_size > MAX_HDR_SIZE)
268     return open_error(zck);
269   lead_size = p - zck->hdr + zck->hdr_chk_len;
270   zck->hdr = solv_realloc(zck->hdr, lead_size + hdr_size);
271   zck->hdr_end = zck->hdr + lead_size + hdr_size;
272   if (fread(zck->hdr + 15, lead_size + hdr_size - 15, 1, fp) != 1)
273     return open_error(zck);
274 
275   /* verify header checksum to guard against corrupt files */
276   if (zck->hdr_chk_id)
277     {
278       Chksum *chk = solv_chksum_create(zck->hdr_chk_id);
279       if (!chk)
280 	return open_error(zck);
281       solv_chksum_add(chk, zck->hdr, lead_size - zck->hdr_chk_len);
282       solv_chksum_add(chk, zck->hdr + lead_size, hdr_size);
283       if (memcmp(solv_chksum_get(chk, 0), zck->hdr + (lead_size - zck->hdr_chk_len), zck->hdr_chk_len) != 0)
284 	{
285 	  solv_chksum_free(chk, 0);
286 	  return open_error(zck);
287 	}
288       solv_chksum_free(chk, 0);
289     }
290 
291   /* parse preface: data chksum, flags, compression */
292   p = zck->hdr + lead_size;
293   if (p + zck->hdr_chk_len > zck->hdr_end)
294     return open_error(zck);
295   zck->data_chk_ptr = p;
296   p += zck->hdr_chk_len;
297 #ifdef VERIFY_DATA_CHKSUM
298   if (zck->hdr_chk_id && (zck->data_chk = solv_chksum_create(zck->hdr_chk_id)) == 0)
299     return open_error(zck);
300 #endif
301   if ((p = getuint(p, zck->hdr_end, &zck->flags)) == 0)
302     return open_error(zck);
303   if ((zck->flags & ~(3)) != 0)
304     return open_error(zck);
305   if ((p = getuint(p, zck->hdr_end, &zck->comp)) == 0 || (zck->comp != 0 && zck->comp != 2))
306     return open_error(zck);	/* only uncompressed + zstd supported */
307   /* skip all optional elements if present */
308   if ((zck->flags & 2) != 0)
309     {
310       unsigned int nopt, lopt;
311       if ((p = getuint(p, zck->hdr_end, &nopt)) == 0)
312         return open_error(zck);
313       for (; nopt != 0; nopt--)
314 	{
315 	  if ((p = getuint(p, zck->hdr_end, &lopt)) == 0)
316             return open_error(zck);
317 	  if ((p = getuint(p, zck->hdr_end, &lopt)) == 0)
318             return open_error(zck);
319 	  if (p + lopt > zck->hdr_end)
320 	    return open_error(zck);
321 	  p += lopt;
322 	}
323     }
324 
325   preface_size = p - (zck->hdr + lead_size);
326 
327   /* parse index: index size, index chksum type, num chunks, chunk data  */
328   if ((p = getuint(p, zck->hdr_end, &index_size)) == 0)
329     return open_error(zck);
330   if (hdr_size < preface_size + index_size)
331     return open_error(zck);
332   if ((p = getchksum(p, zck->hdr_end, &zck->chunk_chk_type, &zck->chunk_chk_len, &zck->chunk_chk_id)) == 0)
333     return open_error(zck);
334   if ((p = getuint(p, zck->hdr_end, &zck->nchunks)) == 0 || zck->nchunks > MAX_CHUNK_CNT)
335     return open_error(zck);
336 
337   /* setup decompressor */
338   if (zck->comp == 2)
339     {
340       if ((zck->dctx = ZSTD_createDCtx()) == 0)
341 	return open_error(zck);
342     }
343 
344   zck->fp = fp;
345   zck->chunks = p;
346   zck->streamid = streamid;
347   if (streamid == 0)
348     {
349       zck->nchunks = zck->nchunks ? 1 : 0;	/* limit to dict chunk */
350       return zck;
351     }
352 
353   /* setup dictionary */
354   if (!nextchunk(zck, 0))
355     {
356       zck->fp = 0;
357       return open_error(zck);
358     }
359   if (zck->comp == 2 && zck->buf_avail)
360     {
361       if ((zck->ddict = ZSTD_createDDict(zck->buf, zck->buf_avail)) == 0)
362 	{
363 	  zck->fp = 0;
364 	  return open_error(zck);
365 	}
366     }
367   zck->buf = solv_free(zck->buf);
368   zck->buf_used = 0;
369   zck->buf_avail = 0;
370 
371   /* ready to read the rest of the chunks */
372   return zck;
373 }
374 
375 ssize_t
solv_zchunk_read(struct solv_zchunk * zck,char * buf,size_t len)376 solv_zchunk_read(struct solv_zchunk *zck, char *buf, size_t len)
377 {
378   size_t n = 0;
379   if (!zck || zck->eof == 2)
380     return -1;
381   while (n < len && !zck->eof)
382     {
383       unsigned int bite;
384       while (!zck->buf_avail)
385 	{
386 	  if (!zck->nchunks)
387 	    {
388 	      /* verify data checksum if requested */
389 	      if (zck->streamid != 0 && zck->data_chk && memcmp(solv_chksum_get(zck->data_chk, 0), zck->data_chk_ptr, zck->hdr_chk_len) != 0) {
390 	        zck->eof = 2;
391 	        return -1;
392 	      }
393 	      zck->eof = 1;
394 	      return n;
395 	    }
396 	  if (!nextchunk(zck, zck->streamid))
397 	    {
398 	      zck->eof = 2;
399 	      return -1;
400 	    }
401 	}
402       bite = len - n > zck->buf_avail ? zck->buf_avail : len - n;
403       memcpy(buf + n, zck->buf + zck->buf_used, bite);
404       n += bite;
405       zck->buf_used += bite;
406       zck->buf_avail -= bite;
407     }
408   return n;
409 }
410 
411 int
solv_zchunk_close(struct solv_zchunk * zck)412 solv_zchunk_close(struct solv_zchunk *zck)
413 {
414   if (zck->data_chk)
415     solv_chksum_free(zck->data_chk, 0);
416   if (zck->ddict)
417     ZSTD_freeDDict(zck->ddict);
418   if (zck->dctx)
419     ZSTD_freeDCtx(zck->dctx);
420   solv_free(zck->hdr);
421   solv_free(zck->buf);
422   if (zck->fp)
423     fclose(zck->fp);
424   solv_free(zck);
425   return 0;
426 }
427