1 /*
2 * Zlib (RFC1950 / RFC1951) compression for PuTTY.
3 *
4 * There will no doubt be criticism of my decision to reimplement
5 * Zlib compression from scratch instead of using the existing zlib
6 * code. People will cry `reinventing the wheel'; they'll claim
7 * that the `fundamental basis of OSS' is code reuse; they'll want
8 * to see a really good reason for me having chosen not to use the
9 * existing code.
10 *
11 * Well, here are my reasons. Firstly, I don't want to link the
12 * whole of zlib into the PuTTY binary; PuTTY is justifiably proud
13 * of its small size and I think zlib contains a lot of unnecessary
14 * baggage for the kind of compression that SSH requires.
15 *
16 * Secondly, I also don't like the alternative of using zlib.dll.
17 * Another thing PuTTY is justifiably proud of is its ease of
18 * installation, and the last thing I want to do is to start
19 * mandating DLLs. Not only that, but there are two _kinds_ of
20 * zlib.dll kicking around, one with C calling conventions on the
21 * exported functions and another with WINAPI conventions, and
22 * there would be a significant danger of getting the wrong one.
23 *
24 * Thirdly, there seems to be a difference of opinion on the IETF
25 * secsh mailing list about the correct way to round off a
26 * compressed packet and start the next. In particular, there's
27 * some talk of switching to a mechanism zlib isn't currently
28 * capable of supporting (see below for an explanation). Given that
29 * sort of uncertainty, I thought it might be better to have code
30 * that will support even the zlib-incompatible worst case.
31 *
32 * Fourthly, it's a _second implementation_. Second implementations
33 * are fundamentally a Good Thing in standardisation efforts. The
34 * difference of opinion mentioned above has arisen _precisely_
35 * because there has been only one zlib implementation and
36 * everybody has used it. I don't intend that this should happen
37 * again.
38 */
39
40 #include <stdlib.h>
41 #include <string.h>
42 #include <assert.h>
43
44 #include "defs.h"
45 #include "ssh.h"
46
47 /* ----------------------------------------------------------------------
48 * Basic LZ77 code. This bit is designed modularly, so it could be
49 * ripped out and used in a different LZ77 compressor. Go to it,
50 * and good luck :-)
51 */
52
53 struct LZ77InternalContext;
54 struct LZ77Context {
55 struct LZ77InternalContext *ictx;
56 void *userdata;
57 void (*literal) (struct LZ77Context * ctx, unsigned char c);
58 void (*match) (struct LZ77Context * ctx, int distance, int len);
59 };
60
61 /*
62 * Initialise the private fields of an LZ77Context. It's up to the
63 * user to initialise the public fields.
64 */
65 static int lz77_init(struct LZ77Context *ctx);
66
67 /*
68 * Supply data to be compressed. Will update the private fields of
69 * the LZ77Context, and will call literal() and match() to output.
70 * If `compress' is false, it will never emit a match, but will
71 * instead call literal() for everything.
72 */
73 static void lz77_compress(struct LZ77Context *ctx,
74 const unsigned char *data, int len);
75
76 /*
77 * Modifiable parameters.
78 */
79 #define WINSIZE 32768 /* window size. Must be power of 2! */
80 #define HASHMAX 2039 /* one more than max hash value */
81 #define MAXMATCH 32 /* how many matches we track */
82 #define HASHCHARS 3 /* how many chars make a hash */
83
84 /*
85 * This compressor takes a less slapdash approach than the
86 * gzip/zlib one. Rather than allowing our hash chains to fall into
87 * disuse near the far end, we keep them doubly linked so we can
88 * _find_ the far end, and then every time we add a new byte to the
89 * window (thus rolling round by one and removing the previous
90 * byte), we can carefully remove the hash chain entry.
91 */
92
93 #define INVALID -1 /* invalid hash _and_ invalid offset */
94 struct WindowEntry {
95 short next, prev; /* array indices within the window */
96 short hashval;
97 };
98
99 struct HashEntry {
100 short first; /* window index of first in chain */
101 };
102
103 struct Match {
104 int distance, len;
105 };
106
107 struct LZ77InternalContext {
108 struct WindowEntry win[WINSIZE];
109 unsigned char data[WINSIZE];
110 int winpos;
111 struct HashEntry hashtab[HASHMAX];
112 unsigned char pending[HASHCHARS];
113 int npending;
114 };
115
lz77_hash(const unsigned char * data)116 static int lz77_hash(const unsigned char *data)
117 {
118 return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
119 }
120
lz77_init(struct LZ77Context * ctx)121 static int lz77_init(struct LZ77Context *ctx)
122 {
123 struct LZ77InternalContext *st;
124 int i;
125
126 st = snew(struct LZ77InternalContext);
127 if (!st)
128 return 0;
129
130 ctx->ictx = st;
131
132 for (i = 0; i < WINSIZE; i++)
133 st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
134 for (i = 0; i < HASHMAX; i++)
135 st->hashtab[i].first = INVALID;
136 st->winpos = 0;
137
138 st->npending = 0;
139
140 return 1;
141 }
142
lz77_advance(struct LZ77InternalContext * st,unsigned char c,int hash)143 static void lz77_advance(struct LZ77InternalContext *st,
144 unsigned char c, int hash)
145 {
146 int off;
147
148 /*
149 * Remove the hash entry at winpos from the tail of its chain,
150 * or empty the chain if it's the only thing on the chain.
151 */
152 if (st->win[st->winpos].prev != INVALID) {
153 st->win[st->win[st->winpos].prev].next = INVALID;
154 } else if (st->win[st->winpos].hashval != INVALID) {
155 st->hashtab[st->win[st->winpos].hashval].first = INVALID;
156 }
157
158 /*
159 * Create a new entry at winpos and add it to the head of its
160 * hash chain.
161 */
162 st->win[st->winpos].hashval = hash;
163 st->win[st->winpos].prev = INVALID;
164 off = st->win[st->winpos].next = st->hashtab[hash].first;
165 st->hashtab[hash].first = st->winpos;
166 if (off != INVALID)
167 st->win[off].prev = st->winpos;
168 st->data[st->winpos] = c;
169
170 /*
171 * Advance the window pointer.
172 */
173 st->winpos = (st->winpos + 1) & (WINSIZE - 1);
174 }
175
176 #define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
177
lz77_compress(struct LZ77Context * ctx,const unsigned char * data,int len)178 static void lz77_compress(struct LZ77Context *ctx,
179 const unsigned char *data, int len)
180 {
181 struct LZ77InternalContext *st = ctx->ictx;
182 int i, distance, off, nmatch, matchlen, advance;
183 struct Match defermatch, matches[MAXMATCH];
184 int deferchr;
185
186 assert(st->npending <= HASHCHARS);
187
188 /*
189 * Add any pending characters from last time to the window. (We
190 * might not be able to.)
191 *
192 * This leaves st->pending empty in the usual case (when len >=
193 * HASHCHARS); otherwise it leaves st->pending empty enough that
194 * adding all the remaining 'len' characters will not push it past
195 * HASHCHARS in size.
196 */
197 for (i = 0; i < st->npending; i++) {
198 unsigned char foo[HASHCHARS];
199 int j;
200 if (len + st->npending - i < HASHCHARS) {
201 /* Update the pending array. */
202 for (j = i; j < st->npending; j++)
203 st->pending[j - i] = st->pending[j];
204 break;
205 }
206 for (j = 0; j < HASHCHARS; j++)
207 foo[j] = (i + j < st->npending ? st->pending[i + j] :
208 data[i + j - st->npending]);
209 lz77_advance(st, foo[0], lz77_hash(foo));
210 }
211 st->npending -= i;
212
213 defermatch.distance = 0; /* appease compiler */
214 defermatch.len = 0;
215 deferchr = '\0';
216 while (len > 0) {
217
218 if (len >= HASHCHARS) {
219 /*
220 * Hash the next few characters.
221 */
222 int hash = lz77_hash(data);
223
224 /*
225 * Look the hash up in the corresponding hash chain and see
226 * what we can find.
227 */
228 nmatch = 0;
229 for (off = st->hashtab[hash].first;
230 off != INVALID; off = st->win[off].next) {
231 /* distance = 1 if off == st->winpos-1 */
232 /* distance = WINSIZE if off == st->winpos */
233 distance =
234 WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
235 for (i = 0; i < HASHCHARS; i++)
236 if (CHARAT(i) != CHARAT(i - distance))
237 break;
238 if (i == HASHCHARS) {
239 matches[nmatch].distance = distance;
240 matches[nmatch].len = 3;
241 if (++nmatch >= MAXMATCH)
242 break;
243 }
244 }
245 } else {
246 nmatch = 0;
247 }
248
249 if (nmatch > 0) {
250 /*
251 * We've now filled up matches[] with nmatch potential
252 * matches. Follow them down to find the longest. (We
253 * assume here that it's always worth favouring a
254 * longer match over a shorter one.)
255 */
256 matchlen = HASHCHARS;
257 while (matchlen < len) {
258 int j;
259 for (i = j = 0; i < nmatch; i++) {
260 if (CHARAT(matchlen) ==
261 CHARAT(matchlen - matches[i].distance)) {
262 matches[j++] = matches[i];
263 }
264 }
265 if (j == 0)
266 break;
267 matchlen++;
268 nmatch = j;
269 }
270
271 /*
272 * We've now got all the longest matches. We favour the
273 * shorter distances, which means we go with matches[0].
274 * So see if we want to defer it or throw it away.
275 */
276 matches[0].len = matchlen;
277 if (defermatch.len > 0) {
278 if (matches[0].len > defermatch.len + 1) {
279 /* We have a better match. Emit the deferred char,
280 * and defer this match. */
281 ctx->literal(ctx, (unsigned char) deferchr);
282 defermatch = matches[0];
283 deferchr = data[0];
284 advance = 1;
285 } else {
286 /* We don't have a better match. Do the deferred one. */
287 ctx->match(ctx, defermatch.distance, defermatch.len);
288 advance = defermatch.len - 1;
289 defermatch.len = 0;
290 }
291 } else {
292 /* There was no deferred match. Defer this one. */
293 defermatch = matches[0];
294 deferchr = data[0];
295 advance = 1;
296 }
297 } else {
298 /*
299 * We found no matches. Emit the deferred match, if
300 * any; otherwise emit a literal.
301 */
302 if (defermatch.len > 0) {
303 ctx->match(ctx, defermatch.distance, defermatch.len);
304 advance = defermatch.len - 1;
305 defermatch.len = 0;
306 } else {
307 ctx->literal(ctx, data[0]);
308 advance = 1;
309 }
310 }
311
312 /*
313 * Now advance the position by `advance' characters,
314 * keeping the window and hash chains consistent.
315 */
316 while (advance > 0) {
317 if (len >= HASHCHARS) {
318 lz77_advance(st, *data, lz77_hash(data));
319 } else {
320 assert(st->npending < HASHCHARS);
321 st->pending[st->npending++] = *data;
322 }
323 data++;
324 len--;
325 advance--;
326 }
327 }
328 }
329
330 /* ----------------------------------------------------------------------
331 * Zlib compression. We always use the static Huffman tree option.
332 * Mostly this is because it's hard to scan a block in advance to
333 * work out better trees; dynamic trees are great when you're
334 * compressing a large file under no significant time constraint,
335 * but when you're compressing little bits in real time, things get
336 * hairier.
337 *
338 * I suppose it's possible that I could compute Huffman trees based
339 * on the frequencies in the _previous_ block, as a sort of
340 * heuristic, but I'm not confident that the gain would balance out
341 * having to transmit the trees.
342 */
343
344 struct Outbuf {
345 strbuf *outbuf;
346 unsigned long outbits;
347 int noutbits;
348 bool firstblock;
349 };
350
outbits(struct Outbuf * out,unsigned long bits,int nbits)351 static void outbits(struct Outbuf *out, unsigned long bits, int nbits)
352 {
353 assert(out->noutbits + nbits <= 32);
354 out->outbits |= bits << out->noutbits;
355 out->noutbits += nbits;
356 while (out->noutbits >= 8) {
357 put_byte(out->outbuf, out->outbits & 0xFF);
358 out->outbits >>= 8;
359 out->noutbits -= 8;
360 }
361 }
362
363 static const unsigned char mirrorbytes[256] = {
364 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
365 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
366 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
367 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
368 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
369 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
370 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
371 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
372 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
373 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
374 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
375 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
376 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
377 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
378 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
379 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
380 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
381 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
382 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
383 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
384 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
385 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
386 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
387 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
388 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
389 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
390 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
391 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
392 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
393 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
394 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
395 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff,
396 };
397
398 typedef struct {
399 short code, extrabits;
400 int min, max;
401 } coderecord;
402
403 static const coderecord lencodes[] = {
404 {257, 0, 3, 3},
405 {258, 0, 4, 4},
406 {259, 0, 5, 5},
407 {260, 0, 6, 6},
408 {261, 0, 7, 7},
409 {262, 0, 8, 8},
410 {263, 0, 9, 9},
411 {264, 0, 10, 10},
412 {265, 1, 11, 12},
413 {266, 1, 13, 14},
414 {267, 1, 15, 16},
415 {268, 1, 17, 18},
416 {269, 2, 19, 22},
417 {270, 2, 23, 26},
418 {271, 2, 27, 30},
419 {272, 2, 31, 34},
420 {273, 3, 35, 42},
421 {274, 3, 43, 50},
422 {275, 3, 51, 58},
423 {276, 3, 59, 66},
424 {277, 4, 67, 82},
425 {278, 4, 83, 98},
426 {279, 4, 99, 114},
427 {280, 4, 115, 130},
428 {281, 5, 131, 162},
429 {282, 5, 163, 194},
430 {283, 5, 195, 226},
431 {284, 5, 227, 257},
432 {285, 0, 258, 258},
433 };
434
435 static const coderecord distcodes[] = {
436 {0, 0, 1, 1},
437 {1, 0, 2, 2},
438 {2, 0, 3, 3},
439 {3, 0, 4, 4},
440 {4, 1, 5, 6},
441 {5, 1, 7, 8},
442 {6, 2, 9, 12},
443 {7, 2, 13, 16},
444 {8, 3, 17, 24},
445 {9, 3, 25, 32},
446 {10, 4, 33, 48},
447 {11, 4, 49, 64},
448 {12, 5, 65, 96},
449 {13, 5, 97, 128},
450 {14, 6, 129, 192},
451 {15, 6, 193, 256},
452 {16, 7, 257, 384},
453 {17, 7, 385, 512},
454 {18, 8, 513, 768},
455 {19, 8, 769, 1024},
456 {20, 9, 1025, 1536},
457 {21, 9, 1537, 2048},
458 {22, 10, 2049, 3072},
459 {23, 10, 3073, 4096},
460 {24, 11, 4097, 6144},
461 {25, 11, 6145, 8192},
462 {26, 12, 8193, 12288},
463 {27, 12, 12289, 16384},
464 {28, 13, 16385, 24576},
465 {29, 13, 24577, 32768},
466 };
467
zlib_literal(struct LZ77Context * ectx,unsigned char c)468 static void zlib_literal(struct LZ77Context *ectx, unsigned char c)
469 {
470 struct Outbuf *out = (struct Outbuf *) ectx->userdata;
471
472 if (c <= 143) {
473 /* 0 through 143 are 8 bits long starting at 00110000. */
474 outbits(out, mirrorbytes[0x30 + c], 8);
475 } else {
476 /* 144 through 255 are 9 bits long starting at 110010000. */
477 outbits(out, 1 + 2 * mirrorbytes[0x90 - 144 + c], 9);
478 }
479 }
480
zlib_match(struct LZ77Context * ectx,int distance,int len)481 static void zlib_match(struct LZ77Context *ectx, int distance, int len)
482 {
483 const coderecord *d, *l;
484 int i, j, k;
485 struct Outbuf *out = (struct Outbuf *) ectx->userdata;
486
487 while (len > 0) {
488 int thislen;
489
490 /*
491 * We can transmit matches of lengths 3 through 258
492 * inclusive. So if len exceeds 258, we must transmit in
493 * several steps, with 258 or less in each step.
494 *
495 * Specifically: if len >= 261, we can transmit 258 and be
496 * sure of having at least 3 left for the next step. And if
497 * len <= 258, we can just transmit len. But if len == 259
498 * or 260, we must transmit len-3.
499 */
500 thislen = (len > 260 ? 258 : len <= 258 ? len : len - 3);
501 len -= thislen;
502
503 /*
504 * Binary-search to find which length code we're
505 * transmitting.
506 */
507 i = -1;
508 j = lenof(lencodes);
509 while (1) {
510 assert(j - i >= 2);
511 k = (j + i) / 2;
512 if (thislen < lencodes[k].min)
513 j = k;
514 else if (thislen > lencodes[k].max)
515 i = k;
516 else {
517 l = &lencodes[k];
518 break; /* found it! */
519 }
520 }
521
522 /*
523 * Transmit the length code. 256-279 are seven bits
524 * starting at 0000000; 280-287 are eight bits starting at
525 * 11000000.
526 */
527 if (l->code <= 279) {
528 outbits(out, mirrorbytes[(l->code - 256) * 2], 7);
529 } else {
530 outbits(out, mirrorbytes[0xc0 - 280 + l->code], 8);
531 }
532
533 /*
534 * Transmit the extra bits.
535 */
536 if (l->extrabits)
537 outbits(out, thislen - l->min, l->extrabits);
538
539 /*
540 * Binary-search to find which distance code we're
541 * transmitting.
542 */
543 i = -1;
544 j = lenof(distcodes);
545 while (1) {
546 assert(j - i >= 2);
547 k = (j + i) / 2;
548 if (distance < distcodes[k].min)
549 j = k;
550 else if (distance > distcodes[k].max)
551 i = k;
552 else {
553 d = &distcodes[k];
554 break; /* found it! */
555 }
556 }
557
558 /*
559 * Transmit the distance code. Five bits starting at 00000.
560 */
561 outbits(out, mirrorbytes[d->code * 8], 5);
562
563 /*
564 * Transmit the extra bits.
565 */
566 if (d->extrabits)
567 outbits(out, distance - d->min, d->extrabits);
568 }
569 }
570
571 struct ssh_zlib_compressor {
572 struct LZ77Context ectx;
573 ssh_compressor sc;
574 };
575
zlib_compress_init(void)576 ssh_compressor *zlib_compress_init(void)
577 {
578 struct Outbuf *out;
579 struct ssh_zlib_compressor *comp = snew(struct ssh_zlib_compressor);
580
581 lz77_init(&comp->ectx);
582 comp->sc.vt = &ssh_zlib;
583 comp->ectx.literal = zlib_literal;
584 comp->ectx.match = zlib_match;
585
586 out = snew(struct Outbuf);
587 out->outbuf = NULL;
588 out->outbits = out->noutbits = 0;
589 out->firstblock = true;
590 comp->ectx.userdata = out;
591
592 return &comp->sc;
593 }
594
zlib_compress_cleanup(ssh_compressor * sc)595 void zlib_compress_cleanup(ssh_compressor *sc)
596 {
597 struct ssh_zlib_compressor *comp =
598 container_of(sc, struct ssh_zlib_compressor, sc);
599 struct Outbuf *out = (struct Outbuf *)comp->ectx.userdata;
600 if (out->outbuf)
601 strbuf_free(out->outbuf);
602 sfree(out);
603 sfree(comp->ectx.ictx);
604 sfree(comp);
605 }
606
zlib_compress_block(ssh_compressor * sc,const unsigned char * block,int len,unsigned char ** outblock,int * outlen,int minlen)607 void zlib_compress_block(ssh_compressor *sc,
608 const unsigned char *block, int len,
609 unsigned char **outblock, int *outlen,
610 int minlen)
611 {
612 struct ssh_zlib_compressor *comp =
613 container_of(sc, struct ssh_zlib_compressor, sc);
614 struct Outbuf *out = (struct Outbuf *) comp->ectx.userdata;
615 bool in_block;
616
617 assert(!out->outbuf);
618 out->outbuf = strbuf_new_nm();
619
620 /*
621 * If this is the first block, output the Zlib (RFC1950) header
622 * bytes 78 9C. (Deflate compression, 32K window size, default
623 * algorithm.)
624 */
625 if (out->firstblock) {
626 outbits(out, 0x9C78, 16);
627 out->firstblock = false;
628
629 in_block = false;
630 } else
631 in_block = true;
632
633 if (!in_block) {
634 /*
635 * Start a Deflate (RFC1951) fixed-trees block. We
636 * transmit a zero bit (BFINAL=0), followed by a zero
637 * bit and a one bit (BTYPE=01). Of course these are in
638 * the wrong order (01 0).
639 */
640 outbits(out, 2, 3);
641 }
642
643 /*
644 * Do the compression.
645 */
646 lz77_compress(&comp->ectx, block, len);
647
648 /*
649 * End the block (by transmitting code 256, which is
650 * 0000000 in fixed-tree mode), and transmit some empty
651 * blocks to ensure we have emitted the byte containing the
652 * last piece of genuine data. There are three ways we can
653 * do this:
654 *
655 * - Minimal flush. Output end-of-block and then open a
656 * new static block. This takes 9 bits, which is
657 * guaranteed to flush out the last genuine code in the
658 * closed block; but allegedly zlib can't handle it.
659 *
660 * - Zlib partial flush. Output EOB, open and close an
661 * empty static block, and _then_ open the new block.
662 * This is the best zlib can handle.
663 *
664 * - Zlib sync flush. Output EOB, then an empty
665 * _uncompressed_ block (000, then sync to byte
666 * boundary, then send bytes 00 00 FF FF). Then open the
667 * new block.
668 *
669 * For the moment, we will use Zlib partial flush.
670 */
671 outbits(out, 0, 7); /* close block */
672 outbits(out, 2, 3 + 7); /* empty static block */
673 outbits(out, 2, 3); /* open new block */
674
675 /*
676 * If we've been asked to pad out the compressed data until it's
677 * at least a given length, do so by emitting further empty static
678 * blocks.
679 */
680 while (out->outbuf->len < minlen) {
681 outbits(out, 0, 7); /* close block */
682 outbits(out, 2, 3); /* open new static block */
683 }
684
685 *outlen = out->outbuf->len;
686 *outblock = (unsigned char *)strbuf_to_str(out->outbuf);
687 out->outbuf = NULL;
688 }
689
690 /* ----------------------------------------------------------------------
691 * Zlib decompression. Of course, even though our compressor always
692 * uses static trees, our _decompressor_ has to be capable of
693 * handling dynamic trees if it sees them.
694 */
695
696 /*
697 * The way we work the Huffman decode is to have a table lookup on
698 * the first N bits of the input stream (in the order they arrive,
699 * of course, i.e. the first bit of the Huffman code is in bit 0).
700 * Each table entry lists the number of bits to consume, plus
701 * either an output code or a pointer to a secondary table.
702 */
703 struct zlib_table;
704 struct zlib_tableentry;
705
706 struct zlib_tableentry {
707 unsigned char nbits;
708 short code;
709 struct zlib_table *nexttable;
710 };
711
712 struct zlib_table {
713 int mask; /* mask applied to input bit stream */
714 struct zlib_tableentry *table;
715 };
716
717 #define MAXCODELEN 16
718 #define MAXSYMS 288
719
720 /*
721 * Build a single-level decode table for elements
722 * [minlength,maxlength) of the provided code/length tables, and
723 * recurse to build subtables.
724 */
zlib_mkonetab(int * codes,unsigned char * lengths,int nsyms,int pfx,int pfxbits,int bits)725 static struct zlib_table *zlib_mkonetab(int *codes, unsigned char *lengths,
726 int nsyms,
727 int pfx, int pfxbits, int bits)
728 {
729 struct zlib_table *tab = snew(struct zlib_table);
730 int pfxmask = (1 << pfxbits) - 1;
731 int nbits, i, j, code;
732
733 tab->table = snewn((size_t)1 << bits, struct zlib_tableentry);
734 tab->mask = (1 << bits) - 1;
735
736 for (code = 0; code <= tab->mask; code++) {
737 tab->table[code].code = -1;
738 tab->table[code].nbits = 0;
739 tab->table[code].nexttable = NULL;
740 }
741
742 for (i = 0; i < nsyms; i++) {
743 if (lengths[i] <= pfxbits || (codes[i] & pfxmask) != pfx)
744 continue;
745 code = (codes[i] >> pfxbits) & tab->mask;
746 for (j = code; j <= tab->mask; j += 1 << (lengths[i] - pfxbits)) {
747 tab->table[j].code = i;
748 nbits = lengths[i] - pfxbits;
749 if (tab->table[j].nbits < nbits)
750 tab->table[j].nbits = nbits;
751 }
752 }
753 for (code = 0; code <= tab->mask; code++) {
754 if (tab->table[code].nbits <= bits)
755 continue;
756 /* Generate a subtable. */
757 tab->table[code].code = -1;
758 nbits = tab->table[code].nbits - bits;
759 if (nbits > 7)
760 nbits = 7;
761 tab->table[code].nbits = bits;
762 tab->table[code].nexttable = zlib_mkonetab(codes, lengths, nsyms,
763 pfx | (code << pfxbits),
764 pfxbits + bits, nbits);
765 }
766
767 return tab;
768 }
769
770 /*
771 * Build a decode table, given a set of Huffman tree lengths.
772 */
zlib_mktable(unsigned char * lengths,int nlengths)773 static struct zlib_table *zlib_mktable(unsigned char *lengths,
774 int nlengths)
775 {
776 int count[MAXCODELEN], startcode[MAXCODELEN], codes[MAXSYMS];
777 int code, maxlen;
778 int i, j;
779
780 /* Count the codes of each length. */
781 maxlen = 0;
782 for (i = 1; i < MAXCODELEN; i++)
783 count[i] = 0;
784 for (i = 0; i < nlengths; i++) {
785 count[lengths[i]]++;
786 if (maxlen < lengths[i])
787 maxlen = lengths[i];
788 }
789 /* Determine the starting code for each length block. */
790 code = 0;
791 for (i = 1; i < MAXCODELEN; i++) {
792 startcode[i] = code;
793 code += count[i];
794 code <<= 1;
795 }
796 /* Determine the code for each symbol. Mirrored, of course. */
797 for (i = 0; i < nlengths; i++) {
798 code = startcode[lengths[i]]++;
799 codes[i] = 0;
800 for (j = 0; j < lengths[i]; j++) {
801 codes[i] = (codes[i] << 1) | (code & 1);
802 code >>= 1;
803 }
804 }
805
806 /*
807 * Now we have the complete list of Huffman codes. Build a
808 * table.
809 */
810 return zlib_mkonetab(codes, lengths, nlengths, 0, 0,
811 maxlen < 9 ? maxlen : 9);
812 }
813
zlib_freetable(struct zlib_table ** ztab)814 static int zlib_freetable(struct zlib_table **ztab)
815 {
816 struct zlib_table *tab;
817 int code;
818
819 if (ztab == NULL)
820 return -1;
821
822 if (*ztab == NULL)
823 return 0;
824
825 tab = *ztab;
826
827 for (code = 0; code <= tab->mask; code++)
828 if (tab->table[code].nexttable != NULL)
829 zlib_freetable(&tab->table[code].nexttable);
830
831 sfree(tab->table);
832 tab->table = NULL;
833
834 sfree(tab);
835 *ztab = NULL;
836
837 return (0);
838 }
839
840 struct zlib_decompress_ctx {
841 struct zlib_table *staticlentable, *staticdisttable;
842 struct zlib_table *currlentable, *currdisttable, *lenlentable;
843 enum {
844 START, OUTSIDEBLK,
845 TREES_HDR, TREES_LENLEN, TREES_LEN, TREES_LENREP,
846 INBLK, GOTLENSYM, GOTLEN, GOTDISTSYM,
847 UNCOMP_LEN, UNCOMP_NLEN, UNCOMP_DATA
848 } state;
849 int sym, hlit, hdist, hclen, lenptr, lenextrabits, lenaddon, len,
850 lenrep;
851 int uncomplen;
852 unsigned char lenlen[19];
853
854 /*
855 * Array that accumulates the code lengths sent in the header of a
856 * dynamic-Huffman-tree block.
857 *
858 * There are 286 actual symbols in the literal/length alphabet
859 * (256 literals plus 20 length categories), and 30 symbols in the
860 * distance alphabet. However, the block header transmits the
861 * number of code lengths for the former alphabet as a 5-bit value
862 * HLIT to be added to 257, and the latter as a 5-bit value HDIST
863 * to be added to 1. This means that the number of _code lengths_
864 * can go as high as 288 for the symbol alphabet and 32 for the
865 * distance alphabet - each of those values being 2 more than the
866 * maximum number of actual symbols.
867 *
868 * It's tempting to rule that sending out-of-range HLIT or HDIST
869 * is therefore just illegal, and to fault it when we initially
870 * receive that header. But instead I've chosen to permit the
871 * Huffman-code definition to include code length entries for
872 * those unused symbols; if a header of that form is transmitted,
873 * then the effect will be that in the main body of the block,
874 * some bit sequence(s) will generate an illegal symbol number,
875 * and _that_ will be faulted as a decoding error.
876 *
877 * Rationale: this can already happen! The standard Huffman code
878 * used in a _static_ block for the literal/length alphabet is
879 * defined in such a way that it includes codes for symbols 287
880 * and 288, which are then never actually sent in the body of the
881 * block. And I think that if the standard static tree definition
882 * is willing to include Huffman codes that don't correspond to a
883 * symbol, then it's an excessive restriction on dynamic tables
884 * not to permit them to do the same. In particular, it would be
885 * strange for a dynamic block not to be able to exactly mimic
886 * either or both of the Huffman codes used by a static block for
887 * the corresponding alphabet.
888 *
889 * So we place no constraint on HLIT or HDIST during code
890 * construction, and we make this array large enough to include
891 * the maximum number of code lengths that can possibly arise as a
892 * result. It's only trying to _use_ the junk Huffman codes after
893 * table construction is completed that will provoke a decode
894 * error.
895 */
896 unsigned char lengths[288 + 32];
897
898 unsigned long bits;
899 int nbits;
900 unsigned char window[WINSIZE];
901 int winpos;
902 strbuf *outblk;
903
904 ssh_decompressor dc;
905 };
906
zlib_decompress_init(void)907 ssh_decompressor *zlib_decompress_init(void)
908 {
909 struct zlib_decompress_ctx *dctx = snew(struct zlib_decompress_ctx);
910 unsigned char lengths[288];
911
912 memset(lengths, 8, 144);
913 memset(lengths + 144, 9, 256 - 144);
914 memset(lengths + 256, 7, 280 - 256);
915 memset(lengths + 280, 8, 288 - 280);
916 dctx->staticlentable = zlib_mktable(lengths, 288);
917 memset(lengths, 5, 32);
918 dctx->staticdisttable = zlib_mktable(lengths, 32);
919 dctx->state = START; /* even before header */
920 dctx->currlentable = dctx->currdisttable = dctx->lenlentable = NULL;
921 dctx->bits = 0;
922 dctx->nbits = 0;
923 dctx->winpos = 0;
924 dctx->outblk = NULL;
925
926 dctx->dc.vt = &ssh_zlib;
927 return &dctx->dc;
928 }
929
zlib_decompress_cleanup(ssh_decompressor * dc)930 void zlib_decompress_cleanup(ssh_decompressor *dc)
931 {
932 struct zlib_decompress_ctx *dctx =
933 container_of(dc, struct zlib_decompress_ctx, dc);
934
935 if (dctx->currlentable && dctx->currlentable != dctx->staticlentable)
936 zlib_freetable(&dctx->currlentable);
937 if (dctx->currdisttable && dctx->currdisttable != dctx->staticdisttable)
938 zlib_freetable(&dctx->currdisttable);
939 if (dctx->lenlentable)
940 zlib_freetable(&dctx->lenlentable);
941 zlib_freetable(&dctx->staticlentable);
942 zlib_freetable(&dctx->staticdisttable);
943 if (dctx->outblk)
944 strbuf_free(dctx->outblk);
945 sfree(dctx);
946 }
947
zlib_huflookup(unsigned long * bitsp,int * nbitsp,struct zlib_table * tab)948 static int zlib_huflookup(unsigned long *bitsp, int *nbitsp,
949 struct zlib_table *tab)
950 {
951 unsigned long bits = *bitsp;
952 int nbits = *nbitsp;
953 while (1) {
954 struct zlib_tableentry *ent;
955 ent = &tab->table[bits & tab->mask];
956 if (ent->nbits > nbits)
957 return -1; /* not enough data */
958 bits >>= ent->nbits;
959 nbits -= ent->nbits;
960 if (ent->code == -1)
961 tab = ent->nexttable;
962 else {
963 *bitsp = bits;
964 *nbitsp = nbits;
965 return ent->code;
966 }
967
968 if (!tab) {
969 /*
970 * There was a missing entry in the table, presumably
971 * due to an invalid Huffman table description, and the
972 * subsequent data has attempted to use the missing
973 * entry. Return a decoding failure.
974 */
975 return -2;
976 }
977 }
978 }
979
zlib_emit_char(struct zlib_decompress_ctx * dctx,int c)980 static void zlib_emit_char(struct zlib_decompress_ctx *dctx, int c)
981 {
982 dctx->window[dctx->winpos] = c;
983 dctx->winpos = (dctx->winpos + 1) & (WINSIZE - 1);
984 put_byte(dctx->outblk, c);
985 }
986
987 #define EATBITS(n) ( dctx->nbits -= (n), dctx->bits >>= (n) )
988
zlib_decompress_block(ssh_decompressor * dc,const unsigned char * block,int len,unsigned char ** outblock,int * outlen)989 bool zlib_decompress_block(ssh_decompressor *dc,
990 const unsigned char *block, int len,
991 unsigned char **outblock, int *outlen)
992 {
993 struct zlib_decompress_ctx *dctx =
994 container_of(dc, struct zlib_decompress_ctx, dc);
995 const coderecord *rec;
996 int code, blktype, rep, dist, nlen, header;
997 static const unsigned char lenlenmap[] = {
998 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
999 };
1000
1001 assert(!dctx->outblk);
1002 dctx->outblk = strbuf_new_nm();
1003
1004 while (len > 0 || dctx->nbits > 0) {
1005 while (dctx->nbits < 24 && len > 0) {
1006 dctx->bits |= (*block++) << dctx->nbits;
1007 dctx->nbits += 8;
1008 len--;
1009 }
1010 switch (dctx->state) {
1011 case START:
1012 /* Expect 16-bit zlib header. */
1013 if (dctx->nbits < 16)
1014 goto finished; /* done all we can */
1015
1016 /*
1017 * The header is stored as a big-endian 16-bit integer,
1018 * in contrast to the general little-endian policy in
1019 * the rest of the format :-(
1020 */
1021 header = (((dctx->bits & 0xFF00) >> 8) |
1022 ((dctx->bits & 0x00FF) << 8));
1023 EATBITS(16);
1024
1025 /*
1026 * Check the header:
1027 *
1028 * - bits 8-11 should be 1000 (Deflate/RFC1951)
1029 * - bits 12-15 should be at most 0111 (window size)
1030 * - bit 5 should be zero (no dictionary present)
1031 * - we don't care about bits 6-7 (compression rate)
1032 * - bits 0-4 should be set up to make the whole thing
1033 * a multiple of 31 (checksum).
1034 */
1035 if ((header & 0x0F00) != 0x0800 ||
1036 (header & 0xF000) > 0x7000 ||
1037 (header & 0x0020) != 0x0000 ||
1038 (header % 31) != 0)
1039 goto decode_error;
1040
1041 dctx->state = OUTSIDEBLK;
1042 break;
1043 case OUTSIDEBLK:
1044 /* Expect 3-bit block header. */
1045 if (dctx->nbits < 3)
1046 goto finished; /* done all we can */
1047 EATBITS(1);
1048 blktype = dctx->bits & 3;
1049 EATBITS(2);
1050 if (blktype == 0) {
1051 int to_eat = dctx->nbits & 7;
1052 dctx->state = UNCOMP_LEN;
1053 EATBITS(to_eat); /* align to byte boundary */
1054 } else if (blktype == 1) {
1055 dctx->currlentable = dctx->staticlentable;
1056 dctx->currdisttable = dctx->staticdisttable;
1057 dctx->state = INBLK;
1058 } else if (blktype == 2) {
1059 dctx->state = TREES_HDR;
1060 }
1061 break;
1062 case TREES_HDR:
1063 /*
1064 * Dynamic block header. Five bits of HLIT, five of
1065 * HDIST, four of HCLEN.
1066 */
1067 if (dctx->nbits < 5 + 5 + 4)
1068 goto finished; /* done all we can */
1069 dctx->hlit = 257 + (dctx->bits & 31);
1070 EATBITS(5);
1071 dctx->hdist = 1 + (dctx->bits & 31);
1072 EATBITS(5);
1073 dctx->hclen = 4 + (dctx->bits & 15);
1074 EATBITS(4);
1075 dctx->lenptr = 0;
1076 dctx->state = TREES_LENLEN;
1077 memset(dctx->lenlen, 0, sizeof(dctx->lenlen));
1078 break;
1079 case TREES_LENLEN:
1080 if (dctx->nbits < 3)
1081 goto finished;
1082 while (dctx->lenptr < dctx->hclen && dctx->nbits >= 3) {
1083 dctx->lenlen[lenlenmap[dctx->lenptr++]] =
1084 (unsigned char) (dctx->bits & 7);
1085 EATBITS(3);
1086 }
1087 if (dctx->lenptr == dctx->hclen) {
1088 dctx->lenlentable = zlib_mktable(dctx->lenlen, 19);
1089 dctx->state = TREES_LEN;
1090 dctx->lenptr = 0;
1091 }
1092 break;
1093 case TREES_LEN:
1094 if (dctx->lenptr >= dctx->hlit + dctx->hdist) {
1095 dctx->currlentable = zlib_mktable(dctx->lengths, dctx->hlit);
1096 dctx->currdisttable = zlib_mktable(dctx->lengths + dctx->hlit,
1097 dctx->hdist);
1098 zlib_freetable(&dctx->lenlentable);
1099 dctx->lenlentable = NULL;
1100 dctx->state = INBLK;
1101 break;
1102 }
1103 code =
1104 zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->lenlentable);
1105 if (code == -1)
1106 goto finished;
1107 if (code == -2)
1108 goto decode_error;
1109 if (code < 16)
1110 dctx->lengths[dctx->lenptr++] = code;
1111 else {
1112 dctx->lenextrabits = (code == 16 ? 2 : code == 17 ? 3 : 7);
1113 dctx->lenaddon = (code == 18 ? 11 : 3);
1114 dctx->lenrep = (code == 16 && dctx->lenptr > 0 ?
1115 dctx->lengths[dctx->lenptr - 1] : 0);
1116 dctx->state = TREES_LENREP;
1117 }
1118 break;
1119 case TREES_LENREP:
1120 if (dctx->nbits < dctx->lenextrabits)
1121 goto finished;
1122 rep =
1123 dctx->lenaddon +
1124 (dctx->bits & ((1 << dctx->lenextrabits) - 1));
1125 EATBITS(dctx->lenextrabits);
1126 while (rep > 0 && dctx->lenptr < dctx->hlit + dctx->hdist) {
1127 dctx->lengths[dctx->lenptr] = dctx->lenrep;
1128 dctx->lenptr++;
1129 rep--;
1130 }
1131 dctx->state = TREES_LEN;
1132 break;
1133 case INBLK:
1134 code =
1135 zlib_huflookup(&dctx->bits, &dctx->nbits, dctx->currlentable);
1136 if (code == -1)
1137 goto finished;
1138 if (code == -2)
1139 goto decode_error;
1140 if (code < 256)
1141 zlib_emit_char(dctx, code);
1142 else if (code == 256) {
1143 dctx->state = OUTSIDEBLK;
1144 if (dctx->currlentable != dctx->staticlentable) {
1145 zlib_freetable(&dctx->currlentable);
1146 dctx->currlentable = NULL;
1147 }
1148 if (dctx->currdisttable != dctx->staticdisttable) {
1149 zlib_freetable(&dctx->currdisttable);
1150 dctx->currdisttable = NULL;
1151 }
1152 } else if (code < 286) {
1153 dctx->state = GOTLENSYM;
1154 dctx->sym = code;
1155 } else {
1156 /* literal/length symbols 286 and 287 are invalid */
1157 goto decode_error;
1158 }
1159 break;
1160 case GOTLENSYM:
1161 rec = &lencodes[dctx->sym - 257];
1162 if (dctx->nbits < rec->extrabits)
1163 goto finished;
1164 dctx->len =
1165 rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1166 EATBITS(rec->extrabits);
1167 dctx->state = GOTLEN;
1168 break;
1169 case GOTLEN:
1170 code =
1171 zlib_huflookup(&dctx->bits, &dctx->nbits,
1172 dctx->currdisttable);
1173 if (code == -1)
1174 goto finished;
1175 if (code == -2)
1176 goto decode_error;
1177 if (code >= 30) /* dist symbols 30 and 31 are invalid */
1178 goto decode_error;
1179 dctx->state = GOTDISTSYM;
1180 dctx->sym = code;
1181 break;
1182 case GOTDISTSYM:
1183 rec = &distcodes[dctx->sym];
1184 if (dctx->nbits < rec->extrabits)
1185 goto finished;
1186 dist = rec->min + (dctx->bits & ((1 << rec->extrabits) - 1));
1187 EATBITS(rec->extrabits);
1188 dctx->state = INBLK;
1189 while (dctx->len--)
1190 zlib_emit_char(dctx, dctx->window[(dctx->winpos - dist) &
1191 (WINSIZE - 1)]);
1192 break;
1193 case UNCOMP_LEN:
1194 /*
1195 * Uncompressed block. We expect to see a 16-bit LEN.
1196 */
1197 if (dctx->nbits < 16)
1198 goto finished;
1199 dctx->uncomplen = dctx->bits & 0xFFFF;
1200 EATBITS(16);
1201 dctx->state = UNCOMP_NLEN;
1202 break;
1203 case UNCOMP_NLEN:
1204 /*
1205 * Uncompressed block. We expect to see a 16-bit NLEN,
1206 * which should be the one's complement of the previous
1207 * LEN.
1208 */
1209 if (dctx->nbits < 16)
1210 goto finished;
1211 nlen = dctx->bits & 0xFFFF;
1212 EATBITS(16);
1213 if (dctx->uncomplen != (nlen ^ 0xFFFF))
1214 goto decode_error;
1215 if (dctx->uncomplen == 0)
1216 dctx->state = OUTSIDEBLK; /* block is empty */
1217 else
1218 dctx->state = UNCOMP_DATA;
1219 break;
1220 case UNCOMP_DATA:
1221 if (dctx->nbits < 8)
1222 goto finished;
1223 zlib_emit_char(dctx, dctx->bits & 0xFF);
1224 EATBITS(8);
1225 if (--dctx->uncomplen == 0)
1226 dctx->state = OUTSIDEBLK; /* end of uncompressed block */
1227 break;
1228 }
1229 }
1230
1231 finished:
1232 *outlen = dctx->outblk->len;
1233 *outblock = (unsigned char *)strbuf_to_str(dctx->outblk);
1234 dctx->outblk = NULL;
1235 return true;
1236
1237 decode_error:
1238 *outblock = NULL;
1239 *outlen = 0;
1240 return false;
1241 }
1242
1243 const ssh_compression_alg ssh_zlib = {
1244 .name = "zlib",
1245 .delayed_name = "zlib@openssh.com", /* delayed version */
1246 .compress_new = zlib_compress_init,
1247 .compress_free = zlib_compress_cleanup,
1248 .compress = zlib_compress_block,
1249 .decompress_new = zlib_decompress_init,
1250 .decompress_free = zlib_decompress_cleanup,
1251 .decompress = zlib_decompress_block,
1252 .text_name = "zlib (RFC1950)",
1253 };
1254