1// Copyright 2017 The Wuffs Authors. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// https://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15pub status "#bad Huffman code (over-subscribed)" 16pub status "#bad Huffman code (under-subscribed)" 17pub status "#bad Huffman code length count" 18pub status "#bad Huffman code length repetition" 19pub status "#bad Huffman code" 20pub status "#bad Huffman minimum code length" 21pub status "#bad block" 22pub status "#bad distance" 23pub status "#bad distance code count" 24pub status "#bad literal/length code count" 25pub status "#inconsistent stored block length" 26pub status "#missing end-of-block code" 27pub status "#no Huffman codes" 28 29pri status "#internal error: inconsistent Huffman decoder state" 30pri status "#internal error: inconsistent I/O" 31pri status "#internal error: inconsistent distance" 32pri status "#internal error: inconsistent n_bits" 33 34// TODO: replace the placeholder 1 value with either 0 or (32768 + 512), 35// depending on whether we'll move decoder.history into the workbuf. 36pub const decoder_workbuf_len_max_incl_worst_case base.u64 = 1 37 38// The next two tables were created by script/print-deflate-magic-numbers.go. 39// 40// The u32 values' meanings are the same as the decoder.huffs u32 values. In 41// particular, bit 30 indicates a base number + extra bits, bits 23-8 are the 42// base number and bits 7-4 are the number of those extra bits. 43// 44// Some trailing elements are 0x08000000. Bit 27 indicates an invalid value. 45 46pri const lcode_magic_numbers array[32] base.u32 = [ 47 0x40000000, 0x40000100, 0x40000200, 0x40000300, 0x40000400, 0x40000500, 0x40000600, 0x40000700, 48 0x40000810, 0x40000A10, 0x40000C10, 0x40000E10, 0x40001020, 0x40001420, 0x40001820, 0x40001C20, 49 0x40002030, 0x40002830, 0x40003030, 0x40003830, 0x40004040, 0x40005040, 0x40006040, 0x40007040, 50 0x40008050, 0x4000A050, 0x4000C050, 0x4000E050, 0x4000FF00, 0x08000000, 0x08000000, 0x08000000, 51] 52 53pri const dcode_magic_numbers array[32] base.u32 = [ 54 0x40000000, 0x40000100, 0x40000200, 0x40000300, 0x40000410, 0x40000610, 0x40000820, 0x40000C20, 55 0x40001030, 0x40001830, 0x40002040, 0x40003040, 0x40004050, 0x40006050, 0x40008060, 0x4000C060, 56 0x40010070, 0x40018070, 0x40020080, 0x40030080, 0x40040090, 0x40060090, 0x400800A0, 0x400C00A0, 57 0x401000B0, 0x401800B0, 0x402000C0, 0x403000C0, 0x404000D0, 0x406000D0, 0x08000000, 0x08000000, 58] 59 60// huffs_table_size is the smallest power of 2 that is greater than or equal to 61// the worst-case size of the Huffman tables. See 62// script/print-deflate-huff-table-size.go which calculates that, for a 9-bit 63// primary table, the worst-case size is 852 for the Lit/Len table and 592 for 64// the Distance table. 65pri const huffs_table_size base.u32 = 1024 66pri const huffs_table_mask base.u32 = 1023 67 68pub struct decoder?( 69 // These fields yield src's bits in Least Significant Bits order. 70 bits : base.u32, 71 n_bits : base.u32, 72 73 // history_index indexes the history array, defined below. 74 history_index : base.u32, 75 76 // n_huffs_bits is discussed in the huffs field comment. 77 n_huffs_bits : array[2] base.u32[..= 9], 78 79 // end_of_block is whether decode_huffman_xxx saw an end-of-block code. 80 // 81 // TODO: can decode_huffman_xxx signal this in band instead of out of band? 82 end_of_block : base.bool, 83 84 util : base.utility, 85)( 86 // huffs and n_huffs_bits are the lookup tables for Huffman decodings. 87 // 88 // There are up to 2 Huffman decoders active at any one time. As per this 89 // package's README.md: 90 // - huffs[0] is used for clcode and lcode. 91 // - huffs[1] is used for dcode. 92 // 93 // The initial table key is the low n_huffs_bits of the decoder.bits field. 94 // Keys longer than 9 bits require a two step lookup, the first step 95 // examines the low 9 bits, the second step examines the remaining bits. 96 // Two steps are required at most, as keys are at most 15 bits long. 97 // 98 // Using decoder.bits's low n_huffs_bits as a table key is valid even if 99 // decoder.n_bits is less than n_huffs_bits, because the immediate next 100 // step after indexing the table by the key is to compare decoder.n_bits to 101 // the table value's number of decoder.bits to consume. If it compares 102 // less, then more source bytes are read and the table lookup re-tried. 103 // 104 // The table value's bits: 105 // - bit 31 indicates a literal. 106 // - bit 30 indicates a base number + extra bits. 107 // - bit 29 indicates end-of-block. 108 // - bit 28 indicates a redirect to another part of the table. 109 // - bit 27 indicates an invalid value. 110 // - bits 24 ..= 26 are zero. 111 // - bits 8 ..= 23 are the redirect offset, literal (in bits [8 ..= 15]) 112 // or base number. 113 // - bits 4 ..= 7 are the redirected table's size in bits or the number 114 // of extra bits. 115 // - bits 0 ..= 3 are the number of decoder.bits to consume. 116 // 117 // Exactly one of the eight bits [24 ..= 31] should be set. 118 huffs : array[2] array[huffs_table_size] base.u32, 119 120 // history holds up to the last 32KiB of decoded output, if the decoding 121 // was incomplete (e.g. due to a short read or write). RFC 1951 (DEFLATE) 122 // gives the maximum distance in a length-distance back-reference as 32768, 123 // or 0x8000. 124 // 125 // It is a ringbuffer, so that the most distant byte in the decoding isn't 126 // necessarily history[0]. The ringbuffer is full (i.e. it holds 32KiB of 127 // history) if and only if history_index >= 0x8000. 128 // 129 // history[history_index & 0x7FFF] is where the next byte of decoded output 130 // will be written. 131 history : array[0x8000] base.u8, // 32 KiB. 132 133 // code_lengths is used to pass out-of-band data to init_huff. 134 // 135 // code_lengths[args.n_codes0 + i] holds the number of bits in the i'th 136 // code. 137 code_lengths : array[320] base.u8, 138) 139 140pub func decoder.add_history!(hist: slice base.u8) { 141 var s : slice base.u8 142 var n_copied : base.u64 143 var already_full : base.u32[..= 0x8000] 144 145 s = args.hist 146 if s.length() >= 0x8000 { 147 // If s is longer than the ringbuffer, we can ignore the previous value 148 // of history_index, as we will overwrite the whole ringbuffer. 149 s = s.suffix(up_to: 0x8000) 150 this.history[..].copy_from_slice!(s: s) 151 this.history_index = 0x8000 152 } else { 153 // Otherwise, append s to the history ringbuffer starting at the 154 // previous history_index (modulo 0x8000). 155 n_copied = this.history[this.history_index & 0x7FFF ..].copy_from_slice!(s: s) 156 if n_copied < s.length() { 157 // a_slice.copy_from(s:b_slice) returns the minimum of the two 158 // slice lengths. If that value is less than b_slice.length(), then 159 // not all of b_slice was copied. 160 // 161 // In terms of the history ringbuffer, that means that we have to 162 // wrap around and copy the remainder of s over the start of the 163 // history ringbuffer. 164 s = s[n_copied ..] 165 n_copied = this.history[..].copy_from_slice!(s: s) 166 // Set history_index (modulo 0x8000) to the length of this 167 // remainder. The &0x7FFF is redundant, but proves to the compiler 168 // that the conversion to u32 will not overflow. The +0x8000 is to 169 // maintain that the history ringbuffer is full if and only if 170 // history_index >= 0x8000. 171 this.history_index = ((n_copied & 0x7FFF) as base.u32) + 0x8000 172 } else { 173 // We didn't need to wrap around. 174 already_full = 0 175 if this.history_index >= 0x8000 { 176 already_full = 0x8000 177 } 178 this.history_index = (this.history_index & 0x7FFF) + ((n_copied & 0x7FFF) as base.u32) + already_full 179 } 180 } 181} 182 183pub func decoder.workbuf_len() base.range_ii_u64 { 184 return this.util.make_range_ii_u64( 185 min_incl: decoder_workbuf_len_max_incl_worst_case, 186 max_incl: decoder_workbuf_len_max_incl_worst_case) 187} 188 189pub func decoder.decode_io_writer?(dst: base.io_writer, src: base.io_reader, workbuf: slice base.u8) { 190 var mark : base.u64 191 var status : base.status 192 193 while true { 194 mark = args.dst.mark() 195 status =? this.decode_blocks?(dst: args.dst, src: args.src) 196 if not status.is_suspension() { 197 return status 198 } 199 // TODO: should "since" be "since!", as the return value lets you 200 // modify the state of args.dst, so future mutations (via the slice) 201 // can change the veracity of any args.dst assertions? 202 this.add_history!(hist: args.dst.since(mark: mark)) 203 yield? status 204 } 205} 206 207pri func decoder.decode_blocks?(dst: base.io_writer, src: base.io_reader) { 208 var final : base.u32 209 var b0 : base.u32[..= 255] 210 var type : base.u32 211 var status : base.status 212 213 while.outer final == 0 { 214 while this.n_bits < 3, 215 post this.n_bits >= 3, 216 { 217 b0 = args.src.read_u8_as_u32?() 218 this.bits |= b0 << this.n_bits 219 this.n_bits += 8 220 } 221 final = this.bits & 0x01 222 type = (this.bits >> 1) & 0x03 223 this.bits >>= 3 224 this.n_bits -= 3 225 226 if type == 0 { 227 this.decode_uncompressed?(dst: args.dst, src: args.src) 228 continue 229 } else if type == 1 { 230 status = this.init_fixed_huffman!() 231 // TODO: "if status.is_error()" is probably more idiomatic, but for 232 // some mysterious, idiosyncratic reason, performs noticably worse 233 // for gcc (but not for clang). 234 // 235 // See git commit 3bf9573 "Work around strange status.is_error 236 // performance". 237 if not status.is_ok() { 238 return status 239 } 240 } else if type == 2 { 241 this.init_dynamic_huffman?(src: args.src) 242 } else { 243 return "#bad block" 244 } 245 246 this.end_of_block = false 247 while true { 248 status = this.decode_huffman_fast!(dst: args.dst, src: args.src) 249 if status.is_error() { 250 return status 251 } 252 if this.end_of_block { 253 continue.outer 254 } 255 this.decode_huffman_slow?(dst: args.dst, src: args.src) 256 if this.end_of_block { 257 continue.outer 258 } 259 } 260 } 261} 262 263// decode_uncompressed decodes an uncompresed block as per the RFC section 264// 3.2.4. 265pri func decoder.decode_uncompressed?(dst: base.io_writer, src: base.io_reader) { 266 var length : base.u32 267 var n_copied : base.u32 268 269 // TODO: make this "if" into a function invariant? 270 // 271 // Ditto for decode_huffman_slow and decode_huffman_fast. 272 if (this.n_bits >= 8) or ((this.bits >> (this.n_bits & 7)) <> 0) { 273 return "#internal error: inconsistent n_bits" 274 } 275 this.n_bits = 0 276 this.bits = 0 277 278 length = args.src.read_u32le?() 279 if (length.low_bits(n: 16) + length.high_bits(n: 16)) <> 0xFFFF { 280 return "#inconsistent stored block length" 281 } 282 length = length.low_bits(n: 16) 283 while true { 284 n_copied = args.dst.copy_n_from_reader!(n: length, r: args.src) 285 if length <= n_copied { 286 return ok 287 } 288 length -= n_copied 289 if args.dst.available() == 0 { 290 yield? base."$short write" 291 } else { 292 yield? base."$short read" 293 } 294 } 295} 296 297// init_fixed_huffman initializes this.huffs as per the RFC section 3.2.6. 298pri func decoder.init_fixed_huffman!() base.status { 299 var i : base.u32 300 var status : base.status 301 302 while i < 144 { 303 this.code_lengths[i] = 8 304 i += 1 305 } 306 while i < 256 { 307 this.code_lengths[i] = 9 308 i += 1 309 } 310 while i < 280 { 311 this.code_lengths[i] = 7 312 i += 1 313 } 314 while i < 288 { 315 this.code_lengths[i] = 8 316 i += 1 317 } 318 while i < 320 { 319 this.code_lengths[i] = 5 320 i += 1 321 } 322 323 status = this.init_huff!(which: 0, n_codes0: 0, n_codes1: 288, base_symbol: 257) 324 if status.is_error() { 325 return status 326 } 327 status = this.init_huff!(which: 1, n_codes0: 288, n_codes1: 320, base_symbol: 0) 328 if status.is_error() { 329 return status 330 } 331 return ok 332} 333 334// init_dynamic_huffman initializes this.huffs as per the RFC section 3.2.7. 335pri func decoder.init_dynamic_huffman?(src: base.io_reader) { 336 var bits : base.u32 337 var n_bits : base.u32 338 var b0 : base.u32[..= 255] 339 var n_lit : base.u32[..= 288] 340 var n_dist : base.u32[..= 32] 341 var n_clen : base.u32[..= 19] 342 var i : base.u32 343 var b1 : base.u32[..= 255] 344 var status : base.status 345 var mask : base.u32[..= 511] 346 var table_entry : base.u32 347 var table_entry_n_bits : base.u32[..= 15] 348 var b2 : base.u32[..= 255] 349 var n_extra_bits : base.u32[..= 7] 350 var rep_symbol : base.u8[..= 15] 351 var rep_count : base.u32 352 var b3 : base.u32[..= 255] 353 354 bits = this.bits 355 n_bits = this.n_bits 356 while n_bits < 14, 357 post n_bits >= 14, 358 { 359 b0 = args.src.read_u8_as_u32?() 360 bits |= b0 << n_bits 361 n_bits += 8 362 } 363 n_lit = bits.low_bits(n: 5) + 257 364 if n_lit > 286 { 365 return "#bad literal/length code count" 366 } 367 bits >>= 5 368 n_dist = bits.low_bits(n: 5) + 1 369 if n_dist > 30 { 370 return "#bad distance code count" 371 } 372 bits >>= 5 373 n_clen = bits.low_bits(n: 4) + 4 374 bits >>= 4 375 n_bits -= 14 376 377 // Read the clcode Huffman table: H-CL. 378 i = 0 379 while i < n_clen { 380 while n_bits < 3, 381 inv i < n_clen, 382 post n_bits >= 3, 383 { 384 b1 = args.src.read_u8_as_u32?() 385 bits |= b1 << n_bits 386 n_bits += 8 387 } 388 assert i < 19 via "a < b: a < c; c <= b"(c: n_clen) 389 this.code_lengths[code_order[i]] = (bits & 0x07) as base.u8 390 bits >>= 3 391 n_bits -= 3 392 i += 1 393 } 394 while i < 19 { 395 this.code_lengths[code_order[i]] = 0 396 i += 1 397 } 398 status = this.init_huff!(which: 0, n_codes0: 0, n_codes1: 19, base_symbol: 0xFFF) 399 if status.is_error() { 400 return status 401 } 402 403 // Decode the code lengths for the next two Huffman tables. 404 mask = ((1 as base.u32) << (this.n_huffs_bits[0])) - 1 405 i = 0 406 while i < (n_lit + n_dist) { 407 assert i < (288 + 32) via "a < (b + c): a < (b0 + c0); b0 <= b; c0 <= c"(b0: n_lit, c0: n_dist) 408 409 // Decode a clcode symbol from H-CL. 410 while true, 411 inv i < 320, 412 { 413 table_entry = this.huffs[0][bits & mask] 414 table_entry_n_bits = table_entry & 15 415 if n_bits >= table_entry_n_bits { 416 bits >>= table_entry_n_bits 417 n_bits -= table_entry_n_bits 418 break 419 } 420 assert n_bits < 15 via "a < b: a < c; c <= b"(c: table_entry_n_bits) 421 b2 = args.src.read_u8_as_u32?() 422 bits |= b2 << n_bits 423 n_bits += 8 424 } 425 // For H-CL, there should be no redirections and all symbols should be 426 // literals. 427 if (table_entry >> 24) <> 0x80 { 428 return "#internal error: inconsistent Huffman decoder state" 429 } 430 table_entry = (table_entry >> 8) & 0xFF 431 432 // Write a literal code length. 433 if table_entry < 16 { 434 this.code_lengths[i] = table_entry as base.u8 435 i += 1 436 continue 437 } 438 439 // Write a repeated code length. 440 n_extra_bits = 0 441 rep_symbol = 0 442 rep_count = 0 443 if table_entry == 16 { 444 n_extra_bits = 2 445 if i <= 0 { 446 return "#bad Huffman code length repetition" 447 } 448 rep_symbol = this.code_lengths[i - 1] & 15 449 rep_count = 3 450 assert rep_count <= 11 451 } else if table_entry == 17 { 452 n_extra_bits = 3 453 rep_symbol = 0 454 rep_count = 3 455 assert rep_count <= 11 456 } else if table_entry == 18 { 457 n_extra_bits = 7 458 rep_symbol = 0 459 rep_count = 11 460 assert rep_count <= 11 461 } else { 462 return "#internal error: inconsistent Huffman decoder state" 463 } 464 while n_bits < n_extra_bits, 465 inv i < 320, 466 inv rep_count <= 11, 467 post n_bits >= n_extra_bits, 468 { 469 assert n_bits < 7 via "a < b: a < c; c <= b"(c: n_extra_bits) 470 b3 = args.src.read_u8_as_u32?() 471 bits |= b3 << n_bits 472 n_bits += 8 473 } 474 rep_count += bits.low_bits(n: n_extra_bits) 475 bits >>= n_extra_bits 476 n_bits -= n_extra_bits 477 478 while rep_count > 0 { 479 // TODO: hoist this check up one level? 480 if i >= (n_lit + n_dist) { 481 return "#bad Huffman code length count" 482 } 483 assert i < (288 + 32) via "a < (b + c): a < (b0 + c0); b0 <= b; c0 <= c"(b0: n_lit, c0: n_dist) 484 this.code_lengths[i] = rep_symbol 485 i += 1 486 rep_count -= 1 487 } 488 } 489 490 if i <> (n_lit + n_dist) { 491 return "#bad Huffman code length count" 492 } 493 if this.code_lengths[256] == 0 { 494 return "#missing end-of-block code" 495 } 496 497 status = this.init_huff!(which: 0, n_codes0: 0, n_codes1: n_lit, base_symbol: 257) 498 if status.is_error() { 499 return status 500 } 501 status = this.init_huff!(which: 1, n_codes0: n_lit, n_codes1: n_lit + n_dist, base_symbol: 0) 502 if status.is_error() { 503 return status 504 } 505 506 this.bits = bits 507 this.n_bits = n_bits 508} 509 510// TODO: make named constants for 15, 19, 319, etc. 511 512pri func decoder.init_huff!(which: base.u32[..= 1], n_codes0: base.u32[..= 288], n_codes1: base.u32[..= 320], base_symbol: base.u32) base.status { 513 var counts : array[16] base.u16[..= 320] 514 var i : base.u32 515 var remaining : base.u32 516 var offsets : array[16] base.u16[..= 320] 517 var n_symbols : base.u32[..= 320] 518 var count : base.u32[..= 320] 519 var symbols : array[320] base.u16[..= 319] 520 var min_cl : base.u32[..= 9] 521 var max_cl : base.u32[..= 15] 522 var initial_high_bits : base.u32 523 var prev_cl : base.u32[..= 15] 524 var prev_redirect_key : base.u32 525 var top : base.u32[..= huffs_table_size] 526 var next_top : base.u32[..= huffs_table_size] 527 var code : base.u32 528 var key : base.u32 529 var value : base.u32 530 var cl : base.u32[..= 15] 531 var redirect_key : base.u32[..= 511] 532 var j : base.u32[..= 16] 533 var reversed_key : base.u32[..= 511] 534 var symbol : base.u32[..= 319] 535 var high_bits : base.u32 536 var delta : base.u32 537 538 // For the clcode example in this package's README.md: 539 // - n_codes0 = 0 540 // - n_codes1 = 19 541 // - code_lengths[ 0] = 3 542 // - code_lengths[ 1] = 0 543 // - code_lengths[ 2] = 0 544 // - code_lengths[ 3] = 5 545 // - code_lengths[ 4] = 3 546 // - code_lengths[ 5] = 3 547 // - code_lengths[ 6] = 3 548 // - code_lengths[ 7] = 3 549 // - code_lengths[ 8] = 3 550 // - code_lengths[ 9] = 3 551 // - code_lengths[10] = 0 552 // - code_lengths[11] = 0 553 // - code_lengths[12] = 0 554 // - code_lengths[13] = 0 555 // - code_lengths[14] = 0 556 // - code_lengths[15] = 0 557 // - code_lengths[16] = 0 558 // - code_lengths[17] = 4 559 // - code_lengths[18] = 5 560 561 // Calculate counts. 562 // 563 // For the clcode example in this package's README.md: 564 // - counts[0] = 9 565 // - counts[1] = 0 566 // - counts[2] = 0 567 // - counts[3] = 7 568 // - counts[4] = 1 569 // - counts[5] = 2 570 // - all other counts elements are 0. 571 i = args.n_codes0 572 while i < args.n_codes1 { 573 assert i < 320 via "a < b: a < c; c <= b"(c: args.n_codes1) 574 // TODO: this if should be unnecessary. Have some way to assert that, 575 // for all j, counts[j] <= i, and thus counts[j]++ will not overflow. 576 if counts[this.code_lengths[i] & 15] >= 320 { 577 return "#internal error: inconsistent Huffman decoder state" 578 } 579 counts[this.code_lengths[i] & 15] += 1 580 i += 1 581 } 582 if ((counts[0] as base.u32) + args.n_codes0) == args.n_codes1 { 583 return "#no Huffman codes" 584 } 585 586 // Check that the Huffman code completely covers all possible input bits. 587 remaining = 1 // There is 1 possible 0-bit code. 588 i = 1 589 while i <= 15 { 590 if remaining > (1 << 30) { 591 return "#internal error: inconsistent Huffman decoder state" 592 } 593 // Each iteration doubles the number of possible remaining codes. 594 remaining <<= 1 595 if remaining < (counts[i] as base.u32) { 596 return "#bad Huffman code (over-subscribed)" 597 } 598 remaining -= counts[i] as base.u32 599 i += 1 600 } 601 if remaining <> 0 { 602 // As a special case, allow a degenerate H-D Huffman table, with only 603 // one 1-bit code, for the smallest possible distance. 604 if (args.which == 1) and (counts[1] == 1) and 605 (this.code_lengths[args.n_codes0] == 1) and 606 (((counts[0] as base.u32) + args.n_codes0 + 1) == args.n_codes1) { 607 608 this.n_huffs_bits[1] = 1 609 this.huffs[1][0] = dcode_magic_numbers[0] | 1 610 this.huffs[1][1] = dcode_magic_numbers[31] | 1 611 return ok 612 } 613 614 return "#bad Huffman code (under-subscribed)" 615 } 616 617 // Calculate offsets and n_symbols. 618 // 619 // For the clcode example in this package's README.md: 620 // - offsets[0] = 0 621 // - offsets[1] = 0 622 // - offsets[2] = 0 623 // - offsets[3] = 0 624 // - offsets[4] = 7 625 // - offsets[5] = 8 626 // - offsets[6] = 10 627 // - all other offsets elements are 10. 628 // - n_symbols = 10 629 i = 1 630 while i <= 15 { 631 offsets[i] = n_symbols as base.u16 632 count = counts[i] as base.u32 633 if n_symbols > (320 - count) { 634 return "#internal error: inconsistent Huffman decoder state" 635 } 636 assert (n_symbols + count) <= 320 via "(a + b) <= c: a <= (c - b)"() 637 // TODO: change this to n_symbols += count, once the proof engine's 638 // bounds checking can handle it. 639 n_symbols = n_symbols + count 640 i += 1 641 } 642 if n_symbols > 288 { 643 return "#internal error: inconsistent Huffman decoder state" 644 } 645 646 // Calculate symbols. 647 // 648 // For the clcode example in this package's README.md: 649 // - symbols[0] = 0 650 // - symbols[1] = 4 651 // - symbols[2] = 5 652 // - symbols[3] = 6 653 // - symbols[4] = 7 654 // - symbols[5] = 8 655 // - symbols[6] = 9 656 // - symbols[7] = 17 657 // - symbols[8] = 3 658 // - symbols[9] = 18 659 // 660 // As a (local variable) side effect, offsets' values will be updated: 661 // - offsets[3] = 7, formerly 0 662 // - offsets[4] = 8, formerly 7 663 // - offsets[5] = 10, formerly 8 664 i = args.n_codes0 665 while i < args.n_codes1, 666 inv n_symbols <= 288, 667 { 668 assert i < 320 via "a < b: a < c; c <= b"(c: args.n_codes1) 669 // TODO: this if check should be unnecessary. 670 if i < args.n_codes0 { 671 return "#internal error: inconsistent Huffman decoder state" 672 } 673 if this.code_lengths[i] <> 0 { 674 if offsets[this.code_lengths[i] & 15] >= 320 { 675 return "#internal error: inconsistent Huffman decoder state" 676 } 677 symbols[offsets[this.code_lengths[i] & 15]] = (i - args.n_codes0) as base.u16 678 offsets[this.code_lengths[i] & 15] += 1 679 } 680 i += 1 681 } 682 683 // Calculate min_cl and max_cl. 684 // 685 // For the clcode example in this package's README.md: 686 // - min_cl = 3 687 // - max_cl = 5 688 min_cl = 1 689 while true, 690 inv n_symbols <= 288, 691 { 692 if counts[min_cl] <> 0 { 693 break 694 } 695 if min_cl >= 9 { 696 return "#bad Huffman minimum code length" 697 } 698 min_cl += 1 699 } 700 max_cl = 15 701 while true, 702 inv n_symbols <= 288, 703 { 704 if counts[max_cl] <> 0 { 705 break 706 } 707 if max_cl <= 1 { 708 return "#no Huffman codes" 709 } 710 max_cl -= 1 711 } 712 if max_cl <= 9 { 713 this.n_huffs_bits[args.which] = max_cl 714 } else { 715 this.n_huffs_bits[args.which] = 9 716 } 717 718 // Calculate this.huffs[args.which]. 719 // 720 // For the clcode example in this package's README.md: 721 // - this.huffs[0][0b..000] = 0x80000003 (literal, symbols[0]=0x00, code_length=3) 722 // - this.huffs[0][0b..100] = 0x80000403 (literal, symbols[1]=0x04, code_length=3) 723 // - this.huffs[0][0b..010] = 0x80000503 (literal, symbols[2]=0x05, code_length=3) 724 // - this.huffs[0][0b..110] = 0x80000603 (literal, symbols[3]=0x06, code_length=3) 725 // - this.huffs[0][0b..001] = 0x80000703 (literal, symbols[4]=0x07, code_length=3) 726 // - this.huffs[0][0b..101] = 0x80000803 (literal, symbols[5]=0x08, code_length=3) 727 // - this.huffs[0][0b..011] = 0x80000903 (literal, symbols[6]=0x09, code_length=3) 728 // - this.huffs[0][0b.0111] = 0x80001104 (literal, symbols[7]=0x11, code_length=4) 729 // - this.huffs[0][0b01111] = 0x80000305 (literal, symbols[8]=0x03, code_length=5) 730 // - this.huffs[0][0b11111] = 0x80001805 (literal, symbols[9]=0x18, code_length=5) 731 i = 0 732 if (n_symbols <> (offsets[max_cl] as base.u32)) or (n_symbols <> (offsets[15] as base.u32)) { 733 return "#internal error: inconsistent Huffman decoder state" 734 } 735 if (args.n_codes0 + (symbols[0] as base.u32)) >= 320 { 736 return "#internal error: inconsistent Huffman decoder state" 737 } 738 739 initial_high_bits = 1 << 9 740 if max_cl < 9 { 741 initial_high_bits = (1 as base.u32) << max_cl 742 } 743 prev_cl = (this.code_lengths[args.n_codes0 + (symbols[0] as base.u32)] & 15) as base.u32 744 prev_redirect_key = 0xFFFFFFFF 745 top = 0 746 next_top = 512 747 code = 0 748 key = 0 749 value = 0 750 while true, 751 pre code < (1 << 15), 752 pre i < 288, 753 inv n_symbols <= 288, 754 { 755 if (args.n_codes0 + (symbols[i] as base.u32)) >= 320 { 756 return "#internal error: inconsistent Huffman decoder state" 757 } 758 cl = (this.code_lengths[args.n_codes0 + (symbols[i] as base.u32)] & 15) as base.u32 759 if cl > prev_cl { 760 code <<= cl - prev_cl 761 if code >= (1 << 15) { 762 return "#internal error: inconsistent Huffman decoder state" 763 } 764 } 765 // For the remainder of this loop body, prev_cl is the original code 766 // length, cl is possibly clipped by 9, if in the 2nd-level table. 767 prev_cl = cl 768 769 key = code 770 if cl > 9 { 771 cl -= 9 772 assert cl <= 9 773 774 redirect_key = (key >> cl) & 511 775 key = key.low_bits(n: cl) 776 if prev_redirect_key <> redirect_key { 777 prev_redirect_key = redirect_key 778 779 // Calculate the number of bits needed for the 2nd level table. 780 // This computation is similar to "check that the Huffman code 781 // completely covers all possible input bits" above. 782 remaining = (1 as base.u32) << cl 783 j = prev_cl 784 while j <= 15, 785 inv cl <= 9, 786 inv code < (1 << 15), 787 inv i < 288, 788 inv n_symbols <= 288, 789 { 790 if remaining <= (counts[j] as base.u32) { 791 break 792 } 793 remaining -= counts[j] as base.u32 794 if remaining > (1 << 30) { 795 return "#internal error: inconsistent Huffman decoder state" 796 } 797 remaining <<= 1 798 j += 1 799 } 800 if (j <= 9) or (15 < j) { 801 return "#internal error: inconsistent Huffman decoder state" 802 } 803 j -= 9 804 initial_high_bits = (1 as base.u32) << j 805 806 top = next_top 807 if (top + ((1 as base.u32) << j)) > huffs_table_size { 808 return "#internal error: inconsistent Huffman decoder state" 809 } 810 assert (top + ((1 as base.u32) << j)) <= 1024 via "a <= b: a <= c; c <= b"(c: huffs_table_size) 811 next_top = top + ((1 as base.u32) << j) 812 813 redirect_key = (reverse8[redirect_key >> 1] as base.u32) | ((redirect_key & 1) << 8) 814 this.huffs[args.which][redirect_key] = 0x10000009 | (top << 8) | (j << 4) 815 } 816 } 817 if (key >= (1 << 9)) or (counts[prev_cl] <= 0) { 818 return "#internal error: inconsistent Huffman decoder state" 819 } 820 counts[prev_cl] -= 1 821 822 reversed_key = (reverse8[key >> 1] as base.u32) | ((key & 1) << 8) 823 reversed_key >>= 9 - cl 824 825 symbol = symbols[i] as base.u32 826 if symbol == 256 { 827 // End-of-block. 828 value = 0x20000000 | cl 829 } else if (symbol < 256) and (args.which == 0) { 830 // Literal. 831 value = 0x80000000 | (symbol << 8) | cl 832 } else if symbol >= args.base_symbol { 833 // Base number + extra bits. 834 symbol -= args.base_symbol 835 if args.which == 0 { 836 value = lcode_magic_numbers[symbol & 31] | cl 837 } else { 838 value = dcode_magic_numbers[symbol & 31] | cl 839 } 840 } else { 841 return "#internal error: inconsistent Huffman decoder state" 842 } 843 844 // The table uses log2(initial_high_bits) bits, but reversed_key only 845 // has cl bits. We duplicate the key-value pair across all possible 846 // values of the high (log2(initial_high_bits) - cl) bits. 847 high_bits = initial_high_bits 848 delta = (1 as base.u32) << cl 849 while high_bits >= delta, 850 inv code < (1 << 15), 851 inv i < 288, 852 inv n_symbols <= 288, 853 { 854 high_bits -= delta 855 if (top + ((high_bits | reversed_key) & 511)) >= huffs_table_size { 856 return "#internal error: inconsistent Huffman decoder state" 857 } 858 assert (top + ((high_bits | reversed_key) & 511)) < 1024 via "a < b: a < c; c <= b"(c: huffs_table_size) 859 this.huffs[args.which][top + ((high_bits | reversed_key) & 511)] = value 860 } 861 862 i += 1 863 if i >= n_symbols { 864 break 865 } 866 assert i < 288 via "a < b: a < c; c <= b"(c: n_symbols) 867 code += 1 868 if code >= (1 << 15) { 869 return "#internal error: inconsistent Huffman decoder state" 870 } 871 } 872 return ok 873} 874