1 // Copyright (C) 2005 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_ 4 #define DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_ 5 6 #include "../algs.h" 7 #include "entropy_encoder_model_kernel_abstract.h" 8 #include "../assert.h" 9 10 11 12 namespace dlib 13 { 14 15 namespace eemk5 16 { 17 struct node 18 { 19 node* next; 20 node* child_context; 21 node* parent_context; 22 23 unsigned short symbol; 24 unsigned short count; 25 unsigned short total; 26 unsigned short escapes; 27 }; 28 } 29 30 31 template < 32 unsigned long alphabet_size, 33 typename entropy_encoder, 34 unsigned long total_nodes, 35 unsigned long order 36 > 37 class entropy_encoder_model_kernel_5 38 { 39 /*! 40 REQUIREMENTS ON total_nodes 41 - 4096 < total_nodes 42 - this is the total number of nodes that we will use in the tree 43 44 REQUIREMENTS ON order 45 - 0 <= order 46 - this is the maximum depth-1 the tree will be allowed to go (note 47 that the root level is depth 0). 48 49 GENERAL NOTES 50 This implementation follows more or less the implementation 51 strategy laid out by Alistair Moffat in his paper 52 Implementing the PPM data compression scheme. Published in IEEE 53 Transactions on Communications, 38(11):1917-1921, 1990. 54 55 The escape method used will be method D. 56 57 This also uses Dmitry Shkarin's Information Inheritance scheme. 58 (described in "PPM: one step to practicality" and "Improving the 59 Efficiency of the PPM Algorithm") 60 61 62 INITIAL VALUE 63 - root == pointer to an array of total_nodes nodes 64 - next_node == 1 65 - cur == root 66 - cur_order = 0 67 - root->next == 0 68 - root->parent_context == 0 69 - root->child_context == 0 70 - root->escapes == 0 71 - root->total == 0 72 - stack_size == 0 73 - exc_used == false 74 - for all i: exc[i] == 0 75 76 CONVENTION 77 - pop() == stack[stack_size-1].n and stack[stack_size-1].nc 78 - exc_used == something_is_excluded() 79 - is_excluded(symbol) == bit symbol&0x1F from exc[symbol>>5] 80 - &get_entropy_encoder() == coder 81 - root == pointer to an array of total_nodes nodes. 82 this is also the root of the tree. 83 - if (next_node < total_nodes) then 84 - next_node == the next node in root that has not yet been allocated 85 86 - root->next == 0 87 - root->parent_context == 0 88 89 90 - for every node in the tree: 91 { 92 - NOTATION: 93 - The "context" of a node is the string of symbols seen 94 when you go from the root of the tree down (down though 95 child context pointers) to the node, including the symbol at 96 the node itself. (note that the context of the root node 97 is "" or the empty string) 98 - A set of nodes is in the same "context set" if all the node's 99 contexts are of length n and all the node's contexts share 100 the same prefix of length n-1. 101 - The "child context set" of a node is a set of nodes with 102 contexts that are one symbol longer and prefixed by the node's 103 context. For example, if a node has a context "abc" then the 104 nodes for contexts "abca", "abcb", "abcc", etc. are all in 105 the child context set of the node. 106 - The "parent context" of a node is the context that is one 107 symbol shorter than the node's context and includes the 108 symbol in the node. So the parent context of a node with 109 context "abcd" would be the context "bcd". 110 111 112 - if (next != 0) then 113 - next == pointer to the next node in the same context set 114 - if (child_context != 0) then 115 - child_context == pointer to the first node of the child 116 context set for this node. 117 - escapes > 0 118 - if (parent_context != 0) then 119 - parent_context == pointer to the parent context of this node. 120 - else 121 - this node is the root node of the tree 122 123 124 - if (this is not the root node) then 125 - symbol == the symbol represented with this node 126 - count == the number of times this symbol has been seen in its 127 parent context. 128 - else 129 - the root doesn't have a symbol. i.e. the context for the 130 root node is "" or the empty string. 131 132 - total == The sum of the counts of all the nodes 133 in the child context set + escapes. 134 - escapes == the escape count for the context represented 135 by the node. 136 - count > 0 137 } 138 139 140 - cur_order < order 141 - cur_order == the depth of the node cur in the tree. 142 (note that the root node has depth 0) 143 - cur == pointer to the node in the tree who's context matches 144 the most recent symbols we have seen. 145 146 147 !*/ 148 149 typedef eemk5::node node; 150 151 152 public: 153 154 typedef entropy_encoder entropy_encoder_type; 155 156 entropy_encoder_model_kernel_5 ( 157 entropy_encoder& coder 158 ); 159 160 virtual ~entropy_encoder_model_kernel_5 ( 161 ); 162 163 inline void clear( 164 ); 165 166 inline void encode ( 167 unsigned long symbol 168 ); 169 get_entropy_encoder()170 entropy_encoder& get_entropy_encoder ( 171 ) { return coder; } 172 get_alphabet_size()173 static unsigned long get_alphabet_size ( 174 ) { return alphabet_size; } 175 176 private: 177 178 inline eemk5::node* allocate_node ( 179 ); 180 /*! 181 requires 182 - space_left() == true 183 ensures 184 - returns a pointer to a new node 185 !*/ 186 187 inline bool space_left ( 188 ) const; 189 /*! 190 ensures 191 - returns true if there is at least 1 free node left. 192 - returns false otherwise 193 !*/ 194 195 inline void exclude ( 196 unsigned short symbol 197 ); 198 /*! 199 ensures 200 - #is_excluded(symbol) == true 201 - #something_is_excluded() == true 202 !*/ 203 204 inline bool something_is_excluded ( 205 ); 206 /*! 207 ensures 208 - returns true if some symbol has been excluded. 209 returns false otherwise 210 !*/ 211 212 inline bool is_excluded ( 213 unsigned short symbol 214 ); 215 /*! 216 ensures 217 - if (symbol has been excluded) then 218 - returns true 219 - else 220 - returns false 221 !*/ 222 223 inline void clear_exclusions ( 224 ); 225 /*! 226 ensures 227 - for all symbols #is_excluded(symbol) == false 228 - #something_is_excluded() == true 229 !*/ 230 231 inline void scale_counts ( 232 node* n 233 ); 234 /*! 235 ensures 236 - divides all the counts in the child context set of n by 2. 237 - none of the nodes in the child context set will have a count of 0 238 !*/ 239 240 inline void push ( 241 node* n, 242 node* nc 243 ); 244 /*! 245 requires 246 - stack_size < order 247 ensures 248 - #pop(a,b): a == n && b == nc 249 !*/ 250 251 inline void pop ( 252 node*& n, 253 node*& nc 254 ); 255 /*! 256 requires 257 - stack_size > 0 258 ensures 259 - returns the two nodes at the top of the stack 260 !*/ 261 262 struct nodes 263 { 264 node* n; 265 node* nc; 266 }; 267 268 unsigned long next_node; 269 entropy_encoder& coder; 270 node* root; 271 node* cur; 272 unsigned long cur_order; 273 unsigned long exc[alphabet_size/32+1]; 274 bool exc_used; 275 nodes stack[order+1]; 276 unsigned long stack_size; 277 278 // restricted functions 279 entropy_encoder_model_kernel_5(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&); // copy constructor 280 entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>& operator=(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&); // assignment operator 281 282 }; 283 284 // ---------------------------------------------------------------------------------------- 285 // ---------------------------------------------------------------------------------------- 286 // member function definitions 287 // ---------------------------------------------------------------------------------------- 288 // ---------------------------------------------------------------------------------------- 289 290 template < 291 unsigned long alphabet_size, 292 typename entropy_encoder, 293 unsigned long total_nodes, 294 unsigned long order 295 > 296 entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: entropy_encoder_model_kernel_5(entropy_encoder & coder_)297 entropy_encoder_model_kernel_5 ( 298 entropy_encoder& coder_ 299 ) : 300 next_node(1), 301 coder(coder_), 302 cur_order(0), 303 stack_size(0) 304 { 305 COMPILE_TIME_ASSERT( 1 < alphabet_size && alphabet_size < 65535 ); 306 COMPILE_TIME_ASSERT( 4096 < total_nodes ); 307 308 root = new node[total_nodes]; 309 cur = root; 310 311 root->child_context = 0; 312 root->escapes = 0; 313 root->next = 0; 314 root->parent_context = 0; 315 root->total = 0; 316 317 clear_exclusions(); 318 } 319 320 // ---------------------------------------------------------------------------------------- 321 322 template < 323 unsigned long alphabet_size, 324 typename entropy_encoder, 325 unsigned long total_nodes, 326 unsigned long order 327 > 328 entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: ~entropy_encoder_model_kernel_5()329 ~entropy_encoder_model_kernel_5 ( 330 ) 331 { 332 delete [] root; 333 } 334 335 // ---------------------------------------------------------------------------------------- 336 337 template < 338 unsigned long alphabet_size, 339 typename entropy_encoder, 340 unsigned long total_nodes, 341 unsigned long order 342 > 343 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: clear()344 clear( 345 ) 346 { 347 next_node = 1; 348 root->child_context = 0; 349 root->escapes = 0; 350 root->total = 0; 351 cur = root; 352 cur_order = 0; 353 stack_size = 0; 354 355 clear_exclusions(); 356 } 357 358 // ---------------------------------------------------------------------------------------- 359 360 template < 361 unsigned long alphabet_size, 362 typename entropy_encoder, 363 unsigned long total_nodes, 364 unsigned long order 365 > 366 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: encode(unsigned long sym)367 encode ( 368 unsigned long sym 369 ) 370 { 371 unsigned short symbol = static_cast<unsigned short>(sym); 372 node* temp = cur; 373 cur = 0; 374 unsigned short low_count, high_count, total_count; 375 node* new_node = 0; 376 377 // local_order will track the level of temp in the tree 378 unsigned long local_order = cur_order; 379 380 381 unsigned short c; // c == t(a|sk) 382 unsigned short t; // t == T(sk) 383 384 385 if (something_is_excluded()) 386 clear_exclusions(); 387 388 while (true) 389 { 390 low_count = 0; 391 high_count = 0; 392 if (space_left()) 393 { 394 total_count = temp->total; 395 396 if (total_count > 0) 397 { 398 // check if we need to scale the counts 399 if (total_count > 10000) 400 { 401 scale_counts(temp); 402 total_count = temp->total; 403 } 404 405 406 // find the symbol we are looking for and put a pointer to it 407 // into found_symbol. If it isn't found then found_symbol == 0. 408 // also, low_count and high_count will be correctly set. 409 node* n = temp->child_context; 410 node* found_symbol = 0; 411 node* last = 0; 412 if (something_is_excluded()) 413 { 414 node* templast = 0; 415 while (true) 416 { 417 if (is_excluded(n->symbol) == false) 418 { 419 exclude(n->symbol); 420 if (found_symbol == 0) 421 { 422 high_count += n->count; 423 if (n->symbol == symbol) 424 { 425 found_symbol = n; 426 last = templast; 427 low_count = high_count - n->count; 428 } 429 } 430 } 431 else 432 { 433 total_count -= n->count; 434 } 435 436 if (n->next == 0) 437 break; 438 templast = n; 439 n = n->next; 440 } 441 } 442 else 443 { 444 while (true) 445 { 446 high_count += n->count; 447 exclude(n->symbol); 448 449 if (n->symbol == symbol) 450 { 451 found_symbol = n; 452 low_count = high_count - n->count; 453 break; 454 } 455 456 if (n->next == 0) 457 break; 458 last = n; 459 n = n->next; 460 } 461 } 462 463 464 465 466 467 // if we found the symbol 468 if (found_symbol) 469 { 470 n = found_symbol; 471 if (new_node != 0) 472 { 473 new_node->parent_context = found_symbol; 474 } 475 476 477 coder.encode(low_count,high_count,total_count); 478 c = n->count += 8; 479 t = temp->total += 8; 480 481 // move this node to the front 482 if (last) 483 { 484 last->next = n->next; 485 n->next = temp->child_context; 486 temp->child_context = n; 487 } 488 489 490 if (cur == 0) 491 { 492 if (local_order >= order) 493 { 494 cur = n->parent_context; 495 cur_order = local_order; 496 } 497 else 498 { 499 cur_order = local_order+1; 500 cur = n; 501 } 502 } 503 504 break; 505 506 } 507 // if we hit the end of the context set without finding the symbol 508 else 509 { 510 // finish excluding all the symbols 511 while (n->next) 512 { 513 exclude(n->symbol); 514 n = n->next; 515 } 516 517 if (new_node != 0) 518 { 519 new_node->parent_context = allocate_node(); 520 new_node = new_node->parent_context; 521 } 522 else 523 { 524 new_node = allocate_node(); 525 } 526 527 n->next = new_node; 528 529 // write an escape to a lower context 530 coder.encode(high_count,total_count,total_count); 531 } 532 533 } 534 else // if (total_count == 0) 535 { 536 // this means that temp->child_context == 0 so we should make 537 // a new node here. 538 if (new_node != 0) 539 { 540 new_node->parent_context = allocate_node(); 541 new_node = new_node->parent_context; 542 } 543 else 544 { 545 new_node = allocate_node(); 546 } 547 548 temp->child_context = new_node; 549 } 550 551 if (cur == 0 && local_order < order) 552 { 553 cur = new_node; 554 cur_order = local_order+1; 555 } 556 557 // fill out the new node 558 new_node->child_context = 0; 559 new_node->escapes = 0; 560 new_node->next = 0; 561 new_node->total = 0; 562 push(new_node,temp); 563 564 if (temp != root) 565 { 566 temp = temp->parent_context; 567 --local_order; 568 continue; 569 } 570 571 t = 2056; 572 c = 8; 573 574 // since this is the root we are going to the order-(-1) context 575 // so we can just take care of that here. 576 new_node->parent_context = root; 577 coder.encode(symbol,symbol+1,alphabet_size); 578 579 if (cur == 0) 580 { 581 cur = root; 582 cur_order = 0; 583 } 584 break; 585 } 586 else 587 { 588 // there isn't enough space so we should throw away the tree 589 clear(); 590 temp = cur; 591 local_order = cur_order; 592 cur = 0; 593 new_node = 0; 594 } 595 } // while (true) 596 597 598 // initialize the counts and symbol for any new nodes we have added 599 // to the tree. 600 node* n, *nc; 601 while (stack_size > 0) 602 { 603 pop(n,nc); 604 605 n->symbol = static_cast<unsigned short>(symbol); 606 607 // if nc is not a determnistic context 608 if (nc->total) 609 { 610 unsigned long temp2 = t-c+nc->total - nc->escapes - nc->escapes; 611 unsigned long temp = nc->total; 612 temp *= c; 613 temp /= (temp2|1); // this oring by 1 is just to make sure that temp2 is never zero 614 temp += 2; 615 if (temp > 50000) temp = 50000; 616 n->count = static_cast<unsigned short>(temp); 617 618 619 nc->escapes += 4; 620 nc->total += static_cast<unsigned short>(temp) + 4; 621 } 622 else 623 { 624 n->count = 3 + 5*(c)/(t-c); 625 626 nc->escapes = 4; 627 nc->total = n->count + 4; 628 } 629 630 while (nc->total > 10000) 631 { 632 scale_counts(nc); 633 } 634 } 635 } 636 637 // ---------------------------------------------------------------------------------------- 638 // ---------------------------------------------------------------------------------------- 639 // private member function definitions 640 // ---------------------------------------------------------------------------------------- 641 // ---------------------------------------------------------------------------------------- 642 643 template < 644 unsigned long alphabet_size, 645 typename entropy_encoder, 646 unsigned long total_nodes, 647 unsigned long order 648 > 649 eemk5::node* entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: allocate_node()650 allocate_node ( 651 ) 652 { 653 node* temp; 654 temp = root + next_node; 655 ++next_node; 656 return temp; 657 } 658 659 // ---------------------------------------------------------------------------------------- 660 661 template < 662 unsigned long alphabet_size, 663 typename entropy_encoder, 664 unsigned long total_nodes, 665 unsigned long order 666 > 667 bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: space_left()668 space_left ( 669 ) const 670 { 671 return (next_node < total_nodes); 672 } 673 674 // ---------------------------------------------------------------------------------------- 675 676 template < 677 unsigned long alphabet_size, 678 typename entropy_encoder, 679 unsigned long total_nodes, 680 unsigned long order 681 > 682 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: exclude(unsigned short symbol)683 exclude ( 684 unsigned short symbol 685 ) 686 { 687 exc_used = true; 688 unsigned long temp = 1; 689 temp <<= symbol&0x1F; 690 exc[symbol>>5] |= temp; 691 } 692 693 // ---------------------------------------------------------------------------------------- 694 695 template < 696 unsigned long alphabet_size, 697 typename entropy_encoder, 698 unsigned long total_nodes, 699 unsigned long order 700 > 701 bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: is_excluded(unsigned short symbol)702 is_excluded ( 703 unsigned short symbol 704 ) 705 { 706 unsigned long temp = 1; 707 temp <<= symbol&0x1F; 708 return ((exc[symbol>>5]&temp) != 0); 709 } 710 711 // ---------------------------------------------------------------------------------------- 712 713 template < 714 unsigned long alphabet_size, 715 typename entropy_encoder, 716 unsigned long total_nodes, 717 unsigned long order 718 > 719 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: clear_exclusions()720 clear_exclusions ( 721 ) 722 { 723 exc_used = false; 724 for (unsigned long i = 0; i < alphabet_size/32+1; ++i) 725 { 726 exc[i] = 0; 727 } 728 } 729 730 // ---------------------------------------------------------------------------------------- 731 732 template < 733 unsigned long alphabet_size, 734 typename entropy_encoder, 735 unsigned long total_nodes, 736 unsigned long order 737 > 738 bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: something_is_excluded()739 something_is_excluded ( 740 ) 741 { 742 return exc_used; 743 } 744 745 // ---------------------------------------------------------------------------------------- 746 747 template < 748 unsigned long alphabet_size, 749 typename entropy_encoder, 750 unsigned long total_nodes, 751 unsigned long order 752 > 753 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: push(node * n,node * nc)754 push ( 755 node* n, 756 node* nc 757 ) 758 { 759 stack[stack_size].n = n; 760 stack[stack_size].nc = nc; 761 ++stack_size; 762 } 763 764 // ---------------------------------------------------------------------------------------- 765 766 template < 767 unsigned long alphabet_size, 768 typename entropy_encoder, 769 unsigned long total_nodes, 770 unsigned long order 771 > 772 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: pop(node * & n,node * & nc)773 pop ( 774 node*& n, 775 node*& nc 776 ) 777 { 778 --stack_size; 779 n = stack[stack_size].n; 780 nc = stack[stack_size].nc; 781 } 782 783 // ---------------------------------------------------------------------------------------- 784 785 template < 786 unsigned long alphabet_size, 787 typename entropy_encoder, 788 unsigned long total_nodes, 789 unsigned long order 790 > 791 void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>:: scale_counts(node * temp)792 scale_counts ( 793 node* temp 794 ) 795 { 796 if (temp->escapes > 1) 797 temp->escapes >>= 1; 798 temp->total = temp->escapes; 799 800 node* n = temp->child_context; 801 while (n != 0) 802 { 803 if (n->count > 1) 804 n->count >>= 1; 805 806 temp->total += n->count; 807 n = n->next; 808 } 809 } 810 811 // ---------------------------------------------------------------------------------------- 812 813 } 814 815 #endif // DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_ 816 817 818