1 // Copyright (C) 2005  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_
4 #define DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_
5 
6 #include "../algs.h"
7 #include "entropy_encoder_model_kernel_abstract.h"
8 #include "../assert.h"
9 
10 
11 
12 namespace dlib
13 {
14 
15     namespace eemk5
16     {
17         struct node
18         {
19             node* next;
20             node* child_context;
21             node* parent_context;
22 
23             unsigned short symbol;
24             unsigned short count;
25             unsigned short total;
26             unsigned short escapes;
27         };
28     }
29 
30 
31     template <
32         unsigned long alphabet_size,
33         typename entropy_encoder,
34         unsigned long total_nodes,
35         unsigned long order
36         >
37     class entropy_encoder_model_kernel_5
38     {
39         /*!
40             REQUIREMENTS ON total_nodes
41                 - 4096 < total_nodes
42                 - this is the total number of nodes that we will use in the tree
43 
44             REQUIREMENTS ON order
45                 - 0 <= order
46                 - this is the maximum depth-1 the tree will be allowed to go (note
47                   that the root level is depth 0).
48 
49             GENERAL NOTES
50                 This implementation follows more or less the implementation
51                 strategy laid out by Alistair Moffat in his paper
52                 Implementing the PPM data compression scheme.  Published in IEEE
53                 Transactions on Communications, 38(11):1917-1921, 1990.
54 
55                 The escape method used will be method D.
56 
57                 This also uses Dmitry Shkarin's Information Inheritance scheme.
58                 (described in "PPM: one step to practicality" and "Improving the
59                 Efficiency of the PPM Algorithm")
60 
61 
62             INITIAL VALUE
63                 - root == pointer to an array of total_nodes nodes
64                 - next_node == 1
65                 - cur == root
66                 - cur_order = 0
67                 - root->next == 0
68                 - root->parent_context == 0
69                 - root->child_context == 0
70                 - root->escapes == 0
71                 - root->total == 0
72                 - stack_size == 0
73                 - exc_used == false
74                 - for all i: exc[i] == 0
75 
76             CONVENTION
77                 - pop() == stack[stack_size-1].n and stack[stack_size-1].nc
78                 - exc_used == something_is_excluded()
79                 - is_excluded(symbol) == bit symbol&0x1F from exc[symbol>>5]
80                 - &get_entropy_encoder() == coder
81                 - root == pointer to an array of total_nodes nodes.
82                   this is also the root of the tree.
83                 - if (next_node < total_nodes) then
84                     - next_node == the next node in root that has not yet been allocated
85 
86                 - root->next == 0
87                 - root->parent_context == 0
88 
89 
90                 - for every node in the tree:
91                   {
92                     - NOTATION:
93                         - The "context" of a node is the string of symbols seen
94                           when you go from the root of the tree down (down though
95                           child context pointers) to the node, including the symbol at
96                           the node itself.  (note that the context of the root node
97                           is "" or the empty string)
98                         - A set of nodes is in the same "context set" if all the node's
99                           contexts are of length n and all the node's contexts share
100                           the same prefix of length n-1.
101                         - The "child context set" of a node is a set of nodes with
102                           contexts that are one symbol longer and prefixed by the node's
103                           context.  For example, if a node has a context "abc" then the
104                           nodes for contexts "abca", "abcb", "abcc", etc. are all in
105                           the child context set of the node.
106                         - The "parent context" of a node is the context that is one
107                           symbol shorter than the node's context and includes the
108                           symbol in the node.  So the parent context of a node with
109                           context "abcd" would be the context "bcd".
110 
111 
112                     - if (next != 0) then
113                         - next == pointer to the next node in the same context set
114                     - if (child_context != 0) then
115                         - child_context == pointer to the first node of the child
116                           context set for this node.
117                         - escapes > 0
118                     - if (parent_context != 0) then
119                         - parent_context == pointer to the parent context of this node.
120                     - else
121                         - this node is the root node of the tree
122 
123 
124                     - if (this is not the root node) then
125                         - symbol == the symbol represented with this node
126                         - count == the number of times this symbol has been seen in its
127                           parent context.
128                     - else
129                         - the root doesn't have a symbol.  i.e. the context for the
130                           root node is "" or the empty string.
131 
132                     - total == The sum of the counts of all the nodes
133                       in the child context set + escapes.
134                     - escapes == the escape count for the context represented
135                       by the node.
136                     - count > 0
137                 }
138 
139 
140                 - cur_order < order
141                 - cur_order == the depth of the node cur in the tree.
142                   (note that the root node has depth 0)
143                 - cur == pointer to the node in the tree who's context matches
144                   the most recent symbols we have seen.
145 
146 
147         !*/
148 
149         typedef eemk5::node node;
150 
151 
152     public:
153 
154         typedef entropy_encoder entropy_encoder_type;
155 
156         entropy_encoder_model_kernel_5 (
157             entropy_encoder& coder
158         );
159 
160         virtual ~entropy_encoder_model_kernel_5 (
161         );
162 
163         inline void clear(
164         );
165 
166         inline void encode (
167             unsigned long symbol
168         );
169 
get_entropy_encoder()170         entropy_encoder& get_entropy_encoder (
171         ) { return coder; }
172 
get_alphabet_size()173         static unsigned long get_alphabet_size (
174         ) { return alphabet_size; }
175 
176     private:
177 
178         inline eemk5::node* allocate_node (
179         );
180         /*!
181             requires
182                 - space_left() == true
183             ensures
184                 - returns a pointer to a new node
185         !*/
186 
187         inline bool space_left (
188         ) const;
189         /*!
190             ensures
191                 - returns true if there is at least 1 free node left.
192                 - returns false otherwise
193         !*/
194 
195         inline void exclude (
196             unsigned short symbol
197         );
198         /*!
199             ensures
200                 - #is_excluded(symbol) == true
201                 - #something_is_excluded() == true
202         !*/
203 
204         inline bool something_is_excluded (
205         );
206         /*!
207             ensures
208                 - returns true if some symbol has been excluded.
209                   returns false otherwise
210         !*/
211 
212         inline bool is_excluded (
213             unsigned short symbol
214         );
215         /*!
216             ensures
217                 - if (symbol has been excluded) then
218                     - returns true
219                 - else
220                     - returns false
221         !*/
222 
223         inline void clear_exclusions (
224         );
225         /*!
226             ensures
227                 - for all symbols #is_excluded(symbol) == false
228                 - #something_is_excluded() == true
229         !*/
230 
231         inline void scale_counts (
232             node* n
233         );
234         /*!
235             ensures
236                 - divides all the counts in the child context set of n by 2.
237                 - none of the nodes in the child context set will have a count of 0
238         !*/
239 
240         inline void push (
241             node* n,
242             node* nc
243         );
244         /*!
245             requires
246                 - stack_size < order
247             ensures
248                 - #pop(a,b): a == n && b == nc
249         !*/
250 
251         inline void pop (
252             node*& n,
253             node*& nc
254         );
255         /*!
256             requires
257                 - stack_size > 0
258             ensures
259                 - returns the two nodes at the top of the stack
260         !*/
261 
262         struct nodes
263         {
264             node* n;
265             node* nc;
266         };
267 
268         unsigned long next_node;
269         entropy_encoder& coder;
270         node* root;
271         node* cur;
272         unsigned long cur_order;
273         unsigned long exc[alphabet_size/32+1];
274         bool exc_used;
275         nodes stack[order+1];
276         unsigned long stack_size;
277 
278         // restricted functions
279         entropy_encoder_model_kernel_5(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&);        // copy constructor
280         entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>& operator=(entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>&);    // assignment operator
281 
282     };
283 
284 // ----------------------------------------------------------------------------------------
285 // ----------------------------------------------------------------------------------------
286     // member function definitions
287 // ----------------------------------------------------------------------------------------
288 // ----------------------------------------------------------------------------------------
289 
290     template <
291         unsigned long alphabet_size,
292         typename entropy_encoder,
293         unsigned long total_nodes,
294         unsigned long order
295         >
296     entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
entropy_encoder_model_kernel_5(entropy_encoder & coder_)297     entropy_encoder_model_kernel_5 (
298         entropy_encoder& coder_
299     ) :
300         next_node(1),
301         coder(coder_),
302         cur_order(0),
303         stack_size(0)
304     {
305         COMPILE_TIME_ASSERT( 1 < alphabet_size && alphabet_size < 65535 );
306         COMPILE_TIME_ASSERT( 4096 < total_nodes );
307 
308         root = new node[total_nodes];
309         cur = root;
310 
311         root->child_context = 0;
312         root->escapes = 0;
313         root->next = 0;
314         root->parent_context = 0;
315         root->total = 0;
316 
317         clear_exclusions();
318     }
319 
320 // ----------------------------------------------------------------------------------------
321 
322     template <
323         unsigned long alphabet_size,
324         typename entropy_encoder,
325         unsigned long total_nodes,
326         unsigned long order
327         >
328     entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
~entropy_encoder_model_kernel_5()329     ~entropy_encoder_model_kernel_5 (
330     )
331     {
332         delete [] root;
333     }
334 
335 // ----------------------------------------------------------------------------------------
336 
337     template <
338         unsigned long alphabet_size,
339         typename entropy_encoder,
340         unsigned long total_nodes,
341         unsigned long order
342         >
343     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
clear()344     clear(
345     )
346     {
347         next_node = 1;
348         root->child_context = 0;
349         root->escapes = 0;
350         root->total = 0;
351         cur = root;
352         cur_order = 0;
353         stack_size = 0;
354 
355         clear_exclusions();
356     }
357 
358 // ----------------------------------------------------------------------------------------
359 
360     template <
361         unsigned long alphabet_size,
362         typename entropy_encoder,
363         unsigned long total_nodes,
364         unsigned long order
365         >
366     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
encode(unsigned long sym)367     encode (
368         unsigned long sym
369     )
370     {
371         unsigned short symbol = static_cast<unsigned short>(sym);
372         node* temp = cur;
373         cur = 0;
374         unsigned short low_count, high_count, total_count;
375         node* new_node = 0;
376 
377         // local_order will track the level of temp in the tree
378         unsigned long local_order = cur_order;
379 
380 
381         unsigned short c; // c == t(a|sk)
382         unsigned short t; // t == T(sk)
383 
384 
385         if (something_is_excluded())
386             clear_exclusions();
387 
388         while (true)
389         {
390             low_count = 0;
391             high_count = 0;
392             if (space_left())
393             {
394                 total_count = temp->total;
395 
396                 if (total_count > 0)
397                 {
398                     // check if we need to scale the counts
399                     if (total_count > 10000)
400                     {
401                         scale_counts(temp);
402                         total_count = temp->total;
403                     }
404 
405 
406                     // find the symbol we are looking for and put a pointer to it
407                     // into found_symbol.  If it isn't found then found_symbol == 0.
408                     // also, low_count and high_count will be correctly set.
409                     node* n = temp->child_context;
410                     node* found_symbol = 0;
411                     node* last = 0;
412                     if (something_is_excluded())
413                     {
414                         node* templast = 0;
415                         while (true)
416                         {
417                             if (is_excluded(n->symbol) == false)
418                             {
419                                 exclude(n->symbol);
420                                 if (found_symbol == 0)
421                                 {
422                                     high_count += n->count;
423                                     if (n->symbol == symbol)
424                                     {
425                                         found_symbol = n;
426                                         last = templast;
427                                         low_count = high_count - n->count;
428                                     }
429                                 }
430                             }
431                             else
432                             {
433                                 total_count -= n->count;
434                             }
435 
436                             if (n->next == 0)
437                                 break;
438                             templast = n;
439                             n = n->next;
440                         }
441                     }
442                     else
443                     {
444                         while (true)
445                         {
446                             high_count += n->count;
447                             exclude(n->symbol);
448 
449                             if (n->symbol == symbol)
450                             {
451                                 found_symbol = n;
452                                 low_count = high_count - n->count;
453                                 break;
454                             }
455 
456                             if (n->next == 0)
457                                 break;
458                             last = n;
459                             n = n->next;
460                         }
461                     }
462 
463 
464 
465 
466 
467                     // if we found the symbol
468                     if (found_symbol)
469                     {
470                         n = found_symbol;
471                         if (new_node != 0)
472                         {
473                             new_node->parent_context = found_symbol;
474                         }
475 
476 
477                         coder.encode(low_count,high_count,total_count);
478                         c = n->count += 8;
479                         t = temp->total += 8;
480 
481                         // move this node to the front
482                         if (last)
483                         {
484                             last->next = n->next;
485                             n->next = temp->child_context;
486                             temp->child_context = n;
487                         }
488 
489 
490                         if (cur == 0)
491                         {
492                             if (local_order >= order)
493                             {
494                                 cur = n->parent_context;
495                                 cur_order = local_order;
496                             }
497                             else
498                             {
499                                 cur_order = local_order+1;
500                                 cur = n;
501                             }
502                         }
503 
504                         break;
505 
506                     }
507                     // if we hit the end of the context set without finding the symbol
508                     else
509                     {
510                         // finish excluding all the symbols
511                         while (n->next)
512                         {
513                             exclude(n->symbol);
514                             n = n->next;
515                         }
516 
517                         if (new_node != 0)
518                         {
519                             new_node->parent_context = allocate_node();
520                             new_node = new_node->parent_context;
521                         }
522                         else
523                         {
524                             new_node = allocate_node();
525                         }
526 
527                         n->next = new_node;
528 
529                         // write an escape to a lower context
530                         coder.encode(high_count,total_count,total_count);
531                     }
532 
533                 }
534                 else // if (total_count == 0)
535                 {
536                     // this means that temp->child_context == 0 so we should make
537                     // a new node here.
538                     if (new_node != 0)
539                     {
540                         new_node->parent_context = allocate_node();
541                         new_node = new_node->parent_context;
542                     }
543                     else
544                     {
545                         new_node = allocate_node();
546                     }
547 
548                     temp->child_context = new_node;
549                 }
550 
551                 if (cur == 0 && local_order < order)
552                 {
553                     cur = new_node;
554                     cur_order = local_order+1;
555                 }
556 
557                 // fill out the new node
558                 new_node->child_context = 0;
559                 new_node->escapes = 0;
560                 new_node->next = 0;
561                 new_node->total = 0;
562                 push(new_node,temp);
563 
564                 if (temp != root)
565                 {
566                     temp = temp->parent_context;
567                     --local_order;
568                     continue;
569                 }
570 
571                 t = 2056;
572                 c = 8;
573 
574                 // since this is the root we are going to the order-(-1) context
575                 // so we can just take care of that here.
576                 new_node->parent_context = root;
577                 coder.encode(symbol,symbol+1,alphabet_size);
578 
579                 if (cur == 0)
580                 {
581                     cur = root;
582                     cur_order = 0;
583                 }
584                 break;
585             }
586             else
587             {
588                 // there isn't enough space so we should throw away the tree
589                 clear();
590                 temp = cur;
591                 local_order = cur_order;
592                 cur = 0;
593                 new_node = 0;
594             }
595         } // while (true)
596 
597 
598         // initialize the counts and symbol for any new nodes we have added
599         // to the tree.
600         node* n, *nc;
601         while (stack_size > 0)
602         {
603             pop(n,nc);
604 
605             n->symbol = static_cast<unsigned short>(symbol);
606 
607             // if nc is not a determnistic context
608             if (nc->total)
609             {
610                 unsigned long temp2 = t-c+nc->total - nc->escapes - nc->escapes;
611                 unsigned long temp = nc->total;
612                 temp *= c;
613                 temp /= (temp2|1); // this oring by 1 is just to make sure that temp2 is never zero
614                 temp += 2;
615                 if (temp > 50000) temp = 50000;
616                 n->count = static_cast<unsigned short>(temp);
617 
618 
619                 nc->escapes += 4;
620                 nc->total += static_cast<unsigned short>(temp) + 4;
621             }
622             else
623             {
624                 n->count = 3 + 5*(c)/(t-c);
625 
626                 nc->escapes = 4;
627                 nc->total = n->count + 4;
628             }
629 
630             while (nc->total > 10000)
631             {
632                 scale_counts(nc);
633             }
634         }
635     }
636 
637 // ----------------------------------------------------------------------------------------
638 // ----------------------------------------------------------------------------------------
639     // private member function definitions
640 // ----------------------------------------------------------------------------------------
641 // ----------------------------------------------------------------------------------------
642 
643     template <
644         unsigned long alphabet_size,
645         typename entropy_encoder,
646         unsigned long total_nodes,
647         unsigned long order
648         >
649     eemk5::node* entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
allocate_node()650     allocate_node (
651     )
652     {
653         node* temp;
654         temp = root + next_node;
655         ++next_node;
656         return temp;
657     }
658 
659 // ----------------------------------------------------------------------------------------
660 
661     template <
662         unsigned long alphabet_size,
663         typename entropy_encoder,
664         unsigned long total_nodes,
665         unsigned long order
666         >
667     bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
space_left()668     space_left (
669     ) const
670     {
671         return (next_node < total_nodes);
672     }
673 
674 // ----------------------------------------------------------------------------------------
675 
676     template <
677         unsigned long alphabet_size,
678         typename entropy_encoder,
679         unsigned long total_nodes,
680         unsigned long order
681         >
682     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
exclude(unsigned short symbol)683     exclude (
684         unsigned short symbol
685     )
686     {
687         exc_used = true;
688         unsigned long temp = 1;
689         temp <<= symbol&0x1F;
690         exc[symbol>>5] |= temp;
691     }
692 
693 // ----------------------------------------------------------------------------------------
694 
695     template <
696         unsigned long alphabet_size,
697         typename entropy_encoder,
698         unsigned long total_nodes,
699         unsigned long order
700         >
701     bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
is_excluded(unsigned short symbol)702     is_excluded (
703         unsigned short symbol
704     )
705     {
706         unsigned long temp = 1;
707         temp <<= symbol&0x1F;
708         return ((exc[symbol>>5]&temp) != 0);
709     }
710 
711 // ----------------------------------------------------------------------------------------
712 
713     template <
714         unsigned long alphabet_size,
715         typename entropy_encoder,
716         unsigned long total_nodes,
717         unsigned long order
718         >
719     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
clear_exclusions()720     clear_exclusions (
721     )
722     {
723         exc_used = false;
724         for (unsigned long i = 0; i < alphabet_size/32+1; ++i)
725         {
726             exc[i] = 0;
727         }
728     }
729 
730 // ----------------------------------------------------------------------------------------
731 
732     template <
733         unsigned long alphabet_size,
734         typename entropy_encoder,
735         unsigned long total_nodes,
736         unsigned long order
737         >
738     bool entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
something_is_excluded()739     something_is_excluded (
740     )
741     {
742         return exc_used;
743     }
744 
745 // ----------------------------------------------------------------------------------------
746 
747     template <
748         unsigned long alphabet_size,
749         typename entropy_encoder,
750         unsigned long total_nodes,
751         unsigned long order
752         >
753     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
push(node * n,node * nc)754     push (
755         node* n,
756         node* nc
757     )
758     {
759         stack[stack_size].n = n;
760         stack[stack_size].nc = nc;
761         ++stack_size;
762     }
763 
764 // ----------------------------------------------------------------------------------------
765 
766     template <
767         unsigned long alphabet_size,
768         typename entropy_encoder,
769         unsigned long total_nodes,
770         unsigned long order
771         >
772     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
pop(node * & n,node * & nc)773     pop (
774         node*& n,
775         node*& nc
776     )
777     {
778         --stack_size;
779         n = stack[stack_size].n;
780         nc = stack[stack_size].nc;
781     }
782 
783 // ----------------------------------------------------------------------------------------
784 
785     template <
786         unsigned long alphabet_size,
787         typename entropy_encoder,
788         unsigned long total_nodes,
789         unsigned long order
790         >
791     void entropy_encoder_model_kernel_5<alphabet_size,entropy_encoder,total_nodes,order>::
scale_counts(node * temp)792     scale_counts (
793         node* temp
794     )
795     {
796         if (temp->escapes > 1)
797             temp->escapes >>= 1;
798         temp->total = temp->escapes;
799 
800         node* n = temp->child_context;
801         while (n != 0)
802         {
803             if (n->count > 1)
804                 n->count >>= 1;
805 
806             temp->total += n->count;
807             n = n->next;
808         }
809     }
810 
811 // ----------------------------------------------------------------------------------------
812 
813 }
814 
815 #endif // DLIB_ENTROPY_ENCODER_MODEL_KERNEl_5_
816 
817 
818