1 /* Bcj2Enc.c -- BCJ2 Encoder (Converter for x86 code)
2 2014-11-10 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 /* #define SHOW_STAT */
7 
8 #ifdef SHOW_STAT
9 #include <stdio.h>
10 #define PRF(x) x
11 #else
12 #define PRF(x)
13 #endif
14 
15 #include <windows.h>
16 #include <string.h>
17 
18 #include "Bcj2.h"
19 #include "CpuArch.h"
20 
21 #define CProb UInt16
22 
23 #define kTopValue ((UInt32)1 << 24)
24 #define kNumModelBits 11
25 #define kBitModelTotal (1 << kNumModelBits)
26 #define kNumMoveBits 5
27 
Bcj2Enc_Init(CBcj2Enc * p)28 void Bcj2Enc_Init(CBcj2Enc *p)
29 {
30   unsigned i;
31 
32   p->state = BCJ2_ENC_STATE_OK;
33   p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
34 
35   p->prevByte = 0;
36 
37   p->cache = 0;
38   p->range = 0xFFFFFFFF;
39   p->low = 0;
40   p->cacheSize = 1;
41 
42   p->ip = 0;
43 
44   p->fileIp = 0;
45   p->fileSize = 0;
46   p->relatLimit = BCJ2_RELAT_LIMIT;
47 
48   p->tempPos = 0;
49 
50   p->flushPos = 0;
51 
52   for (i = 0; i < sizeof(p->probs) / sizeof(p->probs[0]); i++)
53     p->probs[i] = kBitModelTotal >> 1;
54 }
55 
RangeEnc_ShiftLow(CBcj2Enc * p)56 static Bool MY_FAST_CALL RangeEnc_ShiftLow(CBcj2Enc *p)
57 {
58   if ((UInt32)p->low < (UInt32)0xFF000000 || (UInt32)(p->low >> 32) != 0)
59   {
60     Byte *buf = p->bufs[BCJ2_STREAM_RC];
61     do
62     {
63       if (buf == p->lims[BCJ2_STREAM_RC])
64       {
65         p->state = BCJ2_STREAM_RC;
66         p->bufs[BCJ2_STREAM_RC] = buf;
67         return True;
68       }
69       *buf++ = (Byte)(p->cache + (Byte)(p->low >> 32));
70       p->cache = 0xFF;
71     }
72     while (--p->cacheSize);
73     p->bufs[BCJ2_STREAM_RC] = buf;
74     p->cache = (Byte)((UInt32)p->low >> 24);
75   }
76   p->cacheSize++;
77   p->low = (UInt32)p->low << 8;
78   return False;
79 }
80 
Bcj2Enc_Encode_2(CBcj2Enc * p)81 static void Bcj2Enc_Encode_2(CBcj2Enc *p)
82 {
83   if (BCJ2_IS_32BIT_STREAM(p->state))
84   {
85     Byte *cur = p->bufs[p->state];
86     if (cur == p->lims[p->state])
87       return;
88     SetBe32(cur, p->tempTarget);
89     p->bufs[p->state] = cur + 4;
90   }
91 
92   p->state = BCJ2_ENC_STATE_ORIG;
93 
94   for (;;)
95   {
96     if (p->range < kTopValue)
97     {
98       if (RangeEnc_ShiftLow(p))
99         return;
100       p->range <<= 8;
101     }
102 
103     {
104       {
105         const Byte *src = p->src;
106         const Byte *srcLim;
107         Byte *dest;
108         SizeT num = p->srcLim - src;
109 
110         if (p->finishMode == BCJ2_ENC_FINISH_MODE_CONTINUE)
111         {
112           if (num <= 4)
113             return;
114           num -= 4;
115         }
116         else if (num == 0)
117           break;
118 
119         dest = p->bufs[BCJ2_STREAM_MAIN];
120         if (num > (SizeT)(p->lims[BCJ2_STREAM_MAIN] - dest))
121         {
122           num = p->lims[BCJ2_STREAM_MAIN] - dest;
123           if (num == 0)
124           {
125             p->state = BCJ2_STREAM_MAIN;
126             return;
127           }
128         }
129 
130         srcLim = src + num;
131 
132         if (p->prevByte == 0x0F && (src[0] & 0xF0) == 0x80)
133           *dest = src[0];
134         else for (;;)
135         {
136           Byte b = *src;
137           *dest = b;
138           if (b != 0x0F)
139           {
140             if ((b & 0xFE) == 0xE8)
141               break;
142             dest++;
143             if (++src != srcLim)
144               continue;
145             break;
146           }
147           dest++;
148           if (++src == srcLim)
149             break;
150           if ((*src & 0xF0) != 0x80)
151             continue;
152           *dest = *src;
153           break;
154         }
155 
156         num = src - p->src;
157 
158         if (src == srcLim)
159         {
160           p->prevByte = src[-1];
161           p->bufs[BCJ2_STREAM_MAIN] = dest;
162           p->src = src;
163           p->ip += (UInt32)num;
164           continue;
165         }
166 
167         {
168           Byte context = (Byte)(num == 0 ? p->prevByte : src[-1]);
169           Bool needConvert;
170 
171           p->bufs[BCJ2_STREAM_MAIN] = dest + 1;
172           p->ip += (UInt32)num + 1;
173           src++;
174 
175           needConvert = False;
176 
177           if ((SizeT)(p->srcLim - src) >= 4)
178           {
179             UInt32 relatVal = GetUi32(src);
180             if ((p->fileSize == 0 || (UInt32)(p->ip + 4 + relatVal - p->fileIp) < p->fileSize)
181                 && ((relatVal + p->relatLimit) >> 1) < p->relatLimit)
182               needConvert = True;
183           }
184 
185           {
186             UInt32 bound;
187             unsigned ttt;
188             Byte b = src[-1];
189             CProb *prob = p->probs + (unsigned)(b == 0xE8 ? 2 + (unsigned)context : (b == 0xE9 ? 1 : 0));
190 
191             ttt = *prob;
192             bound = (p->range >> kNumModelBits) * ttt;
193 
194             if (!needConvert)
195             {
196               p->range = bound;
197               *prob = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
198               p->src = src;
199               p->prevByte = b;
200               continue;
201             }
202 
203             p->low += bound;
204             p->range -= bound;
205             *prob = (CProb)(ttt - (ttt >> kNumMoveBits));
206 
207             {
208               UInt32 relatVal = GetUi32(src);
209               UInt32 absVal;
210               p->ip += 4;
211               absVal = p->ip + relatVal;
212               p->prevByte = src[3];
213               src += 4;
214               p->src = src;
215               {
216                 unsigned cj = (b == 0xE8) ? BCJ2_STREAM_CALL : BCJ2_STREAM_JUMP;
217                 Byte *cur = p->bufs[cj];
218                 if (cur == p->lims[cj])
219                 {
220                   p->state = cj;
221                   p->tempTarget = absVal;
222                   return;
223                 }
224                 SetBe32(cur, absVal);
225                 p->bufs[cj] = cur + 4;
226               }
227             }
228           }
229         }
230       }
231     }
232   }
233 
234   if (p->finishMode != BCJ2_ENC_FINISH_MODE_END_STREAM)
235     return;
236 
237   for (; p->flushPos < 5; p->flushPos++)
238     if (RangeEnc_ShiftLow(p))
239       return;
240   p->state = BCJ2_ENC_STATE_OK;
241 }
242 
243 
Bcj2Enc_Encode(CBcj2Enc * p)244 void Bcj2Enc_Encode(CBcj2Enc *p)
245 {
246   PRF(printf("\n"));
247   PRF(printf("---- ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
248 
249   if (p->tempPos != 0)
250   {
251     unsigned extra = 0;
252 
253     for (;;)
254     {
255       const Byte *src = p->src;
256       const Byte *srcLim = p->srcLim;
257       unsigned finishMode = p->finishMode;
258 
259       p->src = p->temp;
260       p->srcLim = p->temp + p->tempPos;
261       if (src != srcLim)
262         p->finishMode = BCJ2_ENC_FINISH_MODE_CONTINUE;
263 
264       PRF(printf("     ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
265 
266       Bcj2Enc_Encode_2(p);
267 
268       {
269         unsigned num = (unsigned)(p->src - p->temp);
270         unsigned tempPos = p->tempPos - num;
271         unsigned i;
272         p->tempPos = tempPos;
273         for (i = 0; i < tempPos; i++)
274           p->temp[i] = p->temp[i + num];
275 
276         p->src = src;
277         p->srcLim = srcLim;
278         p->finishMode = finishMode;
279 
280         if (p->state != BCJ2_ENC_STATE_ORIG || src == srcLim)
281           return;
282 
283         if (extra >= tempPos)
284         {
285           p->src = src - tempPos;
286           p->tempPos = 0;
287           break;
288         }
289 
290         p->temp[tempPos] = src[0];
291         p->tempPos = tempPos + 1;
292         p->src = src + 1;
293         extra++;
294       }
295     }
296   }
297 
298   PRF(printf("++++ ip = %8d   tempPos = %8d   src = %8d\n", p->ip, p->tempPos, p->srcLim - p->src));
299 
300   Bcj2Enc_Encode_2(p);
301 
302   if (p->state == BCJ2_ENC_STATE_ORIG)
303   {
304     const Byte *src = p->src;
305     unsigned rem = (unsigned)(p->srcLim - src);
306     unsigned i;
307     for (i = 0; i < rem; i++)
308       p->temp[i] = src[i];
309     p->tempPos = rem;
310     p->src = src + rem;
311   }
312 }
313